ThreeB 1.1
#ifndef CONJUGATE_GRADIENT_ONLY
#define CONJUGATE_GRADIENT_ONLY
#include <TooN/TooN.h>
#include <utility>
#include <cmath>
#include <cstdlib>

///Class for performing optimization with Conjugate Gradient, where only the derivatives are available.
///@ingroup gStorm
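///
///A minimal usage sketch (added for illustration, not from the original source;
///\c QuadraticGradient is a hypothetical user-supplied derivative functor):
///@code
/// //Derivative of f(x) = x.x / 2, which has its minimum at the origin.
/// struct QuadraticGradient
/// {
///     TooN::Vector<2> operator()(const TooN::Vector<2>& x) const
///     {
///         return x;
///     }
/// };
///
/// QuadraticGradient df;
/// TooN::Vector<2> start = TooN::makeVector(1.0, -2.0);
/// TooN::Vector<2> max_move = TooN::makeVector(0.1, 0.1);
///
/// ConjugateGradientOnly<2> cg(start, df, max_move);
/// while(cg.iterate(df))
/// {}
/// //cg.x now holds the best estimate of the minimizer.
///@endcode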
template<int Size=-1, class Precision=double> struct ConjugateGradientOnly
{
	const int size;             ///< Dimensionality of the space.
	TooN::Vector<Size> g;       ///< Gradient vector used by the next call to iterate()
	TooN::Vector<Size> new_g;   ///< The new gradient set at the end of iterate()
	TooN::Vector<Size> h;       ///< Conjugate vector to be searched along in the next call to iterate()
	TooN::Vector<Size> minus_h; ///< Negative of h, since it must be passed to a function taking a reference (so it cannot be a temporary)
	TooN::Vector<Size> old_g;   ///< Gradient vector used to compute \f$h\f$ in the last call to iterate()
	TooN::Vector<Size> old_h;   ///< Conjugate vector searched along in the last call to iterate()
	TooN::Vector<Size> x;       ///< Current position (best known point)
	TooN::Vector<Size> old_x;   ///< Previous point (not set at construction)

	Precision tolerance;        ///< Tolerance used to determine if the optimization is complete. Defaults to the square root of machine precision.
	int max_iterations;         ///< Maximum number of iterations. Defaults to \c size \f$\times 100\f$

	TooN::Vector<Size> delta_max; ///< Maximum distance to travel along each axis in the line search

	Precision linesearch_tolerance; ///< Tolerance used to determine if the line search is complete. Defaults to the square root of machine precision.
	Precision linesearch_epsilon;   ///< Additive term in the tolerance to prevent excessive iterations if \f$x_\mathrm{optimal} = 0\f$. Known as \c ZEPS in Numerical Recipes. Defaults to 1e-20

	int iterations; ///< Number of iterations performed

	///Initialize the ConjugateGradientOnly class with sensible values.
	///@param start Starting point, \e x
	///@param deriv Function to compute \f$\nabla f(x)\f$
	///@param d Maximum allowed movement (delta) in any dimension
	template<class Deriv> ConjugateGradientOnly(const TooN::Vector<Size>& start, const Deriv& deriv, const TooN::Vector<Size>& d)
	: size(start.size()),
	  g(size), new_g(size), h(size), minus_h(size), old_g(size), old_h(size), x(start), old_x(size), delta_max(d)
	{
		init(start, deriv);
	}

	///Initialise given a starting point and the derivative at the starting point
	///@param start Starting point
	///@param deriv Derivative at the starting point
	template<int S, class P, class B> void init(const TooN::Vector<Size>& start, const TooN::Vector<S, P, B>& deriv)
	{
		using std::numeric_limits;
		using std::sqrt;
		x = start;

		//Start with the conjugate direction aligned with
		//the gradient
		g = deriv;
		h = g;
		minus_h = -h;

		tolerance = sqrt(numeric_limits<Precision>::epsilon());
		max_iterations = size * 100;

		linesearch_tolerance = sqrt(numeric_limits<Precision>::epsilon());
		linesearch_epsilon = 1e-20;

		iterations = 0;
	}

	///Initialise given a starting point and a functor for computing derivatives
	///@param start Starting point
	///@param deriv Derivative-computing functor
	template<class Deriv> void init(const TooN::Vector<Size>& start, const Deriv& deriv)
	{
		init(start, deriv(start));
	}

	///Perform a line search from the current point (x) along the current
	///conjugate vector (h). The line search makes no use of function values,
	///only derivatives.
	///You probably do not want to call this function directly. See iterate() instead.
	///This function updates:
	/// - x
	/// - old_x
	/// - iterations
	///Note that the conjugate direction and gradient are not updated.
	///If bracket_minimum_forward detects a local maximum, then essentially a zero-sized
	///step is taken.
	///@param deriv Functor returning the derivative value at a given point.
	template<class Deriv> void find_next_point(const Deriv& deriv)
	{
		iterations++;
		using std::abs;
		using std::min;

		//The search direction is -h, so the directional derivative
		//grad.(-h) should be negative at the starting point.
		new_g = g;
		double f1 = g * minus_h;

		//If f1 is positive, then the conjugate vector points against the
		//newly computed gradient. This can only happen if there is an error
		//in the computation of the gradient (e.g. we're using a stochastic
		//method), so there is not much to do here except stop.
		if(f1 > 0)
		{
			//Return; the next iteration will search in a direction conjugate
			//to the current one.
			return;
		}

		//What step size takes us up to the maximum length?
		Precision max_step = HUGE_VAL;
		for(int i=0; i < minus_h.size(); i++)
			max_step = min(max_step, abs(delta_max[i]/h[i]));

		//Taking the maximum step size may result in NaNs.
		//Try the maximum step size, and successively reduce it.
		Precision full_max_step = max_step;

		for(;;)
		{
			new_g = deriv(x + max_step * minus_h);

			if(!TooN::isnan(new_g))
				break;
			else
				max_step /= 2;

			//If the step size gets too small then
			//return, as we can't really do anything
			if(max_step < full_max_step * linesearch_tolerance)
				return;
		}

		double f2 = new_g * minus_h;

		//If f2 hasn't gone positive, then even the largest allowed step does not
		//reach the minimum. So, take a full step, then keep going in the same
		//direction on the next iteration.
		if(f2 < 0)
		{
			//Take a full step
			x += max_step * minus_h;
			return;
		}

		//Now x1 and x2 bracket a root of the directional derivative (f1 < 0 and
		//f2 >= 0); find the root using false position and bisection.
		//x1 and x2 are represented by the scalars s1 and s2.
		double s1 = 0;
		double s2 = max_step;
		double s_new = max_step;

		//Records which end of the bracket was moved recently. False position is
		//used only when the two records differ and the older one is set;
		//otherwise the interval is bisected.
		int updates[2] = {0,0};

		while(abs(s1 - s2) > abs(s1 + s2) * linesearch_tolerance + linesearch_epsilon)
		{
			if(updates[0] != updates[1] && updates[0] != 0)
			{
				//Compute the new point using false position.
				s_new = s1 + f1 * (s2 - s1)/(f1 - f2);
				new_g = deriv(x + s_new * minus_h);
				double f_new = new_g*minus_h;

				updates[0] = updates[1];

				if(f_new == 0)
					break;

				if(f_new < 0)
				{
					s1 = s_new;
					f1 = f_new;
					updates[1] = 1;
				}
				else
				{
					s2 = s_new;
					f2 = f_new;
					updates[1] = 2;
				}
			}
			else
			{
				//Compute the new point using bisection.
				s_new = (s1 + s2) / 2;

				new_g = deriv(x + s_new * minus_h);
				double f_new = new_g*minus_h;

				if(f_new < 0)
				{
					s1 = s_new;
					f1 = f_new;
					updates[0] = 1;
				}
				else
				{
					s2 = s_new;
					f2 = f_new;
					updates[0] = 2;
				}
			}
		}

		//Update the current position.
		//The most recent gradient computation was at s_new.
		x += minus_h * s_new;
	}

	///Check to see if iteration should stop. You probably do not want to use
	///this function. See iterate() instead. This function updates nothing.
	bool finished()
	{
		using std::abs;
		return iterations > max_iterations || norm_inf(new_g) < tolerance;
	}

	///After an iteration, update the gradient and conjugate direction using the
	///Polak-Ribiere equations:
	///\f[ \gamma = \frac{(g_\mathrm{new} - g_\mathrm{old})\cdot g_\mathrm{new}}{g_\mathrm{old}\cdot g_\mathrm{old}}, \qquad h_\mathrm{new} = g_\mathrm{new} + \gamma h_\mathrm{old} \f]
	///This function updates:
	///- g
	///- old_g
	///- h
	///- old_h
	///@param grad The derivatives of the function at \e x
	void update_vectors_PR(const TooN::Vector<Size>& grad)
	{
		//Update the gradient and conjugate directions
		old_g = g;
		old_h = h;

		g = grad;
		Precision gamma = (g * g - old_g * g)/(old_g * old_g);
		h = g + gamma * old_h;
		minus_h = -h;
	}

	///Use this function to iterate over the optimization. Note that after
	///iterate returns false, g, h, old_g and old_h will not have been
	///updated.
	///This function updates:
	/// - x
	/// - old_x
	/// - iterations
	/// - g*
	/// - old_g*
	/// - h*
	/// - old_h*
	///
	/// *'d variables are not updated on the last iteration.
	///@param deriv Functor to compute derivatives at the specified point.
	///@return Whether to continue.
	template<class Deriv> bool iterate(const Deriv& deriv)
	{
		find_next_point(deriv);

		if(!finished())
		{
			update_vectors_PR(new_g);
			return true;
		}
		else
			return false;
	}
};

#endif
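The line search in find_next_point() never evaluates the objective itself: it brackets a root of the directional derivative f(s) = grad f(x + s*d).d and then alternates false position with bisection until the bracket collapses. The following standalone sketch isolates that scalar root-finder; find_root and dquad are illustrative names, and the tolerances stand in for linesearch_tolerance and linesearch_epsilon. None of this is part of the original header.

#include <cmath>
#include <cstdio>
#include <limits>

//Find a root of df in [s1, s2], given df(s1) < 0 and df(s2) >= 0,
//alternating false position with bisection as find_next_point() does.
double find_root(double (*df)(double), double s1, double s2)
{
	double f1 = df(s1);
	double f2 = df(s2);
	double s_new = s2;

	//Records which end of the bracket moved recently (same scheme as above).
	int updates[2] = {0, 0};

	const double tolerance = std::sqrt(std::numeric_limits<double>::epsilon());
	const double epsilon = 1e-20;

	while(std::abs(s1 - s2) > std::abs(s1 + s2) * tolerance + epsilon)
	{
		if(updates[0] != updates[1] && updates[0] != 0)
		{
			//False position step.
			s_new = s1 + f1 * (s2 - s1) / (f1 - f2);
			double f_new = df(s_new);

			updates[0] = updates[1];

			if(f_new == 0)
				break;

			if(f_new < 0)
			{
				s1 = s_new; f1 = f_new; updates[1] = 1;
			}
			else
			{
				s2 = s_new; f2 = f_new; updates[1] = 2;
			}
		}
		else
		{
			//Bisection step.
			s_new = (s1 + s2) / 2;
			double f_new = df(s_new);

			if(f_new < 0)
			{
				s1 = s_new; f1 = f_new; updates[0] = 1;
			}
			else
			{
				s2 = s_new; f2 = f_new; updates[0] = 2;
			}
		}
	}

	return s_new;
}

//Derivative of (s - 2)^2 / 2; the root, and hence the line minimum, is at s = 2.
double dquad(double s)
{
	return s - 2;
}

int main()
{
	std::printf("minimum at s = %f\n", find_root(dquad, 0, 5));
}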