CLHEP VERSION Reference Documentation
CLHEP Home Page CLHEP Documentation CLHEP Bug Reports |
00001 // $Id: RandPoissonQ.cc,v 1.7 2010/06/16 17:24:53 garren Exp $ 00002 // -*- C++ -*- 00003 // 00004 // ----------------------------------------------------------------------- 00005 // HEP Random 00006 // --- RandPoissonQ --- 00007 // class implementation file 00008 // ----------------------------------------------------------------------- 00009 00010 // ======================================================================= 00011 // M. Fischler - Implemented new, much faster table-driven algorithm 00012 // applicable for mu < 100 00013 // - Implemented "quick()" methods, shich are the same as the 00014 // new methods for mu < 100 and are a skew-corrected gaussian 00015 // approximation for large mu. 00016 // M. Fischler - Removed mean=100 from the table-driven set, since it 00017 // uses a value just off the end of the table. (April 2004) 00018 // M Fischler - put and get to/from streams 12/15/04 00019 // M Fischler - Utilize RandGaussQ rather than RandGauss, as clearly 00020 // intended by the inclusion of RandGaussQ.h. Using RandGauss 00021 // introduces a subtle trap in that the state of RandPoissonQ 00022 // can never be properly captured without also saveing the 00023 // state of RandGauss! RandGaussQ is, on the other hand, 00024 // stateless except for the engine used. 00025 // M Fisculer - Modified use of wrong engine when shoot (anEngine, mean) 00026 // is called. This flaw was preventing any hope of proper 00027 // saving and restoring in the instance cases. 00028 // M Fischler - fireArray using defaultMean 2/10/05 00029 // M Fischler - put/get to/from streams uses pairs of ulongs when 00030 // + storing doubles avoid problems with precision 00031 // 4/14/05 00032 // M Fisculer - Modified use of shoot (mean) instead of 00033 // shoot(getLocalEngine(), mean) when fire(mean) is called. 00034 // This flaw was causing bad "cross-talk" between modules 00035 // in CMS, where one used its own engine, and the other 00036 // used the static generator. 10/18/07 00037 // 00038 // ======================================================================= 00039 00040 #include "CLHEP/Random/defs.h" 00041 #include "CLHEP/Random/RandPoissonQ.h" 00042 #include "CLHEP/Random/RandGaussQ.h" 00043 #include "CLHEP/Random/DoubConv.hh" 00044 #include "CLHEP/Random/Stat.h" 00045 #include <cmath> // for std::pow() 00046 00047 namespace CLHEP { 00048 00049 std::string RandPoissonQ::name() const {return "RandPoissonQ";} 00050 HepRandomEngine & RandPoissonQ::engine() {return RandPoisson::engine();} 00051 00052 // Initialization of static data: Note that this is all const static data, 00053 // so that saveEngineStatus properly saves all needed information. 00054 00055 // The following MUST MATCH the corresponding values used (in 00056 // poissonTables.cc) when poissonTables.cdat was created. 00057 00058 const double RandPoissonQ::FIRST_MU = 10;// lowest mu value in table 00059 const double RandPoissonQ::LAST_MU = 95;// highest mu value 00060 const double RandPoissonQ::S = 5; // Spacing between mu values 00061 const int RandPoissonQ::BELOW = 30; // Starting point for N is at mu - BELOW 00062 const int RandPoissonQ::ENTRIES = 51; // Number of entries in each mu row 00063 00064 const double RandPoissonQ::MAXIMUM_POISSON_DEVIATE = 2.0E9; 00065 // Careful -- this is NOT the maximum number that can be held in 00066 // a long. It actually should be some large number of sigma below 00067 // that. 00068 00069 // Here comes the big (9K bytes) table, kept in a file of 00070 // ENTRIES * (FIRST_MU - LAST_MU + 1)/S doubles 00071 00072 static const double poissonTables [ 51 * ( (95-10)/5 + 1 ) ] = { 00073 #include "poissonTables.cdat" 00074 }; 00075 00076 00077 // 00078 // Constructors and destructors: 00079 // 00080 00081 RandPoissonQ::~RandPoissonQ() { 00082 } 00083 00084 void RandPoissonQ::setupForDefaultMu() { 00085 00086 // The following are useful for quick approximation, for large mu 00087 00088 double sig2 = defaultMean * (.9998654 - .08346/defaultMean); 00089 sigma = std::sqrt(sig2); 00090 // sigma for the Guassian which approximates the Poisson -- naively 00091 // std::sqrt (defaultMean). 00092 // 00093 // The multiplier corrects for fact that discretization of the form 00094 // [gaussian+.5] increases the second moment by a small amount. 00095 00096 double t = 1./(sig2); 00097 00098 a2 = t/6 + t*t/324; 00099 a1 = std::sqrt (1-2*a2*a2*sig2); 00100 a0 = defaultMean + .5 - sig2 * a2; 00101 00102 // The formula will be a0 + a1*x + a2*x*x where x has 2nd moment of sigma. 00103 // The coeffeicients are chosen to match the first THREE moments of the 00104 // true Poisson distribution. 00105 // 00106 // Actually, if the correction for discretization were not needed, then 00107 // a2 could be taken one order higher by adding t*t*t/5832. However, 00108 // the discretization correction is not perfect, leading to inaccuracy 00109 // on the order to 1/mu**2, so adding a third term is overkill. 00110 00111 } // setupForDefaultMu() 00112 00113 00114 // 00115 // fire, quick, operator(), and shoot methods: 00116 // 00117 00118 long RandPoissonQ::shoot(double xm) { 00119 return shoot(getTheEngine(), xm); 00120 } 00121 00122 double RandPoissonQ::operator()() { 00123 return (double) fire(); 00124 } 00125 00126 double RandPoissonQ::operator()( double mean ) { 00127 return (double) fire(mean); 00128 } 00129 00130 long RandPoissonQ::fire(double mean) { 00131 return shoot(getLocalEngine(), mean); 00132 } 00133 00134 long RandPoissonQ::fire() { 00135 if ( defaultMean < LAST_MU + S ) { 00136 return poissonDeviateSmall ( getLocalEngine(), defaultMean ); 00137 } else { 00138 return poissonDeviateQuick ( getLocalEngine(), a0, a1, a2, sigma ); 00139 } 00140 } // fire() 00141 00142 long RandPoissonQ::shoot(HepRandomEngine* anEngine, double mean) { 00143 00144 // The following variables, static to this method, apply to the 00145 // last time a large mean was supplied; they obviate certain calculations 00146 // if consecutive calls use the same mean. 00147 00148 static double lastLargeMean = -1.; // Mean from previous shoot 00149 // requiring poissonDeviateQuick() 00150 static double lastA0; 00151 static double lastA1; 00152 static double lastA2; 00153 static double lastSigma; 00154 00155 if ( mean < LAST_MU + S ) { 00156 return poissonDeviateSmall ( anEngine, mean ); 00157 } else { 00158 if ( mean != lastLargeMean ) { 00159 // Compute the coefficients defining the quadratic transformation from a 00160 // Gaussian to a Poisson for this mean. Also save these for next time. 00161 double sig2 = mean * (.9998654 - .08346/mean); 00162 lastSigma = std::sqrt(sig2); 00163 double t = 1./sig2; 00164 lastA2 = t*(1./6.) + t*t*(1./324.); 00165 lastA1 = std::sqrt (1-2*lastA2*lastA2*sig2); 00166 lastA0 = mean + .5 - sig2 * lastA2; 00167 } 00168 return poissonDeviateQuick ( anEngine, lastA0, lastA1, lastA2, lastSigma ); 00169 } 00170 00171 } // shoot (anEngine, mean) 00172 00173 void RandPoissonQ::shootArray(const int size, long* vect, double m) { 00174 for( long* v = vect; v != vect + size; ++v ) 00175 *v = shoot(m); 00176 // Note: We could test for m > 100, and if it is, precompute a0, a1, a2, 00177 // and sigma and call the appropriate form of poissonDeviateQuick. 00178 // But since those are cached anyway, not much time would be saved. 00179 } 00180 00181 void RandPoissonQ::fireArray(const int size, long* vect, double m) { 00182 for( long* v = vect; v != vect + size; ++v ) 00183 *v = fire( m ); 00184 } 00185 00186 void RandPoissonQ::fireArray(const int size, long* vect) { 00187 for( long* v = vect; v != vect + size; ++v ) 00188 *v = fire( defaultMean ); 00189 } 00190 00191 00192 // Quick Poisson deviate algorithm used by quick for large mu: 00193 00194 long RandPoissonQ::poissonDeviateQuick ( HepRandomEngine *e, double mu ) { 00195 00196 // Compute the coefficients defining the quadratic transformation from a 00197 // Gaussian to a Poisson: 00198 00199 double sig2 = mu * (.9998654 - .08346/mu); 00200 double sig = std::sqrt(sig2); 00201 // The multiplier corrects for fact that discretization of the form 00202 // [gaussian+.5] increases the second moment by a small amount. 00203 00204 double t = 1./sig2; 00205 00206 double sa2 = t*(1./6.) + t*t*(1./324.); 00207 double sa1 = std::sqrt (1-2*sa2*sa2*sig2); 00208 double sa0 = mu + .5 - sig2 * sa2; 00209 00210 // The formula will be sa0 + sa1*x + sa2*x*x where x has sigma of sq. 00211 // The coeffeicients are chosen to match the first THREE moments of the 00212 // true Poisson distribution. 00213 00214 return poissonDeviateQuick ( e, sa0, sa1, sa2, sig ); 00215 } 00216 00217 00218 long RandPoissonQ::poissonDeviateQuick ( HepRandomEngine *e, 00219 double A0, double A1, double A2, double sig) { 00220 // 00221 // Quick Poisson deviate algorithm used by quick for large mu: 00222 // 00223 // The principle: For very large mu, a poisson distribution can be approximated 00224 // by a gaussian: return the integer part of mu + .5 + g where g is a unit 00225 // normal. However, this yelds a miserable approximation at values as 00226 // "large" as 100. The primary problem is that the poisson distribution is 00227 // supposed to have a skew of 1/mu**2, and the zero skew of the Guassian 00228 // leads to errors of order as big as 1/mu**2. 00229 // 00230 // We substitute for the gaussian a quadratic function of that gaussian random. 00231 // The expression looks very nearly like mu + .5 - 1/6 + g + g**2/(6*mu). 00232 // The small positive quadratic term causes the resulting variate to have 00233 // a positive skew; the -1/6 constant term is there to correct for this bias 00234 // in the mean. By adjusting these two and the linear term, we can match the 00235 // first three moments to high accuracy in 1/mu. 00236 // 00237 // The sigma used is not precisely std::sqrt(mu) since a rounded-off Gaussian 00238 // has a second moment which is slightly larger than that of the Gaussian. 00239 // To compensate, sig is multiplied by a factor which is slightly less than 1. 00240 00241 // double g = RandGauss::shootQuick( e ); // TEMPORARY MOD: 00242 double g = RandGaussQ::shoot( e ); // Unit normal 00243 g *= sig; 00244 double p = A2*g*g + A1*g + A0; 00245 if ( p < 0 ) return 0; // Shouldn't ever possibly happen since 00246 // mean should not be less than 100, but 00247 // we check due to paranoia. 00248 if ( p > MAXIMUM_POISSON_DEVIATE ) p = MAXIMUM_POISSON_DEVIATE; 00249 00250 return long(p); 00251 00252 } // poissonDeviateQuick () 00253 00254 00255 00256 long RandPoissonQ::poissonDeviateSmall (HepRandomEngine * e, double mean) { 00257 long N1; 00258 long N2; 00259 // The following are for later use to form a secondary random s: 00260 double rRange; // This will hold the interval between cdf for the 00261 // computed N1 and cdf for N1+1. 00262 double rRemainder = 0; // This will hold the length into that interval. 00263 00264 // Coming in, mean should not be more than LAST_MU + S. However, we will 00265 // be paranoid and test for this: 00266 00267 if ( mean > LAST_MU + S ) { 00268 return RandPoisson::shoot(e, mean); 00269 } 00270 00271 if (mean <= 0) { 00272 return 0; // Perhaps we ought to balk harder here! 00273 } 00274 00275 // >>> 1 <<< 00276 // Generate the first random, which we always will need. 00277 00278 double r = e->flat(); 00279 00280 // >>> 2 <<< 00281 // For small mean, below the start of the tables, 00282 // do the series for cdf directly. 00283 00284 // In this case, since we know the series will terminate relatively quickly, 00285 // almost alwaye use a precomputed 1/N array without fear of overrunning it. 00286 00287 static const double oneOverN[50] = 00288 { 0, 1., 1/2., 1/3., 1/4., 1/5., 1/6., 1/7., 1/8., 1/9., 00289 1/10., 1/11., 1/12., 1/13., 1/14., 1/15., 1/16., 1/17., 1/18., 1/19., 00290 1/20., 1/21., 1/22., 1/23., 1/24., 1/25., 1/26., 1/27., 1/28., 1/29., 00291 1/30., 1/31., 1/32., 1/33., 1/34., 1/35., 1/36., 1/37., 1/38., 1/39., 00292 1/40., 1/41., 1/42., 1/43., 1/44., 1/45., 1/46., 1/47., 1/48., 1/49. }; 00293 00294 00295 if ( mean < FIRST_MU ) { 00296 00297 long N = 0; 00298 double term = std::exp(-mean); 00299 double cdf = term; 00300 00301 if ( r < (1 - 1.0E-9) ) { 00302 // 00303 // **** This is a normal path: **** 00304 // 00305 // Except when r is very close to 1, it is certain that we will exceed r 00306 // before the 30-th term in the series, so a simple while loop is OK. 00307 const double* oneOverNptr = oneOverN; 00308 while( cdf <= r ) { 00309 N++ ; 00310 oneOverNptr++; 00311 term *= ( mean * (*oneOverNptr) ); 00312 cdf += term; 00313 } 00314 return N; 00315 // 00316 // **** **** 00317 // 00318 } else { // r is almost 1... 00319 // For r very near to 1 we would have to check that we don't fall 00320 // off the end of the table of 1/N. Since this is very rare, we just 00321 // ignore the table and do the identical while loop, using explicit 00322 // division. 00323 double cdf0; 00324 while ( cdf <= r ) { 00325 N++ ; 00326 term *= ( mean / N ); 00327 cdf0 = cdf; 00328 cdf += term; 00329 if (cdf == cdf0) break; // Can't happen, but just in case... 00330 } 00331 return N; 00332 } // end of if ( r compared to (1 - 1.0E-9) ) 00333 00334 } // End of the code for mean < FIRST_MU 00335 00336 // >>> 3 <<< 00337 // Find the row of the tables corresponding to the highest tabulated mu 00338 // which is no greater than our actual mean. 00339 00340 int rowNumber = int((mean - FIRST_MU)/S); 00341 const double * cdfs = &poissonTables [rowNumber*ENTRIES]; 00342 double mu = FIRST_MU + rowNumber*S; 00343 double deltaMu = mean - mu; 00344 int Nmin = int(mu - BELOW); 00345 if (Nmin < 1) Nmin = 1; 00346 int Nmax = Nmin + (ENTRIES - 1); 00347 00348 00349 // >>> 4 <<< 00350 // If r is less that the smallest entry in the row, then 00351 // generate the deviate directly from the series. 00352 00353 if ( r < cdfs[0] ) { 00354 00355 // In this case, we are tempted to use the actual mean, and not 00356 // generate a second deviate to account for the leftover part mean - mu. 00357 // That would be an error, generating a distribution with enough excess 00358 // at Nmin + (mean-mu)/2 to be detectable in 4,000,000 trials. 00359 00360 // Since this case is very rare (never more than .2% of the r values) 00361 // and can happen where N will be large (up to 65 for the mu=95 row) 00362 // we use explicit division so as to avoid having to worry about running 00363 // out of oneOverN table. 00364 00365 long N = 0; 00366 double term = std::exp(-mu); 00367 double cdf = term; 00368 double cdf0; 00369 00370 while(cdf <= r) { 00371 N++ ; 00372 term *= ( mu / N ); 00373 cdf0 = cdf; 00374 cdf += term; 00375 if (cdf == cdf0) break; // Can't happen, but just in case... 00376 } 00377 N1 = N; 00378 // std::cout << r << " " << N << " "; 00379 // DBG_small = true; 00380 rRange = 0; // In this case there is always a second r needed 00381 00382 } // end of small-r case 00383 00384 00385 // >>> 5 <<< 00386 // Assuming r lies within the scope of the row for this mu, find the 00387 // largest entry not greater than r. N1 is the N corresponding to the 00388 // index a. 00389 00390 else if ( r < cdfs[ENTRIES-1] ) { // r is also >= cdfs[0] 00391 00392 // 00393 // **** This is the normal code path **** 00394 // 00395 00396 int a = 0; // Highest value of index such that cdfs[a] 00397 // is known NOT to be greater than r. 00398 int b = ENTRIES - 1; // Lowest value of index such that cdfs[b] is 00399 // known to exeed r. 00400 00401 while (b != (a+1) ) { 00402 int c = (a+b+1)>>1; 00403 if (r > cdfs[c]) { 00404 a = c; 00405 } else { 00406 b = c; 00407 } 00408 } 00409 00410 N1 = Nmin + a; 00411 rRange = cdfs[a+1] - cdfs[a]; 00412 rRemainder = r - cdfs[a]; 00413 00414 // 00415 // **** **** 00416 // 00417 00418 } // end of medium-r (normal) case 00419 00420 00421 // >>> 6 <<< 00422 // If r exceeds the greatest entry in the table for this mu, then start 00423 // from that cdf, and use the series to compute from there until r is 00424 // exceeded. 00425 00426 else { // if ( r >= cdfs[ENTRIES-1] ) { 00427 00428 // Here, division must be done explicitly, and we must also protect against 00429 // roundoff preventing termination. 00430 00431 // 00432 //+++ cdfs[ENTRIES-1] is std::exp(-mu) sum (mu**m/m! , m=0 to Nmax) 00433 //+++ (where Nmax = mu - BELOW + ENTRIES - 1) 00434 //+++ cdfs[ENTRIES-1]-cdfs[ENTRIES-2] is std::exp(-mu) mu**(Nmax)/(Nmax)! 00435 //+++ If the sum up to k-1 <= r < sum up to k, then N = k-1 00436 //+++ Consider k = Nmax in the above statement: 00437 //+++ If cdfs[ENTRIES-2] <= r < cdfs[ENTRIES-1], N would be Nmax-1 00438 //+++ But here r >= cdfs[ENTRIES-1] so N >= Nmax 00439 // 00440 00441 // Erroneous: 00442 //+++ cdfs[ENTRIES-1] is std::exp(-mu) sum (mu**m/m! , m=0 to Nmax-1) 00443 //+++ cdfs[ENTRIES-1]-cdfs[ENTRIES-2] is std::exp(-mu) mu**(Nmax-1)/(Nmax-1)! 00444 //+++ If a sum up to k-1 <= r < sum up to k, then N = k-1 00445 //+++ So if cdfs[ENTRIES-1] were > r, N would be Nmax-1 (or less) 00446 //+++ But here r >= cdfs[ENTRIES-1] so N >= Nmax 00447 // 00448 00449 // std::cout << "r = " << r << " mu = " << mu << "\n"; 00450 long N = Nmax -1; 00451 double cdf = cdfs[ENTRIES-1]; 00452 double term = cdf - cdfs[ENTRIES-2]; 00453 double cdf0; 00454 while(cdf <= r) { 00455 N++ ; 00456 // std::cout << " N " << N << " term " << 00457 // term << " cdf " << cdf << "\n"; 00458 term *= ( mu / N ); 00459 cdf0 = cdf; 00460 cdf += term; 00461 if (cdf == cdf0) break; // If term gets so small cdf stops increasing, 00462 // terminate using that value of N since we 00463 // would never reach r. 00464 } 00465 N1 = N; 00466 rRange = 0; // We can't validly omit the second true random 00467 00468 // N = Nmax -1; 00469 // cdf = cdfs[ENTRIES-1]; 00470 // term = cdf - cdfs[ENTRIES-2]; 00471 // for (int isxz=0; isxz < 100; isxz++) { 00472 // N++ ; 00473 // term *= ( mu / N ); 00474 // cdf0 = cdf; 00475 // cdf += term; 00476 // } 00477 // std::cout.precision(20); 00478 // std::cout << "Final sum is " << cdf << "\n"; 00479 00480 } // end of large-r case 00481 00482 00483 00484 // >>> 7 <<< 00485 // Form a second random, s, based on the position of r within the range 00486 // of this table entry to the next entry. 00487 00488 // However, if this range is very small, then we lose too many bits of 00489 // randomness. In that situation, we generate a second random for s. 00490 00491 double s; 00492 00493 static const double MINRANGE = .01; // Sacrifice up to two digits of 00494 // randomness when using r to produce 00495 // a second random s. Leads to up to 00496 // .09 extra randoms each time. 00497 00498 if ( rRange > MINRANGE ) { 00499 // 00500 // **** This path taken 90% of the time **** 00501 // 00502 s = rRemainder / rRange; 00503 } else { 00504 s = e->flat(); // extra true random needed about one time in 10. 00505 } 00506 00507 // >>> 8 <<< 00508 // Use the direct summation method to form a second poisson deviate N2 00509 // from deltaMu and s. 00510 00511 N2 = 0; 00512 double term = std::exp(-deltaMu); 00513 double cdf = term; 00514 00515 if ( s < (1 - 1.0E-10) ) { 00516 // 00517 // This is the normal path: 00518 // 00519 const double* oneOverNptr = oneOverN; 00520 while( cdf <= s ) { 00521 N2++ ; 00522 oneOverNptr++; 00523 term *= ( deltaMu * (*oneOverNptr) ); 00524 cdf += term; 00525 } 00526 } else { // s is almost 1... 00527 while( cdf <= s ) { 00528 N2++ ; 00529 term *= ( deltaMu / N2 ); 00530 cdf += term; 00531 } 00532 } // end of if ( s compared to (1 - 1.0E-10) ) 00533 00534 // >>> 9 <<< 00535 // The result is the sum of those two deviates 00536 00537 // if (DBG_small) { 00538 // std::cout << N2 << " " << N1+N2 << "\n"; 00539 // DBG_small = false; 00540 // } 00541 00542 return N1 + N2; 00543 00544 } // poissonDeviate() 00545 00546 std::ostream & RandPoissonQ::put ( std::ostream & os ) const { 00547 int pr=os.precision(20); 00548 std::vector<unsigned long> t(2); 00549 os << " " << name() << "\n"; 00550 os << "Uvec" << "\n"; 00551 t = DoubConv::dto2longs(a0); 00552 os << a0 << " " << t[0] << " " << t[1] << "\n"; 00553 t = DoubConv::dto2longs(a1); 00554 os << a1 << " " << t[0] << " " << t[1] << "\n"; 00555 t = DoubConv::dto2longs(a2); 00556 os << a2 << " " << t[0] << " " << t[1] << "\n"; 00557 t = DoubConv::dto2longs(sigma); 00558 os << sigma << " " << t[0] << " " << t[1] << "\n"; 00559 RandPoisson::put(os); 00560 os.precision(pr); 00561 return os; 00562 #ifdef REMOVED 00563 int pr=os.precision(20); 00564 os << " " << name() << "\n"; 00565 os << a0 << " " << a1 << " " << a2 << "\n"; 00566 os << sigma << "\n"; 00567 RandPoisson::put(os); 00568 os.precision(pr); 00569 return os; 00570 #endif 00571 } 00572 00573 std::istream & RandPoissonQ::get ( std::istream & is ) { 00574 std::string inName; 00575 is >> inName; 00576 if (inName != name()) { 00577 is.clear(std::ios::badbit | is.rdstate()); 00578 std::cerr << "Mismatch when expecting to read state of a " 00579 << name() << " distribution\n" 00580 << "Name found was " << inName 00581 << "\nistream is left in the badbit state\n"; 00582 return is; 00583 } 00584 if (possibleKeywordInput(is, "Uvec", a0)) { 00585 std::vector<unsigned long> t(2); 00586 is >> a0 >> t[0] >> t[1]; a0 = DoubConv::longs2double(t); 00587 is >> a1 >> t[0] >> t[1]; a1 = DoubConv::longs2double(t); 00588 is >> a2 >> t[0] >> t[1]; a2 = DoubConv::longs2double(t); 00589 is >> sigma >> t[0] >> t[1]; sigma = DoubConv::longs2double(t); 00590 RandPoisson::get(is); 00591 return is; 00592 } 00593 // is >> a0 encompassed by possibleKeywordInput 00594 is >> a1 >> a2 >> sigma; 00595 RandPoisson::get(is); 00596 return is; 00597 } 00598 00599 } // namespace CLHEP 00600