00001 //---------------------------------------------------------------------------- 00002 /** @file SgStatistics.h 00003 Classes for incremental computation of statistical properties. 00004 The classes SgStatisticsBase (mean), SgStatistics (mean, variance) and 00005 SgStatisticsExt (mean, variance, min, max) are extensions of each other, 00006 but don't derive from each other, to avoid the overhead of virtual 00007 functions and because a base class has can (or could in the future) have 00008 functions that are not supported in derived classes (like removing a 00009 sample, which is easy in SgStatisticsBase, but not possible in 00010 SgStatisticsExt). However, member functions with the same meaning have the 00011 same name, so that the classes are easily replacable in user code and can 00012 be used as template arguments. */ 00013 //---------------------------------------------------------------------------- 00014 00015 #ifndef SG_STATISTICS_H 00016 #define SG_STATISTICS_H 00017 00018 #include <cmath> 00019 #include <iostream> 00020 #include <limits> 00021 #include <map> 00022 #include <sstream> 00023 #include <string> 00024 #include <vector> 00025 #include "SgException.h" 00026 #include "SgWrite.h" 00027 00028 //---------------------------------------------------------------------------- 00029 00030 /** Computes mean of a statistical variable. 00031 The template parameters are the floating point type and the counter type, 00032 depending on the precision-memory tradeoff. */ 00033 template<typename VALUE, typename COUNT> 00034 class SgStatisticsBase 00035 { 00036 public: 00037 SgStatisticsBase(); 00038 00039 /** Create statistics initialized with values. 00040 Note that value must be initialized to 0 if count is 0. 00041 Equivalent to creating a statistics and calling @c count times 00042 Add(val) */ 00043 SgStatisticsBase(VALUE val, COUNT count); 00044 00045 void Add(VALUE val); 00046 00047 void Remove(VALUE val); 00048 00049 /** Add a value n times */ 00050 void Add(VALUE val, COUNT n); 00051 00052 /** Remove a value n times. */ 00053 void Remove(VALUE val, COUNT n); 00054 00055 void Clear(); 00056 00057 COUNT Count() const; 00058 00059 /** Initialize with values. 00060 Equivalent to calling Clear() and calling @c count times 00061 Add(val) */ 00062 void Initialize(VALUE val, COUNT count); 00063 00064 /** Check if the mean value is defined. 00065 The mean value is defined, if the count if greater than zero. The 00066 result of this function is equivalent to <tt>Count() > 0</tt>, for 00067 integer count types and <tt>Count() > epsilon()</tt> for floating 00068 point count types. */ 00069 bool IsDefined() const; 00070 00071 VALUE Mean() const; 00072 00073 /** Write in human readable format. */ 00074 void Write(std::ostream& out) const; 00075 00076 /** Save in a compact platform-independent text format. 00077 The data is written in a single line, without trailing newline. */ 00078 void SaveAsText(std::ostream& out) const; 00079 00080 /** Load from text format. 00081 See SaveAsText() */ 00082 void LoadFromText(std::istream& in); 00083 00084 private: 00085 COUNT m_count; 00086 00087 VALUE m_mean; 00088 }; 00089 00090 template<typename VALUE, typename COUNT> 00091 inline SgStatisticsBase<VALUE,COUNT>::SgStatisticsBase() 00092 { 00093 Clear(); 00094 } 00095 00096 template<typename VALUE, typename COUNT> 00097 inline SgStatisticsBase<VALUE,COUNT>::SgStatisticsBase(VALUE val, COUNT count) 00098 : m_count(count), 00099 m_mean(val) 00100 { 00101 } 00102 00103 template<typename VALUE, typename COUNT> 00104 void SgStatisticsBase<VALUE,COUNT>::Add(VALUE val) 00105 { 00106 // Write order dependency: at least one class (SgUctSearch in lock-free 00107 // mode) uses SgStatisticsBase concurrently without locking and assumes 00108 // that m_mean is valid, if m_count is greater zero 00109 COUNT count = m_count; 00110 ++count; 00111 SG_ASSERT(! std::numeric_limits<COUNT>::is_exact 00112 || count > 0); // overflow 00113 val -= m_mean; 00114 m_mean += val / VALUE(count); 00115 m_count = count; 00116 } 00117 00118 template<typename VALUE, typename COUNT> 00119 void SgStatisticsBase<VALUE,COUNT>::Remove(VALUE val) 00120 { 00121 // Write order dependency: at least on class (SgUctSearch in lock-free 00122 // mode) uses SgStatisticsBase concurrently without locking and assumes 00123 // that m_mean is valid, if m_count is greater zero 00124 COUNT count = m_count; 00125 if (count > 1) 00126 { 00127 --count; 00128 m_mean += (m_mean - val) / VALUE(count); 00129 m_count = count; 00130 } 00131 else 00132 Clear(); 00133 } 00134 00135 template<typename VALUE, typename COUNT> 00136 void SgStatisticsBase<VALUE,COUNT>::Remove(VALUE val, COUNT n) 00137 { 00138 SG_ASSERT(m_count >= n); 00139 // Write order dependency: at least on class (SgUctSearch in lock-free 00140 // mode) uses SgStatisticsBase concurrently without locking and assumes 00141 // that m_mean is valid, if m_count is greater zero 00142 COUNT count = m_count; 00143 if (count > n) 00144 { 00145 count -= n; 00146 m_mean += VALUE(n) * (m_mean - val) / VALUE(count); 00147 m_count = count; 00148 } 00149 else 00150 Clear(); 00151 } 00152 00153 template<typename VALUE, typename COUNT> 00154 void SgStatisticsBase<VALUE,COUNT>::Add(VALUE val, COUNT n) 00155 { 00156 // Write order dependency: at least one class (SgUctSearch in lock-free 00157 // mode) uses SgStatisticsBase concurrently without locking and assumes 00158 // that m_mean is valid, if m_count is greater zero 00159 COUNT count = m_count; 00160 count += n; 00161 SG_ASSERT(! std::numeric_limits<COUNT>::is_exact 00162 || count > 0); // overflow 00163 val -= m_mean; 00164 m_mean += VALUE(n) * val / VALUE(count); 00165 m_count = count; 00166 } 00167 00168 template<typename VALUE, typename COUNT> 00169 inline void SgStatisticsBase<VALUE,COUNT>::Clear() 00170 { 00171 m_count = 0; 00172 m_mean = 0; 00173 } 00174 00175 template<typename VALUE, typename COUNT> 00176 inline COUNT SgStatisticsBase<VALUE,COUNT>::Count() const 00177 { 00178 return m_count; 00179 } 00180 00181 template<typename VALUE, typename COUNT> 00182 inline void SgStatisticsBase<VALUE,COUNT>::Initialize(VALUE val, COUNT count) 00183 { 00184 SG_ASSERT(count > 0); 00185 m_count = count; 00186 m_mean = val; 00187 } 00188 00189 template<typename VALUE, typename COUNT> 00190 inline bool SgStatisticsBase<VALUE,COUNT>::IsDefined() const 00191 { 00192 if (std::numeric_limits<COUNT>::is_exact) 00193 return m_count > 0; 00194 else 00195 return m_count > std::numeric_limits<COUNT>::epsilon(); 00196 } 00197 00198 template<typename VALUE, typename COUNT> 00199 void SgStatisticsBase<VALUE,COUNT>::LoadFromText(std::istream& in) 00200 { 00201 in >> m_count >> m_mean; 00202 } 00203 00204 template<typename VALUE, typename COUNT> 00205 inline VALUE SgStatisticsBase<VALUE,COUNT>::Mean() const 00206 { 00207 SG_ASSERT(IsDefined()); 00208 return m_mean; 00209 } 00210 00211 template<typename VALUE, typename COUNT> 00212 void SgStatisticsBase<VALUE,COUNT>::Write(std::ostream& out) const 00213 { 00214 if (IsDefined()) 00215 out << Mean(); 00216 else 00217 out << '-'; 00218 } 00219 00220 template<typename VALUE, typename COUNT> 00221 void SgStatisticsBase<VALUE,COUNT>::SaveAsText(std::ostream& out) const 00222 { 00223 out << m_count << ' ' << m_mean; 00224 } 00225 00226 //---------------------------------------------------------------------------- 00227 00228 /** Computes mean and variance of a statistical variable. 00229 The template parameters are the floating point type and the counter type, 00230 depending on the precision-memory tradeoff. */ 00231 template<typename VALUE, typename COUNT> 00232 class SgStatistics 00233 { 00234 public: 00235 SgStatistics(); 00236 00237 /** Create statistics initialized with values. 00238 Equivalent to creating a statistics and calling @c count times 00239 Add(val) */ 00240 SgStatistics(VALUE val, COUNT count); 00241 00242 void Add(VALUE val); 00243 00244 void Clear(); 00245 00246 bool IsDefined() const; 00247 00248 VALUE Mean() const; 00249 00250 COUNT Count() const; 00251 00252 VALUE Deviation() const; 00253 00254 VALUE Variance() const; 00255 00256 /** Write in human readable format. */ 00257 void Write(std::ostream& out) const; 00258 00259 /** Save in a compact platform-independent text format. 00260 The data is written in a single line, without trailing newline. */ 00261 void SaveAsText(std::ostream& out) const; 00262 00263 /** Load from text format. 00264 See SaveAsText() */ 00265 void LoadFromText(std::istream& in); 00266 00267 private: 00268 SgStatisticsBase<VALUE,COUNT> m_statisticsBase; 00269 00270 VALUE m_variance; 00271 }; 00272 00273 template<typename VALUE, typename COUNT> 00274 inline SgStatistics<VALUE,COUNT>::SgStatistics() 00275 { 00276 Clear(); 00277 } 00278 00279 template<typename VALUE, typename COUNT> 00280 inline SgStatistics<VALUE,COUNT>::SgStatistics(VALUE val, COUNT count) 00281 : m_statisticsBase(val, count) 00282 { 00283 m_variance = 0; 00284 } 00285 00286 template<typename VALUE, typename COUNT> 00287 void SgStatistics<VALUE,COUNT>::Add(VALUE val) 00288 { 00289 if (IsDefined()) 00290 { 00291 COUNT countOld = Count(); 00292 VALUE meanOld = Mean(); 00293 m_statisticsBase.Add(val); 00294 VALUE mean = Mean(); 00295 COUNT count = Count(); 00296 m_variance = (VALUE(countOld) * (m_variance + meanOld * meanOld) 00297 + val * val) / VALUE(count) - mean * mean; 00298 } 00299 else 00300 { 00301 m_statisticsBase.Add(val); 00302 m_variance = 0; 00303 } 00304 } 00305 00306 template<typename VALUE, typename COUNT> 00307 inline void SgStatistics<VALUE,COUNT>::Clear() 00308 { 00309 m_statisticsBase.Clear(); 00310 m_variance = 0; 00311 } 00312 00313 template<typename VALUE, typename COUNT> 00314 inline COUNT SgStatistics<VALUE,COUNT>::Count() const 00315 { 00316 return m_statisticsBase.Count(); 00317 } 00318 00319 template<typename VALUE, typename COUNT> 00320 inline VALUE SgStatistics<VALUE,COUNT>::Deviation() const 00321 { 00322 return (VALUE)std::sqrt(m_variance); 00323 } 00324 00325 template<typename VALUE, typename COUNT> 00326 inline bool SgStatistics<VALUE,COUNT>::IsDefined() const 00327 { 00328 return m_statisticsBase.IsDefined(); 00329 } 00330 00331 template<typename VALUE, typename COUNT> 00332 void SgStatistics<VALUE,COUNT>::LoadFromText(std::istream& in) 00333 { 00334 m_statisticsBase.LoadFromText(in); 00335 in >> m_variance; 00336 } 00337 00338 template<typename VALUE, typename COUNT> 00339 inline VALUE SgStatistics<VALUE,COUNT>::Mean() const 00340 { 00341 return m_statisticsBase.Mean(); 00342 } 00343 00344 template<typename VALUE, typename COUNT> 00345 inline VALUE SgStatistics<VALUE,COUNT>::Variance() const 00346 { 00347 return m_variance; 00348 } 00349 00350 template<typename VALUE, typename COUNT> 00351 void SgStatistics<VALUE,COUNT>::Write(std::ostream& out) const 00352 { 00353 if (IsDefined()) 00354 out << Mean() << " dev=" << Deviation(); 00355 else 00356 out << '-'; 00357 } 00358 00359 template<typename VALUE, typename COUNT> 00360 void SgStatistics<VALUE,COUNT>::SaveAsText(std::ostream& out) const 00361 { 00362 m_statisticsBase.SaveAsText(out); 00363 out << ' ' << m_variance; 00364 } 00365 00366 //---------------------------------------------------------------------------- 00367 00368 /** Extended version of SgStatistics. 00369 Also stores minimum and maximum values. 00370 The template parameters are the floating point type and the counter type, 00371 depending on the precision-memory tradeoff. */ 00372 template<typename VALUE, typename COUNT> 00373 class SgStatisticsExt 00374 { 00375 public: 00376 SgStatisticsExt(); 00377 00378 void Add(VALUE val); 00379 00380 void Clear(); 00381 00382 bool IsDefined() const; 00383 00384 VALUE Mean() const; 00385 00386 COUNT Count() const; 00387 00388 VALUE Max() const; 00389 00390 VALUE Min() const; 00391 00392 VALUE Deviation() const; 00393 00394 VALUE Variance() const; 00395 00396 void Write(std::ostream& out) const; 00397 00398 private: 00399 SgStatistics<VALUE,COUNT> m_statistics; 00400 00401 VALUE m_max; 00402 00403 VALUE m_min; 00404 }; 00405 00406 template<typename VALUE, typename COUNT> 00407 inline SgStatisticsExt<VALUE,COUNT>::SgStatisticsExt() 00408 { 00409 Clear(); 00410 } 00411 00412 template<typename VALUE, typename COUNT> 00413 void SgStatisticsExt<VALUE,COUNT>::Add(VALUE val) 00414 { 00415 m_statistics.Add(val); 00416 if (val > m_max) 00417 m_max = val; 00418 if (val < m_min) 00419 m_min = val; 00420 } 00421 00422 template<typename VALUE, typename COUNT> 00423 inline void SgStatisticsExt<VALUE,COUNT>::Clear() 00424 { 00425 m_statistics.Clear(); 00426 m_min = std::numeric_limits<VALUE>::max(); 00427 m_max = -std::numeric_limits<VALUE>::max(); 00428 } 00429 00430 template<typename VALUE, typename COUNT> 00431 inline COUNT SgStatisticsExt<VALUE,COUNT>::Count() const 00432 { 00433 return m_statistics.Count(); 00434 } 00435 00436 template<typename VALUE, typename COUNT> 00437 inline VALUE SgStatisticsExt<VALUE,COUNT>::Deviation() const 00438 { 00439 return m_statistics.Deviation(); 00440 } 00441 00442 template<typename VALUE, typename COUNT> 00443 inline bool SgStatisticsExt<VALUE,COUNT>::IsDefined() const 00444 { 00445 return m_statistics.IsDefined(); 00446 } 00447 00448 template<typename VALUE, typename COUNT> 00449 inline VALUE SgStatisticsExt<VALUE,COUNT>::Max() const 00450 { 00451 return m_max; 00452 } 00453 00454 template<typename VALUE, typename COUNT> 00455 inline VALUE SgStatisticsExt<VALUE,COUNT>::Mean() const 00456 { 00457 return m_statistics.Mean(); 00458 } 00459 00460 template<typename VALUE, typename COUNT> 00461 inline VALUE SgStatisticsExt<VALUE,COUNT>::Min() const 00462 { 00463 return m_min; 00464 } 00465 00466 template<typename VALUE, typename COUNT> 00467 inline VALUE SgStatisticsExt<VALUE,COUNT>::Variance() const 00468 { 00469 return m_statistics.Variance(); 00470 } 00471 00472 template<typename VALUE, typename COUNT> 00473 void SgStatisticsExt<VALUE,COUNT>::Write(std::ostream& out) const 00474 { 00475 if (IsDefined()) 00476 { 00477 m_statistics.Write(out); 00478 out << " min=" << m_min << " max=" << m_max; 00479 } 00480 else 00481 out << '-'; 00482 } 00483 00484 //---------------------------------------------------------------------------- 00485 00486 /** Set of named statistical variables. 00487 The template parameters are the floating point type and the counter type, 00488 depending on the precision-memory tradeoff. */ 00489 template<typename VALUE, typename COUNT> 00490 class SgStatisticsCollection 00491 { 00492 public: 00493 /** Add the statistics of another collection. 00494 The collections must contain the same entries. */ 00495 void Add(const SgStatisticsCollection<VALUE,COUNT>& collection); 00496 00497 void Clear(); 00498 00499 bool Contains(const std::string& name) const; 00500 00501 /** Create a new variable. */ 00502 void Create(const std::string& name); 00503 00504 const SgStatistics<VALUE,COUNT>& Get(const std::string& name) const; 00505 00506 SgStatistics<VALUE,COUNT>& Get(const std::string& name); 00507 00508 void Write(std::ostream& o) const; 00509 00510 private: 00511 typedef std::map<std::string,SgStatistics<VALUE,COUNT> > Map; 00512 00513 typedef typename Map::iterator Iterator; 00514 00515 typedef typename Map::const_iterator ConstIterator; 00516 00517 Map m_map; 00518 }; 00519 00520 template<typename VALUE, typename COUNT> 00521 void 00522 SgStatisticsCollection<VALUE,COUNT> 00523 ::Add(const SgStatisticsCollection<VALUE,COUNT>& collection) 00524 { 00525 if (m_map.size() != collection.m_map.size()) 00526 throw SgException("Incompatible statistics collections"); 00527 for (Iterator p = m_map.begin(); p != m_map.end(); ++p) 00528 { 00529 ConstIterator k = collection.m_map.find(p->first); 00530 if (k == collection.m_map.end()) 00531 throw SgException("Incompatible statistics collections"); 00532 p->second.Add(k->second); 00533 } 00534 } 00535 00536 template<typename VALUE, typename COUNT> 00537 void SgStatisticsCollection<VALUE,COUNT>::Clear() 00538 { 00539 for (Iterator p = m_map.begin(); p != m_map.end(); ++p) 00540 p->second.Clear(); 00541 } 00542 00543 template<typename VALUE, typename COUNT> 00544 bool SgStatisticsCollection<VALUE,COUNT>::Contains(const std::string& name) 00545 const 00546 { 00547 return (m_map.find(name) != m_map.end()); 00548 } 00549 00550 template<typename VALUE, typename COUNT> 00551 void SgStatisticsCollection<VALUE,COUNT>::Create(const std::string& name) 00552 { 00553 m_map[name] = SgStatistics<VALUE,COUNT>(); 00554 } 00555 00556 template<typename VALUE, typename COUNT> 00557 const SgStatistics<VALUE,COUNT>& 00558 SgStatisticsCollection<VALUE,COUNT>::Get(const std::string& name) const 00559 { 00560 ConstIterator p = m_map.find(name); 00561 if (p == m_map.end()) 00562 { 00563 std::ostringstream o; 00564 o << "Unknown statistics name " << name << '.'; 00565 throw SgException(o.str()); 00566 } 00567 return p->second; 00568 } 00569 00570 template<typename VALUE, typename COUNT> 00571 SgStatistics<VALUE,COUNT>& 00572 SgStatisticsCollection<VALUE,COUNT>::Get(const std::string& name) 00573 { 00574 Iterator p = m_map.find(name); 00575 if (p == m_map.end()) 00576 { 00577 std::ostringstream o; 00578 o << "Unknown statistics name " << name << '.'; 00579 throw SgException(o.str()); 00580 } 00581 return p->second; 00582 } 00583 00584 template<typename VALUE, typename COUNT> 00585 void SgStatisticsCollection<VALUE,COUNT>::Write(std::ostream& o) const 00586 { 00587 for (ConstIterator p = m_map.begin(); p != m_map.end(); ++p) 00588 o << p->first << ": " << p->second.Write(o) << '\n'; 00589 } 00590 00591 //---------------------------------------------------------------------------- 00592 00593 /** Histogram. 00594 The template parameters are the floating point type and the counter type, 00595 depending on the precision-memory tradeoff. */ 00596 template<typename VALUE, typename COUNT> 00597 class SgHistogram 00598 { 00599 public: 00600 SgHistogram(); 00601 00602 SgHistogram(VALUE min, VALUE max, int bins); 00603 00604 /** Reinitialize and clear histogram. */ 00605 void Init(VALUE min, VALUE max, int bins); 00606 00607 void Add(VALUE value); 00608 00609 void Clear(); 00610 00611 int Bins() const; 00612 00613 COUNT Count() const; 00614 00615 /** Get count in a certain bin. */ 00616 COUNT Count(int i) const; 00617 00618 /** Write as x,y-table. 00619 Writes the historgram in a format that likely can be used by other 00620 programs. Writes one x,y pair per line. The separator is TAB. 00621 The x-values are the left border values of the bins, the y-values 00622 are the counts of the bins. */ 00623 void Write(std::ostream& out) const; 00624 00625 /** Write with labels. 00626 Example output with label "Value", the numbers in brackets are the 00627 left border of each bin: 00628 @verbatim 00629 Value[0] 100 00630 Value[10] 2000 00631 Value[20] 500 00632 @endverbatim */ 00633 void WriteWithLabels(std::ostream& out, const std::string& label) const; 00634 00635 private: 00636 typedef std::vector<COUNT> Vector; 00637 00638 int m_bins; 00639 00640 COUNT m_count; 00641 00642 VALUE m_binSize; 00643 00644 VALUE m_min; 00645 00646 VALUE m_max; 00647 00648 Vector m_array; 00649 }; 00650 00651 template<typename VALUE, typename COUNT> 00652 SgHistogram<VALUE,COUNT>::SgHistogram() 00653 { 00654 Init(0, 1, 1); 00655 } 00656 00657 template<typename VALUE, typename COUNT> 00658 SgHistogram<VALUE,COUNT>::SgHistogram(VALUE min, VALUE max, int bins) 00659 { 00660 Init(min, max, bins); 00661 } 00662 00663 template<typename VALUE, typename COUNT> 00664 void SgHistogram<VALUE,COUNT>::Add(VALUE value) 00665 { 00666 ++m_count; 00667 int i = static_cast<int>((value - m_min) / m_binSize); 00668 if (i < 0) 00669 i = 0; 00670 if (i >= m_bins) 00671 i = m_bins - 1; 00672 ++m_array[i]; 00673 } 00674 00675 template<typename VALUE, typename COUNT> 00676 int SgHistogram<VALUE,COUNT>::Bins() const 00677 { 00678 return m_bins; 00679 } 00680 00681 template<typename VALUE, typename COUNT> 00682 void SgHistogram<VALUE,COUNT>::Clear() 00683 { 00684 m_count = 0; 00685 for (typename Vector::iterator it = m_array.begin(); it != m_array.end(); 00686 ++ it) 00687 *it = 0; 00688 } 00689 00690 template<typename VALUE, typename COUNT> 00691 COUNT SgHistogram<VALUE,COUNT>::Count() const 00692 { 00693 return m_count; 00694 } 00695 00696 template<typename VALUE, typename COUNT> 00697 COUNT SgHistogram<VALUE,COUNT>::Count(int i) const 00698 { 00699 SG_ASSERT(i >= 0); 00700 SG_ASSERT(i < m_bins); 00701 return m_array[i]; 00702 } 00703 00704 template<typename VALUE, typename COUNT> 00705 void SgHistogram<VALUE,COUNT>::Init(VALUE min, VALUE max, int bins) 00706 { 00707 m_array.resize(bins); 00708 m_min = min; 00709 m_max = max; 00710 m_bins = bins; 00711 m_binSize = (m_max - m_min) / m_bins; 00712 Clear(); 00713 } 00714 00715 template<typename VALUE, typename COUNT> 00716 void SgHistogram<VALUE,COUNT>::Write(std::ostream& out) const 00717 { 00718 for (int i = 0; i < m_bins; ++i) 00719 out << (m_min + i * m_binSize) << '\t' << m_array[i] << '\n'; 00720 00721 } 00722 00723 template<typename VALUE, typename COUNT> 00724 void SgHistogram<VALUE,COUNT>::WriteWithLabels(std::ostream& out, 00725 const std::string& label) const 00726 { 00727 for (int i = 0; i < m_bins; ++i) 00728 { 00729 std::ostringstream binLabel; 00730 binLabel << label << '[' << (m_min + i * m_binSize) << ']'; 00731 out << SgWriteLabel(binLabel.str()) << m_array[i] << '\n'; 00732 } 00733 } 00734 00735 //---------------------------------------------------------------------------- 00736 00737 #endif // SG_STATISTICS_H