VectorStatistics.cpp 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343
  1. /**
  2. * @file VectorStatistics.cpp
  3. * @brief Vector statistics based on B. P. Welford's computation of mean and variance (reference: http://www.johndcook.com/standard_deviation.html)
  4. * @author Michael Koch
  5. * @date 18/02/2009
  6. */
  7. #include "core/vector/VectorT.h"
  8. #include "core/vector/MatrixT.h"
  9. #include "vislearning/baselib/RunningStat.h"
  10. #include "vislearning/baselib/VectorStatistics.h"
  11. using namespace OBJREC;
  12. using namespace std;
  13. using namespace NICE;
  14. //non-static
  15. VectorStatistics::VectorStatistics(const NICE::Vector &data, bool compute)
  16. {
  17. datavector = data;
  18. initVariables();
  19. if (compute)
  20. {
  21. calculateStatistics();
  22. }
  23. }
  24. VectorStatistics::~VectorStatistics()
  25. {
  26. }
  27. void VectorStatistics::calculateStatistics()
  28. {
  29. setMean();
  30. setMedian();
  31. setVariance();
  32. setSkewness();
  33. setKurtosis();
  34. setEntropy();
  35. setNormalizedEntropy();
  36. }
  37. void VectorStatistics::setData(const NICE::Vector &data)
  38. {
  39. datavector = data;
  40. initVariables();
  41. }
  42. void VectorStatistics::initVariables()
  43. {
  44. mean = 0.0;
  45. med = 0.0;
  46. var = 0.0;
  47. skew = 0.0;
  48. kurt = 0.0;
  49. entropy=0.0;
  50. meancalculated = false;
  51. mediancalculated = false;
  52. varcalculated = false;
  53. stdcalculated = false;
  54. skewcalculated = false;
  55. kurtcalculated = false;
  56. entropycalculated = false;
  57. normalizedentropy = false;
  58. }
  59. void VectorStatistics::setMean()
  60. {
  61. if (!meancalculated)
  62. {
  63. mean = computeMean(datavector);
  64. meancalculated = true;
  65. }
  66. }
  67. void VectorStatistics::setMedian()
  68. {
  69. if (!mediancalculated)
  70. {
  71. med = computeMedian(datavector);
  72. mediancalculated = true;
  73. }
  74. }
  75. void VectorStatistics::setVariance()
  76. {
  77. if (!varcalculated)
  78. {
  79. var = computeVariance(datavector);
  80. varcalculated = true;
  81. }
  82. }
  83. void VectorStatistics::setSkewness()
  84. {
  85. if (!skewcalculated)
  86. {
  87. skew = computeSkew(datavector);
  88. skewcalculated = true;
  89. }
  90. }
  91. void VectorStatistics::setKurtosis()
  92. {
  93. if (!kurtcalculated)
  94. {
  95. kurt = computeKurtosis(datavector);
  96. kurtcalculated = true;
  97. }
  98. }
  99. void VectorStatistics::setEntropy()
  100. {
  101. if (!entropycalculated)
  102. {
  103. entropy = computeEntropy(datavector);
  104. entropycalculated = true;
  105. }
  106. }
  107. void VectorStatistics::setNormalizedEntropy()
  108. {
  109. if (!normalizedentropycalculated)
  110. {
  111. normalizedentropy = computeNormalizedEntropy(datavector);
  112. normalizedentropycalculated = true;
  113. }
  114. }
  115. double VectorStatistics::getMean()
  116. {
  117. if (meancalculated)
  118. {
  119. return mean;
  120. }
  121. else
  122. {
  123. setMean();
  124. return mean;
  125. }
  126. }
  127. double VectorStatistics::getMedian()
  128. {
  129. if (mediancalculated)
  130. {
  131. return med;
  132. }
  133. else
  134. {
  135. setMedian();
  136. return med;
  137. }
  138. }
  139. double VectorStatistics::getVariance()
  140. {
  141. if (varcalculated)
  142. {
  143. return var;
  144. }
  145. else
  146. {
  147. setVariance();
  148. return var;
  149. }
  150. }
  151. double VectorStatistics::getStandardDeviation()
  152. {
  153. if (varcalculated)
  154. {
  155. return sqrt(var);
  156. }
  157. else
  158. {
  159. setVariance();
  160. return sqrt(var);
  161. }
  162. }
  163. double VectorStatistics::getSkewness()
  164. {
  165. if (skewcalculated)
  166. {
  167. return skew;
  168. }
  169. else
  170. {
  171. setSkewness();
  172. return skew;
  173. }
  174. }
  175. double VectorStatistics::getKurtosis()
  176. {
  177. if (kurtcalculated)
  178. {
  179. return kurt;
  180. }
  181. else
  182. {
  183. setKurtosis();
  184. return kurt;
  185. }
  186. }
  187. double VectorStatistics::getEntropy()
  188. {
  189. if (entropycalculated)
  190. {
  191. return entropy;
  192. }
  193. else
  194. {
  195. setEntropy();
  196. return entropy;
  197. }
  198. }
  199. double VectorStatistics::getNormalizedEntropy()
  200. {
  201. if (normalizedentropycalculated)
  202. {
  203. return normalizedentropy;
  204. }
  205. else
  206. {
  207. setNormalizedEntropy();
  208. return normalizedentropy;
  209. }
  210. }
  211. //static
  212. double VectorStatistics::computeMedian(const NICE::Vector &data)
  213. {
  214. if (data.size() > 0)
  215. {
  216. NICE::Vector data_tmp(data);
  217. data_tmp.sortAscend();
  218. return data_tmp[data.size() / 2];
  219. }
  220. else
  221. {
  222. fprintf(stderr, "VectorStatistics: median - size of data is 0!\n");
  223. return 0.0;
  224. }
  225. }
  226. double VectorStatistics::computeNormalizedEntropy(const NICE::Vector &data)
  227. {
  228. double entropy = computeEntropy(data);
  229. if (data.size()>1)
  230. {
  231. double max_entropy = -log(1.0 / (double) data.size());
  232. entropy/=max_entropy;
  233. }
  234. return entropy;
  235. }
  236. double VectorStatistics::computeEntropy(const NICE::Vector &data)
  237. {
  238. double entropy = 0.0;
  239. double sum = 0.0;
  240. for (size_t i = 0; i < data.size(); i++)
  241. {
  242. double val = data[i];
  243. if (val <= 0.0)
  244. continue;
  245. entropy -= val * log(val);
  246. sum += val;
  247. }
  248. if (fabs(sum) > 1e-6)
  249. {
  250. entropy /= sum;
  251. entropy += log(sum);
  252. }
  253. else
  254. {
  255. fprintf(stderr,
  256. "VectorStatistics: entropy - sum of values is numerically small\n");
  257. }
  258. return entropy;
  259. }
  260. double VectorStatistics::computeSkew(const NICE::Vector &data)
  261. {
  262. double skew = 0.0;
  263. double meanvalue = computeMean(data);
  264. double var = computeVariance(data);
  265. if (data.size() > 0 && var>1e-8)
  266. {
  267. for (size_t i = 0; i < data.size(); ++i)
  268. {
  269. skew += (data[i] - meanvalue) * (data[i] - meanvalue) * (data[i]
  270. - meanvalue);
  271. }
  272. skew *= sqrt(double(data.size())) / pow(var * double(data.size()), 1.5);
  273. }
  274. return skew;
  275. }
  276. double VectorStatistics::computeKurtosis(const NICE::Vector &data)
  277. {
  278. double kurt = 0.0;
  279. double meanvalue = computeMean(data);
  280. double var = computeVariance(data);
  281. if (data.size() > 0 && var>1e-8)
  282. {
  283. for (size_t i = 0; i < data.size(); i++)
  284. {
  285. kurt += pow((data[i] - meanvalue), 4.0);
  286. }
  287. kurt /= double(data.size()) * pow(var, 2.0);
  288. kurt -= 3.0;
  289. }
  290. return kurt;
  291. }
  292. double VectorStatistics::computeMean(const NICE::Vector &data)
  293. {
  294. RunningStat rs;
  295. for (size_t i = 0; i < data.size(); i++)
  296. {
  297. rs.Push(data[i]);
  298. }
  299. return rs.Mean();
  300. }
  301. double VectorStatistics::computeVariance(const NICE::Vector &data)
  302. {
  303. RunningStat rs;
  304. for (size_t i = 0; i < data.size(); i++)
  305. {
  306. rs.Push(data[i]);
  307. }
  308. return rs.Variance();
  309. }
  310. double VectorStatistics::computeStandardDeviation(const NICE::Vector &data)
  311. {
  312. RunningStat rs;
  313. for (size_t i = 0; i < data.size(); i++)
  314. {
  315. rs.Push(data[i]);
  316. }
  317. return rs.StandardDeviation();
  318. }