VectorStatistics.cpp 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346
  1. /**
  2. * @file VectorStatistics.cpp
  3. * @brief B. P. Welford Computation of Mean and Variance download at: http://www.johndcook.com/standard_deviation.html
  4. * @author Michael Koch
  5. * @date 18/02/2009
  6. */
  7. #ifdef NOVISUAL
  8. #include <vislearning/nice_nonvis.h>
  9. #else
  10. #include <vislearning/nice.h>
  11. #endif
  12. #include "vislearning/baselib/RunningStat.h"
  13. #include "vislearning/baselib/VectorStatistics.h"
  14. using namespace OBJREC;
  15. using namespace std;
  16. using namespace NICE;
  17. //non-static
  18. VectorStatistics::VectorStatistics(const NICE::Vector &data, bool compute)
  19. {
  20. datavector = data;
  21. initVariables();
  22. if (compute)
  23. {
  24. calculateStatistics();
  25. }
  26. }
  27. VectorStatistics::~VectorStatistics()
  28. {
  29. }
  30. void VectorStatistics::calculateStatistics()
  31. {
  32. setMean();
  33. setMedian();
  34. setVariance();
  35. setSkewness();
  36. setKurtosis();
  37. setEntropy();
  38. setNormalizedEntropy();
  39. }
  40. void VectorStatistics::setData(const NICE::Vector &data)
  41. {
  42. datavector = data;
  43. initVariables();
  44. }
  45. void VectorStatistics::initVariables()
  46. {
  47. mean = 0.0;
  48. med = 0.0;
  49. var = 0.0;
  50. skew = 0.0;
  51. kurt = 0.0;
  52. entropy=0.0;
  53. meancalculated = false;
  54. mediancalculated = false;
  55. varcalculated = false;
  56. stdcalculated = false;
  57. skewcalculated = false;
  58. kurtcalculated = false;
  59. entropycalculated = false;
  60. normalizedentropy = false;
  61. }
  62. void VectorStatistics::setMean()
  63. {
  64. if (!meancalculated)
  65. {
  66. mean = computeMean(datavector);
  67. meancalculated = true;
  68. }
  69. }
  70. void VectorStatistics::setMedian()
  71. {
  72. if (!mediancalculated)
  73. {
  74. med = computeMedian(datavector);
  75. mediancalculated = true;
  76. }
  77. }
  78. void VectorStatistics::setVariance()
  79. {
  80. if (!varcalculated)
  81. {
  82. var = computeVariance(datavector);
  83. varcalculated = true;
  84. }
  85. }
  86. void VectorStatistics::setSkewness()
  87. {
  88. if (!skewcalculated)
  89. {
  90. skew = computeSkew(datavector);
  91. skewcalculated = true;
  92. }
  93. }
  94. void VectorStatistics::setKurtosis()
  95. {
  96. if (!kurtcalculated)
  97. {
  98. kurt = computeKurtosis(datavector);
  99. kurtcalculated = true;
  100. }
  101. }
  102. void VectorStatistics::setEntropy()
  103. {
  104. if (!entropycalculated)
  105. {
  106. entropy = computeEntropy(datavector);
  107. entropycalculated = true;
  108. }
  109. }
  110. void VectorStatistics::setNormalizedEntropy()
  111. {
  112. if (!normalizedentropycalculated)
  113. {
  114. normalizedentropy = computeNormalizedEntropy(datavector);
  115. normalizedentropycalculated = true;
  116. }
  117. }
  118. double VectorStatistics::getMean()
  119. {
  120. if (meancalculated)
  121. {
  122. return mean;
  123. }
  124. else
  125. {
  126. setMean();
  127. return mean;
  128. }
  129. }
  130. double VectorStatistics::getMedian()
  131. {
  132. if (mediancalculated)
  133. {
  134. return med;
  135. }
  136. else
  137. {
  138. setMedian();
  139. return med;
  140. }
  141. }
  142. double VectorStatistics::getVariance()
  143. {
  144. if (varcalculated)
  145. {
  146. return var;
  147. }
  148. else
  149. {
  150. setVariance();
  151. return var;
  152. }
  153. }
  154. double VectorStatistics::getStandardDeviation()
  155. {
  156. if (varcalculated)
  157. {
  158. return sqrt(var);
  159. }
  160. else
  161. {
  162. setVariance();
  163. return sqrt(var);
  164. }
  165. }
  166. double VectorStatistics::getSkewness()
  167. {
  168. if (skewcalculated)
  169. {
  170. return skew;
  171. }
  172. else
  173. {
  174. setSkewness();
  175. return skew;
  176. }
  177. }
  178. double VectorStatistics::getKurtosis()
  179. {
  180. if (kurtcalculated)
  181. {
  182. return kurt;
  183. }
  184. else
  185. {
  186. setKurtosis();
  187. return kurt;
  188. }
  189. }
  190. double VectorStatistics::getEntropy()
  191. {
  192. if (entropycalculated)
  193. {
  194. return entropy;
  195. }
  196. else
  197. {
  198. setEntropy();
  199. return entropy;
  200. }
  201. }
  202. double VectorStatistics::getNormalizedEntropy()
  203. {
  204. if (normalizedentropycalculated)
  205. {
  206. return normalizedentropy;
  207. }
  208. else
  209. {
  210. setNormalizedEntropy();
  211. return normalizedentropy;
  212. }
  213. }
  214. //static
  215. double VectorStatistics::computeMedian(const NICE::Vector &data)
  216. {
  217. if (data.size() > 0)
  218. {
  219. NICE::Vector data_tmp(data);
  220. data_tmp.sortAscend();
  221. return data_tmp[data.size() / 2];
  222. }
  223. else
  224. {
  225. fprintf(stderr, "VectorStatistics: median - size of data is 0!\n");
  226. return 0.0;
  227. }
  228. }
  229. double VectorStatistics::computeNormalizedEntropy(const NICE::Vector &data)
  230. {
  231. double entropy = computeEntropy(data);
  232. if (data.size()>1)
  233. {
  234. double max_entropy = -log(1.0 / (double) data.size());
  235. entropy/=max_entropy;
  236. }
  237. return entropy;
  238. }
  239. double VectorStatistics::computeEntropy(const NICE::Vector &data)
  240. {
  241. double entropy = 0.0;
  242. double sum = 0.0;
  243. for (size_t i = 0; i < data.size(); i++)
  244. {
  245. double val = data[i];
  246. if (val <= 0.0)
  247. continue;
  248. entropy -= val * log(val);
  249. sum += val;
  250. }
  251. if (fabs(sum) > 1e-6)
  252. {
  253. entropy /= sum;
  254. entropy += log(sum);
  255. }
  256. else
  257. {
  258. fprintf(stderr,
  259. "VectorStatistics: entropy - sum of values is numerically small\n");
  260. }
  261. return entropy;
  262. }
  263. double VectorStatistics::computeSkew(const NICE::Vector &data)
  264. {
  265. double skew = 0.0;
  266. double meanvalue = computeMean(data);
  267. double var = computeVariance(data);
  268. if (data.size() > 0 && var>1e-8)
  269. {
  270. for (size_t i = 0; i < data.size(); ++i)
  271. {
  272. skew += (data[i] - meanvalue) * (data[i] - meanvalue) * (data[i]
  273. - meanvalue);
  274. }
  275. skew *= sqrt(double(data.size())) / pow(var * double(data.size()), 1.5);
  276. }
  277. return skew;
  278. }
  279. double VectorStatistics::computeKurtosis(const NICE::Vector &data)
  280. {
  281. double kurt = 0.0;
  282. double meanvalue = computeMean(data);
  283. double var = computeVariance(data);
  284. if (data.size() > 0 && var>1e-8)
  285. {
  286. for (size_t i = 0; i < data.size(); i++)
  287. {
  288. kurt += pow((data[i] - meanvalue), 4.0);
  289. }
  290. kurt /= double(data.size()) * pow(var, 2.0);
  291. kurt -= 3.0;
  292. }
  293. return kurt;
  294. }
  295. double VectorStatistics::computeMean(const NICE::Vector &data)
  296. {
  297. RunningStat rs;
  298. for (size_t i = 0; i < data.size(); i++)
  299. {
  300. rs.Push(data[i]);
  301. }
  302. return rs.Mean();
  303. }
  304. double VectorStatistics::computeVariance(const NICE::Vector &data)
  305. {
  306. RunningStat rs;
  307. for (size_t i = 0; i < data.size(); i++)
  308. {
  309. rs.Push(data[i]);
  310. }
  311. return rs.Variance();
  312. }
  313. double VectorStatistics::computeStandardDeviation(const NICE::Vector &data)
  314. {
  315. RunningStat rs;
  316. for (size_t i = 0; i < data.size(); i++)
  317. {
  318. rs.Push(data[i]);
  319. }
  320. return rs.StandardDeviation();
  321. }