extended_p_square.hpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // extended_p_square.hpp
  3. //
  4. // Copyright 2005 Daniel Egloff. Distributed under the Boost
  5. // Software License, Version 1.0. (See accompanying file
  6. // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  7. #ifndef BOOST_ACCUMULATORS_STATISTICS_EXTENDED_SINGLE_HPP_DE_01_01_2006
  8. #define BOOST_ACCUMULATORS_STATISTICS_EXTENDED_SINGLE_HPP_DE_01_01_2006
  9. #include <vector>
  10. #include <functional>
  11. #include <boost/range/begin.hpp>
  12. #include <boost/range/end.hpp>
  13. #include <boost/range/iterator_range.hpp>
  14. #include <boost/iterator/transform_iterator.hpp>
  15. #include <boost/iterator/counting_iterator.hpp>
  16. #include <boost/iterator/permutation_iterator.hpp>
  17. #include <boost/parameter/keyword.hpp>
  18. #include <boost/mpl/placeholders.hpp>
  19. #include <boost/accumulators/accumulators_fwd.hpp>
  20. #include <boost/accumulators/framework/extractor.hpp>
  21. #include <boost/accumulators/numeric/functional.hpp>
  22. #include <boost/accumulators/framework/parameters/sample.hpp>
  23. #include <boost/accumulators/framework/depends_on.hpp>
  24. #include <boost/accumulators/statistics_fwd.hpp>
  25. #include <boost/accumulators/statistics/count.hpp>
  26. #include <boost/accumulators/statistics/times2_iterator.hpp>
  27. namespace boost { namespace accumulators
  28. {
  29. ///////////////////////////////////////////////////////////////////////////////
  30. // probabilities named parameter
  31. //
  32. BOOST_PARAMETER_NESTED_KEYWORD(tag, extended_p_square_probabilities, probabilities)
  33. BOOST_ACCUMULATORS_IGNORE_GLOBAL(extended_p_square_probabilities)
  34. namespace impl
  35. {
  36. ///////////////////////////////////////////////////////////////////////////////
  37. // extended_p_square_impl
  38. // multiple quantile estimation
  39. /**
  40. @brief Multiple quantile estimation with the extended \f$P^2\f$ algorithm
  41. Extended \f$P^2\f$ algorithm for estimation of several quantiles without storing samples.
  42. Assume that \f$m\f$ quantiles \f$\xi_{p_1}, \ldots, \xi_{p_m}\f$ are to be estimated.
  43. Instead of storing the whole sample cumulative distribution, the algorithm maintains only
  44. \f$m+2\f$ principal markers and \f$m+1\f$ middle markers, whose positions are updated
  45. with each sample and whose heights are adjusted (if necessary) using a piecewise-parablic
  46. formula. The heights of these central markers are the current estimates of the quantiles
  47. and returned as an iterator range.
  48. For further details, see
  49. K. E. E. Raatikainen, Simultaneous estimation of several quantiles, Simulation, Volume 49,
  50. Number 4 (October), 1986, p. 159-164.
  51. The extended \f$ P^2 \f$ algorithm generalizes the \f$ P^2 \f$ algorithm of
  52. R. Jain and I. Chlamtac, The P^2 algorithm for dynamic calculation of quantiles and
  53. histograms without storing observations, Communications of the ACM,
  54. Volume 28 (October), Number 10, 1985, p. 1076-1085.
  55. @param extended_p_square_probabilities A vector of quantile probabilities.
  56. */
  57. template<typename Sample>
  58. struct extended_p_square_impl
  59. : accumulator_base
  60. {
  61. typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
  62. typedef std::vector<float_type> array_type;
  63. // for boost::result_of
  64. typedef iterator_range<
  65. detail::lvalue_index_iterator<
  66. permutation_iterator<
  67. typename array_type::const_iterator
  68. , detail::times2_iterator
  69. >
  70. >
  71. > result_type;
  72. template<typename Args>
  73. extended_p_square_impl(Args const &args)
  74. : probabilities(
  75. boost::begin(args[extended_p_square_probabilities])
  76. , boost::end(args[extended_p_square_probabilities])
  77. )
  78. , heights(2 * probabilities.size() + 3)
  79. , actual_positions(heights.size())
  80. , desired_positions(heights.size())
  81. , positions_increments(heights.size())
  82. {
  83. std::size_t num_quantiles = this->probabilities.size();
  84. std::size_t num_markers = this->heights.size();
  85. for(std::size_t i = 0; i < num_markers; ++i)
  86. {
  87. this->actual_positions[i] = i + 1;
  88. }
  89. this->positions_increments[0] = 0.;
  90. this->positions_increments[num_markers - 1] = 1.;
  91. for(std::size_t i = 0; i < num_quantiles; ++i)
  92. {
  93. this->positions_increments[2 * i + 2] = probabilities[i];
  94. }
  95. for(std::size_t i = 0; i <= num_quantiles; ++i)
  96. {
  97. this->positions_increments[2 * i + 1] =
  98. 0.5 * (this->positions_increments[2 * i] + this->positions_increments[2 * i + 2]);
  99. }
  100. for(std::size_t i = 0; i < num_markers; ++i)
  101. {
  102. this->desired_positions[i] = 1. + 2. * (num_quantiles + 1.) * this->positions_increments[i];
  103. }
  104. }
  105. template<typename Args>
  106. void operator ()(Args const &args)
  107. {
  108. std::size_t cnt = count(args);
  109. // m+2 principal markers and m+1 middle markers
  110. std::size_t num_markers = 2 * this->probabilities.size() + 3;
  111. // first accumulate num_markers samples
  112. if(cnt <= num_markers)
  113. {
  114. this->heights[cnt - 1] = args[sample];
  115. // complete the initialization of heights by sorting
  116. if(cnt == num_markers)
  117. {
  118. std::sort(this->heights.begin(), this->heights.end());
  119. }
  120. }
  121. else
  122. {
  123. std::size_t sample_cell = 1;
  124. // find cell k = sample_cell such that heights[k-1] <= sample < heights[k]
  125. if(args[sample] < this->heights[0])
  126. {
  127. this->heights[0] = args[sample];
  128. sample_cell = 1;
  129. }
  130. else if(args[sample] >= this->heights[num_markers - 1])
  131. {
  132. this->heights[num_markers - 1] = args[sample];
  133. sample_cell = num_markers - 1;
  134. }
  135. else
  136. {
  137. typedef typename array_type::iterator iterator;
  138. iterator it = std::upper_bound(
  139. this->heights.begin()
  140. , this->heights.end()
  141. , args[sample]
  142. );
  143. sample_cell = std::distance(this->heights.begin(), it);
  144. }
  145. // update actual positions of all markers above sample_cell index
  146. for(std::size_t i = sample_cell; i < num_markers; ++i)
  147. {
  148. ++this->actual_positions[i];
  149. }
  150. // update desired positions of all markers
  151. for(std::size_t i = 0; i < num_markers; ++i)
  152. {
  153. this->desired_positions[i] += this->positions_increments[i];
  154. }
  155. // adjust heights and actual positions of markers 1 to num_markers-2 if necessary
  156. for(std::size_t i = 1; i <= num_markers - 2; ++i)
  157. {
  158. // offset to desired position
  159. float_type d = this->desired_positions[i] - this->actual_positions[i];
  160. // offset to next position
  161. float_type dp = this->actual_positions[i+1] - this->actual_positions[i];
  162. // offset to previous position
  163. float_type dm = this->actual_positions[i-1] - this->actual_positions[i];
  164. // height ds
  165. float_type hp = (this->heights[i+1] - this->heights[i]) / dp;
  166. float_type hm = (this->heights[i-1] - this->heights[i]) / dm;
  167. if((d >= 1 && dp > 1) || (d <= -1 && dm < -1))
  168. {
  169. short sign_d = static_cast<short>(d / std::abs(d));
  170. float_type h = this->heights[i] + sign_d / (dp - dm) * ((sign_d - dm)*hp
  171. + (dp - sign_d) * hm);
  172. // try adjusting heights[i] using p-squared formula
  173. if(this->heights[i - 1] < h && h < this->heights[i + 1])
  174. {
  175. this->heights[i] = h;
  176. }
  177. else
  178. {
  179. // use linear formula
  180. if(d > 0)
  181. {
  182. this->heights[i] += hp;
  183. }
  184. if(d < 0)
  185. {
  186. this->heights[i] -= hm;
  187. }
  188. }
  189. this->actual_positions[i] += sign_d;
  190. }
  191. }
  192. }
  193. }
  194. result_type result(dont_care) const
  195. {
  196. // for i in [1,probabilities.size()], return heights[i * 2]
  197. detail::times2_iterator idx_begin = detail::make_times2_iterator(1);
  198. detail::times2_iterator idx_end = detail::make_times2_iterator(this->probabilities.size() + 1);
  199. return result_type(
  200. make_permutation_iterator(this->heights.begin(), idx_begin)
  201. , make_permutation_iterator(this->heights.begin(), idx_end)
  202. );
  203. }
  204. private:
  205. array_type probabilities; // the quantile probabilities
  206. array_type heights; // q_i
  207. array_type actual_positions; // n_i
  208. array_type desired_positions; // d_i
  209. array_type positions_increments; // f_i
  210. };
  211. } // namespace impl
  212. ///////////////////////////////////////////////////////////////////////////////
  213. // tag::extended_p_square
  214. //
  215. namespace tag
  216. {
  217. struct extended_p_square
  218. : depends_on<count>
  219. , extended_p_square_probabilities
  220. {
  221. typedef accumulators::impl::extended_p_square_impl<mpl::_1> impl;
  222. #ifdef BOOST_ACCUMULATORS_DOXYGEN_INVOKED
  223. /// tag::extended_p_square::probabilities named parameter
  224. static boost::parameter::keyword<tag::probabilities> const probabilities;
  225. #endif
  226. };
  227. }
  228. ///////////////////////////////////////////////////////////////////////////////
  229. // extract::extended_p_square
  230. //
  231. namespace extract
  232. {
  233. extractor<tag::extended_p_square> const extended_p_square = {};
  234. BOOST_ACCUMULATORS_IGNORE_GLOBAL(extended_p_square)
  235. }
  236. using extract::extended_p_square;
  237. // So that extended_p_square can be automatically substituted with
  238. // weighted_extended_p_square when the weight parameter is non-void
  239. template<>
  240. struct as_weighted_feature<tag::extended_p_square>
  241. {
  242. typedef tag::weighted_extended_p_square type;
  243. };
  244. template<>
  245. struct feature_of<tag::weighted_extended_p_square>
  246. : feature_of<tag::extended_p_square>
  247. {
  248. };
  249. }} // namespace boost::accumulators
  250. #endif