weighted_median.hpp 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // weighted_median.hpp
  3. //
  4. // Copyright 2006 Eric Niebler, Olivier Gygi. Distributed under the Boost
  5. // Software License, Version 1.0. (See accompanying file
  6. // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  7. #ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_MEDIAN_HPP_EAN_28_10_2005
  8. #define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_MEDIAN_HPP_EAN_28_10_2005
  9. #include <boost/mpl/placeholders.hpp>
  10. #include <boost/range/iterator_range.hpp>
  11. #include <boost/accumulators/framework/accumulator_base.hpp>
  12. #include <boost/accumulators/framework/extractor.hpp>
  13. #include <boost/accumulators/numeric/functional.hpp>
  14. #include <boost/accumulators/framework/parameters/sample.hpp>
  15. #include <boost/accumulators/framework/depends_on.hpp>
  16. #include <boost/accumulators/statistics_fwd.hpp>
  17. #include <boost/accumulators/statistics/count.hpp>
  18. #include <boost/accumulators/statistics/median.hpp>
  19. #include <boost/accumulators/statistics/weighted_p_square_quantile.hpp>
  20. #include <boost/accumulators/statistics/weighted_density.hpp>
  21. #include <boost/accumulators/statistics/weighted_p_square_cumul_dist.hpp>
  22. namespace boost { namespace accumulators
  23. {
  24. namespace impl
  25. {
  26. ///////////////////////////////////////////////////////////////////////////////
  27. // weighted_median_impl
  28. //
  29. /**
  30. @brief Median estimation for weighted samples based on the \f$P^2\f$ quantile estimator
  31. The \f$P^2\f$ algorithm for weighted samples is invoked with a quantile probability of 0.5.
  32. */
  33. template<typename Sample>
  34. struct weighted_median_impl
  35. : accumulator_base
  36. {
  37. // for boost::result_of
  38. typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type result_type;
  39. weighted_median_impl(dont_care) {}
  40. template<typename Args>
  41. result_type result(Args const &args) const
  42. {
  43. return weighted_p_square_quantile_for_median(args);
  44. }
  45. };
  46. ///////////////////////////////////////////////////////////////////////////////
  47. // with_density_weighted_median_impl
  48. //
  49. /**
  50. @brief Median estimation for weighted samples based on the density estimator
  51. The algorithm determines the bin in which the \f$0.5*cnt\f$-th sample lies, \f$cnt\f$ being
  52. the total number of samples. It returns the approximate horizontal position of this sample,
  53. based on a linear interpolation inside the bin.
  54. */
  55. template<typename Sample>
  56. struct with_density_weighted_median_impl
  57. : accumulator_base
  58. {
  59. typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
  60. typedef std::vector<std::pair<float_type, float_type> > histogram_type;
  61. typedef iterator_range<typename histogram_type::iterator> range_type;
  62. // for boost::result_of
  63. typedef float_type result_type;
  64. template<typename Args>
  65. with_density_weighted_median_impl(Args const &args)
  66. : sum(numeric::fdiv(args[sample | Sample()], (std::size_t)1))
  67. , is_dirty(true)
  68. {
  69. }
  70. void operator ()(dont_care)
  71. {
  72. this->is_dirty = true;
  73. }
  74. template<typename Args>
  75. result_type result(Args const &args) const
  76. {
  77. if (this->is_dirty)
  78. {
  79. this->is_dirty = false;
  80. std::size_t cnt = count(args);
  81. range_type histogram = weighted_density(args);
  82. typename range_type::iterator it = histogram.begin();
  83. while (this->sum < 0.5 * cnt)
  84. {
  85. this->sum += it->second * cnt;
  86. ++it;
  87. }
  88. --it;
  89. float_type over = numeric::fdiv(this->sum - 0.5 * cnt, it->second * cnt);
  90. this->median = it->first * over + (it + 1)->first * ( 1. - over );
  91. }
  92. return this->median;
  93. }
  94. private:
  95. mutable float_type sum;
  96. mutable bool is_dirty;
  97. mutable float_type median;
  98. };
  99. ///////////////////////////////////////////////////////////////////////////////
  100. // with_p_square_cumulative_distribution_weighted_median_impl
  101. //
  102. /**
  103. @brief Median estimation for weighted samples based on the \f$P^2\f$ cumulative distribution estimator
  104. The algorithm determines the first (leftmost) bin with a height exceeding 0.5. It
  105. returns the approximate horizontal position of where the cumulative distribution
  106. equals 0.5, based on a linear interpolation inside the bin.
  107. */
  108. template<typename Sample, typename Weight>
  109. struct with_p_square_cumulative_distribution_weighted_median_impl
  110. : accumulator_base
  111. {
  112. typedef typename numeric::functional::multiplies<Sample, Weight>::result_type weighted_sample;
  113. typedef typename numeric::functional::fdiv<weighted_sample, std::size_t>::result_type float_type;
  114. typedef std::vector<std::pair<float_type, float_type> > histogram_type;
  115. typedef iterator_range<typename histogram_type::iterator> range_type;
  116. // for boost::result_of
  117. typedef float_type result_type;
  118. with_p_square_cumulative_distribution_weighted_median_impl(dont_care)
  119. : is_dirty(true)
  120. {
  121. }
  122. void operator ()(dont_care)
  123. {
  124. this->is_dirty = true;
  125. }
  126. template<typename Args>
  127. result_type result(Args const &args) const
  128. {
  129. if (this->is_dirty)
  130. {
  131. this->is_dirty = false;
  132. range_type histogram = weighted_p_square_cumulative_distribution(args);
  133. typename range_type::iterator it = histogram.begin();
  134. while (it->second < 0.5)
  135. {
  136. ++it;
  137. }
  138. float_type over = numeric::fdiv(it->second - 0.5, it->second - (it - 1)->second);
  139. this->median = it->first * over + (it + 1)->first * ( 1. - over );
  140. }
  141. return this->median;
  142. }
  143. private:
  144. mutable bool is_dirty;
  145. mutable float_type median;
  146. };
  147. } // namespace impl
  148. ///////////////////////////////////////////////////////////////////////////////
  149. // tag::weighted_median
  150. // tag::with_density_weighted_median
  151. // tag::with_p_square_cumulative_distribution_weighted_median
  152. //
  153. namespace tag
  154. {
  155. struct weighted_median
  156. : depends_on<weighted_p_square_quantile_for_median>
  157. {
  158. /// INTERNAL ONLY
  159. ///
  160. typedef accumulators::impl::weighted_median_impl<mpl::_1> impl;
  161. };
  162. struct with_density_weighted_median
  163. : depends_on<count, weighted_density>
  164. {
  165. /// INTERNAL ONLY
  166. ///
  167. typedef accumulators::impl::with_density_weighted_median_impl<mpl::_1> impl;
  168. };
  169. struct with_p_square_cumulative_distribution_weighted_median
  170. : depends_on<weighted_p_square_cumulative_distribution>
  171. {
  172. /// INTERNAL ONLY
  173. ///
  174. typedef accumulators::impl::with_p_square_cumulative_distribution_weighted_median_impl<mpl::_1, mpl::_2> impl;
  175. };
  176. }
  177. ///////////////////////////////////////////////////////////////////////////////
  178. // extract::weighted_median
  179. //
  180. namespace extract
  181. {
  182. extractor<tag::median> const weighted_median = {};
  183. BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_median)
  184. }
  185. using extract::weighted_median;
  186. // weighted_median(with_p_square_quantile) -> weighted_median
  187. template<>
  188. struct as_feature<tag::weighted_median(with_p_square_quantile)>
  189. {
  190. typedef tag::weighted_median type;
  191. };
  192. // weighted_median(with_density) -> with_density_weighted_median
  193. template<>
  194. struct as_feature<tag::weighted_median(with_density)>
  195. {
  196. typedef tag::with_density_weighted_median type;
  197. };
  198. // weighted_median(with_p_square_cumulative_distribution) -> with_p_square_cumulative_distribution_weighted_median
  199. template<>
  200. struct as_feature<tag::weighted_median(with_p_square_cumulative_distribution)>
  201. {
  202. typedef tag::with_p_square_cumulative_distribution_weighted_median type;
  203. };
  204. }} // namespace boost::accumulators
  205. #endif