segment.hpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484
  1. //
  2. // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
  3. //
  4. // Distributed under the Boost Software License, Version 1.0. (See
  5. // accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt)
  7. //
  8. #ifndef BOOST_LOCALE_BOUNDARY_SEGMENT_HPP_INCLUDED
  9. #define BOOST_LOCALE_BOUNDARY_SEGMENT_HPP_INCLUDED
  10. #include <boost/locale/config.hpp>
  11. #ifdef BOOST_MSVC
  12. # pragma warning(push)
  13. # pragma warning(disable : 4275 4251 4231 4660)
  14. #endif
  15. #include <locale>
  16. #include <string>
  17. #include <iosfwd>
  18. #include <iterator>
  19. namespace boost {
  20. namespace locale {
  21. namespace boundary {
  22. /// \cond INTERNAL
  23. namespace details {
  24. template<typename LeftIterator,typename RightIterator>
  25. int compare_text(LeftIterator l_begin,LeftIterator l_end,RightIterator r_begin,RightIterator r_end)
  26. {
  27. typedef LeftIterator left_iterator;
  28. typedef RightIterator right_iterator;
  29. typedef typename std::iterator_traits<left_iterator>::value_type char_type;
  30. typedef std::char_traits<char_type> traits;
  31. while(l_begin!=l_end && r_begin!=r_end) {
  32. char_type lchar = *l_begin++;
  33. char_type rchar = *r_begin++;
  34. if(traits::eq(lchar,rchar))
  35. continue;
  36. if(traits::lt(lchar,rchar))
  37. return -1;
  38. else
  39. return 1;
  40. }
  41. if(l_begin==l_end && r_begin==r_end)
  42. return 0;
  43. if(l_begin==l_end)
  44. return -1;
  45. else
  46. return 1;
  47. }
  48. template<typename Left,typename Right>
  49. int compare_text(Left const &l,Right const &r)
  50. {
  51. return compare_text(l.begin(),l.end(),r.begin(),r.end());
  52. }
  53. template<typename Left,typename Char>
  54. int compare_string(Left const &l,Char const *begin)
  55. {
  56. Char const *end = begin;
  57. while(*end!=0)
  58. end++;
  59. return compare_text(l.begin(),l.end(),begin,end);
  60. }
  61. template<typename Right,typename Char>
  62. int compare_string(Char const *begin,Right const &r)
  63. {
  64. Char const *end = begin;
  65. while(*end!=0)
  66. end++;
  67. return compare_text(begin,end,r.begin(),r.end());
  68. }
  69. }
  70. /// \endcond
  71. ///
  72. /// \addtogroup boundary
  73. /// @{
  74. ///
/// \brief a segment object that represents a pair of two iterators that define the range where
/// this segment exists and a rule that defines it.
  77. ///
  78. /// This type of object is dereferenced by the iterators of segment_index. Using a rule() member function
  79. /// you can get a specific rule this segment was selected with. For example, when you use
  80. /// word boundary analysis, you can check if the specific word contains Kana letters by checking (rule() & \ref word_kana)!=0
  81. /// For a sentence analysis you can check if the sentence is selected because a sentence terminator is found (\ref sentence_term) or
  82. /// there is a line break (\ref sentence_sep).
  83. ///
/// This object can be automatically converted to std::basic_string with the same type of character. It is also
/// a valid range that has begin() and end() member functions returning iterators on the location of the segment.
  86. ///
  87. /// \see
  88. ///
  89. /// - \ref segment_index
  90. /// - \ref boundary_point
  91. /// - \ref boundary_point_index
  92. ///
  93. template<typename IteratorType>
  94. class segment : public std::pair<IteratorType,IteratorType> {
  95. public:
  96. ///
  97. /// The type of the underlying character
  98. ///
  99. typedef typename std::iterator_traits<IteratorType>::value_type char_type;
  100. ///
  101. /// The type of the string it is converted to
  102. ///
  103. typedef std::basic_string<char_type> string_type;
  104. ///
  105. /// The value that iterators return - the character itself
  106. ///
  107. typedef char_type value_type;
  108. ///
  109. /// The iterator that allows to iterate the range
  110. ///
  111. typedef IteratorType iterator;
  112. ///
  113. /// The iterator that allows to iterate the range
  114. ///
  115. typedef IteratorType const_iterator;
  116. ///
  117. /// The type that represent a difference between two iterators
  118. ///
  119. typedef typename std::iterator_traits<IteratorType>::difference_type difference_type;
  120. ///
  121. /// Default constructor
  122. ///
  123. segment() {}
  124. ///
  125. /// Create a segment using two iterators and a rule that represents this point
  126. ///
  127. segment(iterator b,iterator e,rule_type r) :
  128. std::pair<IteratorType,IteratorType>(b,e),
  129. rule_(r)
  130. {
  131. }
  132. ///
  133. /// Set the start of the range
  134. ///
  135. void begin(iterator const &v)
  136. {
  137. this->first = v;
  138. }
  139. ///
  140. /// Set the end of the range
  141. ///
  142. void end(iterator const &v)
  143. {
  144. this->second = v;
  145. }
  146. ///
  147. /// Get the start of the range
  148. ///
  149. IteratorType begin() const
  150. {
  151. return this->first;
  152. }
  153. ///
  154. /// Set the end of the range
  155. ///
  156. IteratorType end() const
  157. {
  158. return this->second;
  159. }
  160. ///
  161. /// Convert the range to a string automatically
  162. ///
  163. template <class T, class A>
  164. operator std::basic_string<char_type, T, A> ()const
  165. {
  166. return std::basic_string<char_type, T, A>(this->first, this->second);
  167. }
  168. ///
  169. /// Create a string from the range explicitly
  170. ///
  171. string_type str() const
  172. {
  173. return string_type(begin(),end());
  174. }
  175. ///
  176. /// Get the length of the text chunk
  177. ///
  178. size_t length() const
  179. {
  180. return std::distance(begin(),end());
  181. }
  182. ///
  183. /// Check if the segment is empty
  184. ///
  185. bool empty() const
  186. {
  187. return begin() == end();
  188. }
  189. ///
  190. /// Get the rule that is used for selection of this segment.
  191. ///
  192. rule_type rule() const
  193. {
  194. return rule_;
  195. }
  196. ///
  197. /// Set a rule that is used for segment selection
  198. ///
  199. void rule(rule_type r)
  200. {
  201. rule_ = r;
  202. }
  203. // make sure we override std::pair's operator==
  204. /// Compare two segments
  205. bool operator==(segment const &other)
  206. {
  207. return details::compare_text(*this,other) == 0;
  208. }
  209. /// Compare two segments
  210. bool operator!=(segment const &other)
  211. {
  212. return details::compare_text(*this,other) != 0;
  213. }
  214. private:
  215. rule_type rule_;
  216. };
  217. /// Compare two segments
  218. template<typename IteratorL,typename IteratorR>
  219. bool operator==(segment<IteratorL> const &l,segment<IteratorR> const &r)
  220. {
  221. return details::compare_text(l,r) == 0;
  222. }
  223. /// Compare two segments
  224. template<typename IteratorL,typename IteratorR>
  225. bool operator!=(segment<IteratorL> const &l,segment<IteratorR> const &r)
  226. {
  227. return details::compare_text(l,r) != 0;
  228. }
  229. /// Compare two segments
  230. template<typename IteratorL,typename IteratorR>
  231. bool operator<(segment<IteratorL> const &l,segment<IteratorR> const &r)
  232. {
  233. return details::compare_text(l,r) < 0;
  234. }
  235. /// Compare two segments
  236. template<typename IteratorL,typename IteratorR>
  237. bool operator<=(segment<IteratorL> const &l,segment<IteratorR> const &r)
  238. {
  239. return details::compare_text(l,r) <= 0;
  240. }
  241. /// Compare two segments
  242. template<typename IteratorL,typename IteratorR>
  243. bool operator>(segment<IteratorL> const &l,segment<IteratorR> const &r)
  244. {
  245. return details::compare_text(l,r) > 0;
  246. }
  247. /// Compare two segments
  248. template<typename IteratorL,typename IteratorR>
  249. bool operator>=(segment<IteratorL> const &l,segment<IteratorR> const &r)
  250. {
  251. return details::compare_text(l,r) >= 0;
  252. }
  253. /// Compare string and segment
  254. template<typename CharType,typename Traits,typename Alloc,typename IteratorR>
  255. bool operator==(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r)
  256. {
  257. return details::compare_text(l,r) == 0;
  258. }
  259. /// Compare string and segment
  260. template<typename CharType,typename Traits,typename Alloc,typename IteratorR>
  261. bool operator!=(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r)
  262. {
  263. return details::compare_text(l,r) != 0;
  264. }
  265. /// Compare string and segment
  266. template<typename CharType,typename Traits,typename Alloc,typename IteratorR>
  267. bool operator<(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r)
  268. {
  269. return details::compare_text(l,r) < 0;
  270. }
  271. /// Compare string and segment
  272. template<typename CharType,typename Traits,typename Alloc,typename IteratorR>
  273. bool operator<=(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r)
  274. {
  275. return details::compare_text(l,r) <= 0;
  276. }
  277. /// Compare string and segment
  278. template<typename CharType,typename Traits,typename Alloc,typename IteratorR>
  279. bool operator>(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r)
  280. {
  281. return details::compare_text(l,r) > 0;
  282. }
  283. /// Compare string and segment
  284. template<typename CharType,typename Traits,typename Alloc,typename IteratorR>
  285. bool operator>=(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r)
  286. {
  287. return details::compare_text(l,r) >= 0;
  288. }
  289. /// Compare string and segment
  290. template<typename Iterator,typename CharType,typename Traits,typename Alloc>
  291. bool operator==(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r)
  292. {
  293. return details::compare_text(l,r) == 0;
  294. }
  295. /// Compare string and segment
  296. template<typename Iterator,typename CharType,typename Traits,typename Alloc>
  297. bool operator!=(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r)
  298. {
  299. return details::compare_text(l,r) != 0;
  300. }
  301. /// Compare string and segment
  302. template<typename Iterator,typename CharType,typename Traits,typename Alloc>
  303. bool operator<(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r)
  304. {
  305. return details::compare_text(l,r) < 0;
  306. }
  307. /// Compare string and segment
  308. template<typename Iterator,typename CharType,typename Traits,typename Alloc>
  309. bool operator<=(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r)
  310. {
  311. return details::compare_text(l,r) <= 0;
  312. }
  313. /// Compare string and segment
  314. template<typename Iterator,typename CharType,typename Traits,typename Alloc>
  315. bool operator>(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r)
  316. {
  317. return details::compare_text(l,r) > 0;
  318. }
  319. /// Compare string and segment
  320. template<typename Iterator,typename CharType,typename Traits,typename Alloc>
  321. bool operator>=(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r)
  322. {
  323. return details::compare_text(l,r) >= 0;
  324. }
  325. /// Compare C string and segment
  326. template<typename CharType,typename IteratorR>
  327. bool operator==(CharType const *l,segment<IteratorR> const &r)
  328. {
  329. return details::compare_string(l,r) == 0;
  330. }
  331. /// Compare C string and segment
  332. template<typename CharType,typename IteratorR>
  333. bool operator!=(CharType const *l,segment<IteratorR> const &r)
  334. {
  335. return details::compare_string(l,r) != 0;
  336. }
  337. /// Compare C string and segment
  338. template<typename CharType,typename IteratorR>
  339. bool operator<(CharType const *l,segment<IteratorR> const &r)
  340. {
  341. return details::compare_string(l,r) < 0;
  342. }
  343. /// Compare C string and segment
  344. template<typename CharType,typename IteratorR>
  345. bool operator<=(CharType const *l,segment<IteratorR> const &r)
  346. {
  347. return details::compare_string(l,r) <= 0;
  348. }
  349. /// Compare C string and segment
  350. template<typename CharType,typename IteratorR>
  351. bool operator>(CharType const *l,segment<IteratorR> const &r)
  352. {
  353. return details::compare_string(l,r) > 0;
  354. }
  355. /// Compare C string and segment
  356. template<typename CharType,typename IteratorR>
  357. bool operator>=(CharType const *l,segment<IteratorR> const &r)
  358. {
  359. return details::compare_string(l,r) >= 0;
  360. }
  361. /// Compare C string and segment
  362. template<typename Iterator,typename CharType>
  363. bool operator==(segment<Iterator> const &l,CharType const *r)
  364. {
  365. return details::compare_string(l,r) == 0;
  366. }
  367. /// Compare C string and segment
  368. template<typename Iterator,typename CharType>
  369. bool operator!=(segment<Iterator> const &l,CharType const *r)
  370. {
  371. return details::compare_string(l,r) != 0;
  372. }
  373. /// Compare C string and segment
  374. template<typename Iterator,typename CharType>
  375. bool operator<(segment<Iterator> const &l,CharType const *r)
  376. {
  377. return details::compare_string(l,r) < 0;
  378. }
  379. /// Compare C string and segment
  380. template<typename Iterator,typename CharType>
  381. bool operator<=(segment<Iterator> const &l,CharType const *r)
  382. {
  383. return details::compare_string(l,r) <= 0;
  384. }
  385. /// Compare C string and segment
  386. template<typename Iterator,typename CharType>
  387. bool operator>(segment<Iterator> const &l,CharType const *r)
  388. {
  389. return details::compare_string(l,r) > 0;
  390. }
  391. /// Compare C string and segment
  392. template<typename Iterator,typename CharType>
  393. bool operator>=(segment<Iterator> const &l,CharType const *r)
  394. {
  395. return details::compare_string(l,r) >= 0;
  396. }
  397. typedef segment<std::string::const_iterator> ssegment; ///< convenience typedef
  398. typedef segment<std::wstring::const_iterator> wssegment; ///< convenience typedef
  399. #ifdef BOOST_HAS_CHAR16_T
  400. typedef segment<std::u16string::const_iterator> u16ssegment;///< convenience typedef
  401. #endif
  402. #ifdef BOOST_HAS_CHAR32_T
  403. typedef segment<std::u32string::const_iterator> u32ssegment;///< convenience typedef
  404. #endif
  405. typedef segment<char const *> csegment; ///< convenience typedef
  406. typedef segment<wchar_t const *> wcsegment; ///< convenience typedef
  407. #ifdef BOOST_HAS_CHAR16_T
  408. typedef segment<char16_t const *> u16csegment; ///< convenience typedef
  409. #endif
  410. #ifdef BOOST_HAS_CHAR32_T
  411. typedef segment<char32_t const *> u32csegment; ///< convenience typedef
  412. #endif
  413. ///
  414. /// Write the segment to the stream character by character
  415. ///
  416. template<typename CharType,typename TraitsType,typename Iterator>
  417. std::basic_ostream<CharType,TraitsType> &operator<<(
  418. std::basic_ostream<CharType,TraitsType> &out,
  419. segment<Iterator> const &tok)
  420. {
  421. for(Iterator p=tok.begin(),e=tok.end();p!=e;++p)
  422. out << *p;
  423. return out;
  424. }
  425. /// @}
  426. } // boundary
  427. } // locale
  428. } // boost
  429. #ifdef BOOST_MSVC
  430. #pragma warning(pop)
  431. #endif
  432. #endif
  433. // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4