// cpp_regex_traits.hpp (34 KB)
  1. /*
  2. *
  3. * Copyright (c) 2004 John Maddock
  4. * Copyright 2011 Garmin Ltd. or its subsidiaries
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE cpp_regex_traits.hpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Declares regular expression traits class cpp_regex_traits.
  16. */
  17. #ifndef BOOST_CPP_REGEX_TRAITS_HPP_INCLUDED
  18. #define BOOST_CPP_REGEX_TRAITS_HPP_INCLUDED
  19. #include <boost/config.hpp>
  20. #include <boost/integer.hpp>
  21. #ifndef BOOST_NO_STD_LOCALE
  22. #ifndef BOOST_RE_PAT_EXCEPT_HPP
  23. #include <boost/regex/pattern_except.hpp>
  24. #endif
  25. #ifndef BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED
  26. #include <boost/regex/v4/regex_traits_defaults.hpp>
  27. #endif
  28. #ifdef BOOST_HAS_THREADS
  29. #include <boost/regex/pending/static_mutex.hpp>
  30. #endif
  31. #ifndef BOOST_REGEX_PRIMARY_TRANSFORM
  32. #include <boost/regex/v4/primary_transform.hpp>
  33. #endif
  34. #ifndef BOOST_REGEX_OBJECT_CACHE_HPP
  35. #include <boost/regex/pending/object_cache.hpp>
  36. #endif
  37. #include <istream>
  38. #include <ios>
  39. #include <climits>
  40. #ifdef BOOST_MSVC
  41. #pragma warning(push)
  42. #pragma warning(disable: 4103)
  43. #endif
  44. #ifdef BOOST_HAS_ABI_HEADERS
  45. # include BOOST_ABI_PREFIX
  46. #endif
  47. #ifdef BOOST_MSVC
  48. #pragma warning(pop)
  49. #endif
  50. #ifdef BOOST_MSVC
  51. #pragma warning(push)
  52. #pragma warning(disable:4786 4251)
  53. #endif
  54. namespace boost{
  55. //
  56. // forward declaration is needed by some compilers:
  57. //
  58. template <class charT>
  59. class cpp_regex_traits;
  60. namespace re_detail{
  //
  // class parser_buf:
  // a read-only stream buffer which wraps around a pair of pointers.
  // The characters stay owned by the caller: setbuf() only records the
  // range [s, s + n) as the get area, nothing is copied.
  //
  template <class charT,
            class traits = ::std::char_traits<charT> >
  class parser_buf : public ::std::basic_streambuf<charT, traits>
  {
     typedef ::std::basic_streambuf<charT, traits> base_type;
     typedef typename base_type::int_type int_type;
     typedef typename base_type::char_type char_type;
     typedef typename base_type::pos_type pos_type;
     typedef ::std::streamsize streamsize;
     typedef typename base_type::off_type off_type;
  public:
     // starts out wrapping an empty (null) range:
     parser_buf() : base_type() { setbuf(0, 0); }
     // returns the current read position inside the wrapped range:
     const charT* getnext() { return this->gptr(); }
  protected:
     std::basic_streambuf<charT, traits>* setbuf(char_type* s, streamsize n);
     typename parser_buf<charT, traits>::pos_type seekpos(pos_type sp, ::std::ios_base::openmode which);
     typename parser_buf<charT, traits>::pos_type seekoff(off_type off, ::std::ios_base::seekdir way, ::std::ios_base::openmode which);
  private:
     // non-copyable: declared but intentionally never defined
     parser_buf& operator=(const parser_buf&);
     parser_buf(const parser_buf&);
  };

  //
  // setbuf:
  // makes [s, s + n) the get area, with the read position at s.
  // Always returns this.
  //
  template<class charT, class traits>
  std::basic_streambuf<charT, traits>*
     parser_buf<charT, traits>::setbuf(char_type* s, streamsize n)
  {
     this->setg(s, s, s + n);
     return this;
  }

  //
  // seekoff:
  // moves the read position relative to the beginning, the current position
  // or the end of the wrapped range.
  //
  // Returns the new position, or pos_type(off_type(-1)) when the output
  // sequence is requested or the target lies outside [0, size].
  // An unrecognised seekdir leaves the position alone and reports it.
  //
  template<class charT, class traits>
  typename parser_buf<charT, traits>::pos_type
     parser_buf<charT, traits>::seekoff(off_type off, ::std::ios_base::seekdir way, ::std::ios_base::openmode which)
  {
     // this buffer has no put area:
     if(which & ::std::ios_base::out)
        return pos_type(off_type(-1));
     std::ptrdiff_t size = this->egptr() - this->eback();
     std::ptrdiff_t pos = this->gptr() - this->eback();
     charT* g = this->eback();
     if(way == ::std::ios_base::beg)
     {
        if((off < 0) || (off > size))
           return pos_type(off_type(-1));
        this->setg(g, g + off, g + size);
     }
     else if(way == ::std::ios_base::end)
     {
        if((off > size) || (off < -size))
           return pos_type(off_type(-1));
        if(off > 0)
        {
           // legacy behaviour, kept for existing callers: a positive
           // offset is a distance counted back from the end.
           this->setg(g, g + size - off, g + size);
        }
        else
        {
           // standard behaviour: new position = end + off, off <= 0.
           this->setg(g, g + size + off, g + size);
        }
     }
     else if(way == ::std::ios_base::cur)
     {
        std::ptrdiff_t newpos = static_cast<std::ptrdiff_t>(pos + off);
        if((newpos < 0) || (newpos > size))
           return pos_type(off_type(-1));
        this->setg(g, g + newpos, g + size);
     }
  #ifdef BOOST_MSVC
  #pragma warning(push)
  #pragma warning(disable:4244)
  #endif
     return static_cast<pos_type>(this->gptr() - this->eback());
  #ifdef BOOST_MSVC
  #pragma warning(pop)
  #endif
  }

  //
  // seekpos:
  // moves the read position to the absolute position sp.
  //
  // Returns sp on success, or pos_type(off_type(-1)) when the output
  // sequence is requested or sp lies outside [0, size]; on failure the
  // read position is left untouched.
  //
  template<class charT, class traits>
  typename parser_buf<charT, traits>::pos_type
     parser_buf<charT, traits>::seekpos(pos_type sp, ::std::ios_base::openmode which)
  {
     if(which & ::std::ios_base::out)
        return pos_type(off_type(-1));
     off_type size = static_cast<off_type>(this->egptr() - this->eback());
     off_type target = off_type(sp);
     // reject negative targets too, otherwise we would point before the buffer:
     if((target < 0) || (target > size))
        return pos_type(off_type(-1));
     charT* g = this->eback();
     this->setg(g, g + target, g + size);
     return sp;
  }
  //
  // class cpp_regex_traits_base:
  // bundles a locale together with pointers to the facets of that
  // locale which the traits classes use.
  //
  template <class charT>
  struct cpp_regex_traits_base
  {
     // imbue() fills in the locale and every facet pointer below:
     cpp_regex_traits_base(const std::locale& l)
     {
        imbue(l);
     }
     std::locale imbue(const std::locale& l);

     std::locale m_locale;
     std::ctype<charT> const* m_pctype;
  #ifndef BOOST_NO_STD_MESSAGES
     std::messages<charT> const* m_pmessages;
  #endif
     std::collate<charT> const* m_pcollate;

     //
     // ordering over the facet addresses: the ctype facet decides first,
     // then the messages facet (when available), then the collate facet.
     //
     bool operator<(const cpp_regex_traits_base& b)const
     {
        if(m_pctype != b.m_pctype)
           return m_pctype < b.m_pctype;
  #ifndef BOOST_NO_STD_MESSAGES
        if(m_pmessages != b.m_pmessages)
           return m_pmessages < b.m_pmessages;
  #endif
        return m_pcollate < b.m_pcollate;
     }
     //
     // two objects are equal when they refer to the very same facets;
     // the locale object itself is not compared.
     //
     bool operator==(const cpp_regex_traits_base& b)const
     {
        if(m_pctype != b.m_pctype)
           return false;
  #ifndef BOOST_NO_STD_MESSAGES
        if(m_pmessages != b.m_pmessages)
           return false;
  #endif
        return m_pcollate == b.m_pcollate;
     }
  };
  192. template <class charT>
  193. std::locale cpp_regex_traits_base<charT>::imbue(const std::locale& l)
  194. {
  195. std::locale result(m_locale);
  196. m_locale = l;
  197. m_pctype = &BOOST_USE_FACET(std::ctype<charT>, l);
  198. #ifndef BOOST_NO_STD_MESSAGES
  199. m_pmessages = BOOST_HAS_FACET(std::messages<charT>, l) ? &BOOST_USE_FACET(std::messages<charT>, l) : 0;
  200. #endif
  201. m_pcollate = &BOOST_USE_FACET(std::collate<charT>, l);
  202. return result;
  203. }
  204. //
  205. // class cpp_regex_traits_char_layer:
  206. // implements methods that require specialisation for narrow characters:
  207. //
  208. template <class charT>
  209. class cpp_regex_traits_char_layer : public cpp_regex_traits_base<charT>
  210. {
  211. typedef std::basic_string<charT> string_type;
  212. typedef std::map<charT, regex_constants::syntax_type> map_type;
  213. typedef typename map_type::const_iterator map_iterator_type;
  214. public:
  215. cpp_regex_traits_char_layer(const std::locale& l)
  216. : cpp_regex_traits_base<charT>(l)
  217. {
  218. init();
  219. }
  220. cpp_regex_traits_char_layer(const cpp_regex_traits_base<charT>& b)
  221. : cpp_regex_traits_base<charT>(b)
  222. {
  223. init();
  224. }
  225. void init();
  226. regex_constants::syntax_type syntax_type(charT c)const
  227. {
  228. map_iterator_type i = m_char_map.find(c);
  229. return ((i == m_char_map.end()) ? 0 : i->second);
  230. }
  231. regex_constants::escape_syntax_type escape_syntax_type(charT c) const
  232. {
  233. map_iterator_type i = m_char_map.find(c);
  234. if(i == m_char_map.end())
  235. {
  236. if(this->m_pctype->is(std::ctype_base::lower, c)) return regex_constants::escape_type_class;
  237. if(this->m_pctype->is(std::ctype_base::upper, c)) return regex_constants::escape_type_not_class;
  238. return 0;
  239. }
  240. return i->second;
  241. }
  242. private:
  243. string_type get_default_message(regex_constants::syntax_type);
  244. // TODO: use a hash table when available!
  245. map_type m_char_map;
  246. };
  247. template <class charT>
  248. void cpp_regex_traits_char_layer<charT>::init()
  249. {
  250. // we need to start by initialising our syntax map so we know which
  251. // character is used for which purpose:
  252. #ifndef BOOST_NO_STD_MESSAGES
  253. #ifndef __IBMCPP__
  254. typename std::messages<charT>::catalog cat = static_cast<std::messages<char>::catalog>(-1);
  255. #else
  256. typename std::messages<charT>::catalog cat = reinterpret_cast<std::messages<char>::catalog>(-1);
  257. #endif
  258. std::string cat_name(cpp_regex_traits<charT>::get_catalog_name());
  259. if(cat_name.size() && (this->m_pmessages != 0))
  260. {
  261. cat = this->m_pmessages->open(
  262. cat_name,
  263. this->m_locale);
  264. if((int)cat < 0)
  265. {
  266. std::string m("Unable to open message catalog: ");
  267. std::runtime_error err(m + cat_name);
  268. boost::re_detail::raise_runtime_error(err);
  269. }
  270. }
  271. //
  272. // if we have a valid catalog then load our messages:
  273. //
  274. if((int)cat >= 0)
  275. {
  276. #ifndef BOOST_NO_EXCEPTIONS
  277. try{
  278. #endif
  279. for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i)
  280. {
  281. string_type mss = this->m_pmessages->get(cat, 0, i, get_default_message(i));
  282. for(typename string_type::size_type j = 0; j < mss.size(); ++j)
  283. {
  284. m_char_map[mss[j]] = i;
  285. }
  286. }
  287. this->m_pmessages->close(cat);
  288. #ifndef BOOST_NO_EXCEPTIONS
  289. }
  290. catch(...)
  291. {
  292. if(this->m_pmessages)
  293. this->m_pmessages->close(cat);
  294. throw;
  295. }
  296. #endif
  297. }
  298. else
  299. {
  300. #endif
  301. for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i)
  302. {
  303. const char* ptr = get_default_syntax(i);
  304. while(ptr && *ptr)
  305. {
  306. m_char_map[this->m_pctype->widen(*ptr)] = i;
  307. ++ptr;
  308. }
  309. }
  310. #ifndef BOOST_NO_STD_MESSAGES
  311. }
  312. #endif
  313. }
  314. template <class charT>
  315. typename cpp_regex_traits_char_layer<charT>::string_type
  316. cpp_regex_traits_char_layer<charT>::get_default_message(regex_constants::syntax_type i)
  317. {
  318. const char* ptr = get_default_syntax(i);
  319. string_type result;
  320. while(ptr && *ptr)
  321. {
  322. result.append(1, this->m_pctype->widen(*ptr));
  323. ++ptr;
  324. }
  325. return result;
  326. }
  327. //
  328. // specialised version for narrow characters:
  329. //
  330. template <>
  331. class BOOST_REGEX_DECL cpp_regex_traits_char_layer<char> : public cpp_regex_traits_base<char>
  332. {
  333. typedef std::string string_type;
  334. public:
  335. cpp_regex_traits_char_layer(const std::locale& l)
  336. : cpp_regex_traits_base<char>(l)
  337. {
  338. init();
  339. }
  340. cpp_regex_traits_char_layer(const cpp_regex_traits_base<char>& l)
  341. : cpp_regex_traits_base<char>(l)
  342. {
  343. init();
  344. }
  345. regex_constants::syntax_type syntax_type(char c)const
  346. {
  347. return m_char_map[static_cast<unsigned char>(c)];
  348. }
  349. regex_constants::escape_syntax_type escape_syntax_type(char c) const
  350. {
  351. return m_char_map[static_cast<unsigned char>(c)];
  352. }
  353. private:
  354. regex_constants::syntax_type m_char_map[1u << CHAR_BIT];
  355. void init();
  356. };
  357. #ifdef BOOST_REGEX_BUGGY_CTYPE_FACET
  //
  // character class bit flags: one bit per primitive class, with
  // alnum and graph defined as unions of the primitive bits.
  //
  enum
  {
     char_class_space            = 0x0001,
     char_class_print            = 0x0002,
     char_class_cntrl            = 0x0004,
     char_class_upper            = 0x0008,
     char_class_lower            = 0x0010,
     char_class_alpha            = 0x0020,
     char_class_digit            = 0x0040,
     char_class_punct            = 0x0080,
     char_class_xdigit           = 0x0100,
     char_class_alnum            = char_class_alpha | char_class_digit,
     char_class_graph            = char_class_alnum | char_class_punct,
     char_class_blank            = 0x0200,
     char_class_word             = 0x0400,
     char_class_unicode          = 0x0800,
     char_class_horizontal_space = 0x1000,
     char_class_vertical_space   = 0x2000
  };
  377. #endif
  378. //
  379. // class cpp_regex_traits_implementation:
  380. // provides pimpl implementation for cpp_regex_traits.
  381. //
  382. template <class charT>
  383. class cpp_regex_traits_implementation : public cpp_regex_traits_char_layer<charT>
  384. {
  385. public:
  386. typedef typename cpp_regex_traits<charT>::char_class_type char_class_type;
  387. typedef typename std::ctype<charT>::mask native_mask_type;
  388. #ifndef BOOST_REGEX_BUGGY_CTYPE_FACET
  389. BOOST_STATIC_CONSTANT(char_class_type, mask_blank = 1u << 24);
  390. BOOST_STATIC_CONSTANT(char_class_type, mask_word = 1u << 25);
  391. BOOST_STATIC_CONSTANT(char_class_type, mask_unicode = 1u << 26);
  392. BOOST_STATIC_CONSTANT(char_class_type, mask_horizontal = 1u << 27);
  393. BOOST_STATIC_CONSTANT(char_class_type, mask_vertical = 1u << 28);
  394. #endif
  395. typedef std::basic_string<charT> string_type;
  396. typedef charT char_type;
  397. //cpp_regex_traits_implementation();
  398. cpp_regex_traits_implementation(const std::locale& l)
  399. : cpp_regex_traits_char_layer<charT>(l)
  400. {
  401. init();
  402. }
  403. cpp_regex_traits_implementation(const cpp_regex_traits_base<charT>& l)
  404. : cpp_regex_traits_char_layer<charT>(l)
  405. {
  406. init();
  407. }
  408. std::string error_string(regex_constants::error_type n) const
  409. {
  410. if(!m_error_strings.empty())
  411. {
  412. std::map<int, std::string>::const_iterator p = m_error_strings.find(n);
  413. return (p == m_error_strings.end()) ? std::string(get_default_error_string(n)) : p->second;
  414. }
  415. return get_default_error_string(n);
  416. }
  417. char_class_type lookup_classname(const charT* p1, const charT* p2) const
  418. {
  419. char_class_type result = lookup_classname_imp(p1, p2);
  420. if(result == 0)
  421. {
  422. string_type temp(p1, p2);
  423. this->m_pctype->tolower(&*temp.begin(), &*temp.begin() + temp.size());
  424. result = lookup_classname_imp(&*temp.begin(), &*temp.begin() + temp.size());
  425. }
  426. return result;
  427. }
  428. string_type lookup_collatename(const charT* p1, const charT* p2) const;
  429. string_type transform_primary(const charT* p1, const charT* p2) const;
  430. string_type transform(const charT* p1, const charT* p2) const;
  431. private:
  432. std::map<int, std::string> m_error_strings; // error messages indexed by numberic ID
  433. std::map<string_type, char_class_type> m_custom_class_names; // character class names
  434. std::map<string_type, string_type> m_custom_collate_names; // collating element names
  435. unsigned m_collate_type; // the form of the collation string
  436. charT m_collate_delim; // the collation group delimiter
  437. //
  438. // helpers:
  439. //
  440. char_class_type lookup_classname_imp(const charT* p1, const charT* p2) const;
  441. void init();
  442. #ifdef BOOST_REGEX_BUGGY_CTYPE_FACET
  443. public:
  444. bool isctype(charT c, char_class_type m)const;
  445. #endif
  446. };
  447. #ifndef BOOST_REGEX_BUGGY_CTYPE_FACET
  448. #if !defined(BOOST_NO_INCLASS_MEMBER_INITIALIZATION)
  449. template <class charT>
  450. typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_blank;
  451. template <class charT>
  452. typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_word;
  453. template <class charT>
  454. typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_unicode;
  455. template <class charT>
  456. typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_vertical;
  457. template <class charT>
  458. typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_horizontal;
  459. #endif
  460. #endif
  461. template <class charT>
  462. typename cpp_regex_traits_implementation<charT>::string_type
  463. cpp_regex_traits_implementation<charT>::transform_primary(const charT* p1, const charT* p2) const
  464. {
  465. //
  466. // PRECONDITIONS:
  467. //
  468. // A bug in gcc 3.2 (and maybe other versions as well) treats
  469. // p1 as a null terminated string, for efficiency reasons
  470. // we work around this elsewhere, but just assert here that
  471. // we adhere to gcc's (buggy) preconditions...
  472. //
  473. BOOST_ASSERT(*p2 == 0);
  474. string_type result;
  475. //
  476. // swallowing all exceptions here is a bad idea
  477. // however at least one std lib will always throw
  478. // std::bad_alloc for certain arguments...
  479. //
  480. #ifndef BOOST_NO_EXCEPTIONS
  481. try{
  482. #endif
  483. //
  484. // What we do here depends upon the format of the sort key returned by
  485. // sort key returned by this->transform:
  486. //
  487. switch(m_collate_type)
  488. {
  489. case sort_C:
  490. case sort_unknown:
  491. // the best we can do is translate to lower case, then get a regular sort key:
  492. {
  493. result.assign(p1, p2);
  494. this->m_pctype->tolower(&*result.begin(), &*result.begin() + result.size());
  495. result = this->m_pcollate->transform(&*result.begin(), &*result.begin() + result.size());
  496. break;
  497. }
  498. case sort_fixed:
  499. {
  500. // get a regular sort key, and then truncate it:
  501. result.assign(this->m_pcollate->transform(p1, p2));
  502. result.erase(this->m_collate_delim);
  503. break;
  504. }
  505. case sort_delim:
  506. // get a regular sort key, and then truncate everything after the delim:
  507. result.assign(this->m_pcollate->transform(p1, p2));
  508. std::size_t i;
  509. for(i = 0; i < result.size(); ++i)
  510. {
  511. if(result[i] == m_collate_delim)
  512. break;
  513. }
  514. result.erase(i);
  515. break;
  516. }
  517. #ifndef BOOST_NO_EXCEPTIONS
  518. }catch(...){}
  519. #endif
  520. while(result.size() && (charT(0) == *result.rbegin()))
  521. result.erase(result.size() - 1);
  522. if(result.empty())
  523. {
  524. // character is ignorable at the primary level:
  525. result = string_type(1, charT(0));
  526. }
  527. return result;
  528. }
  529. template <class charT>
  530. typename cpp_regex_traits_implementation<charT>::string_type
  531. cpp_regex_traits_implementation<charT>::transform(const charT* p1, const charT* p2) const
  532. {
  533. //
  534. // PRECONDITIONS:
  535. //
  536. // A bug in gcc 3.2 (and maybe other versions as well) treats
  537. // p1 as a null terminated string, for efficiency reasons
  538. // we work around this elsewhere, but just assert here that
  539. // we adhere to gcc's (buggy) preconditions...
  540. //
  541. BOOST_ASSERT(*p2 == 0);
  542. //
  543. // swallowing all exceptions here is a bad idea
  544. // however at least one std lib will always throw
  545. // std::bad_alloc for certain arguments...
  546. //
  547. string_type result;
  548. #ifndef BOOST_NO_EXCEPTIONS
  549. try{
  550. #endif
  551. result = this->m_pcollate->transform(p1, p2);
  552. //
  553. // Borland's STLPort version returns a NULL-terminated
  554. // string that has garbage at the end - each call to
  555. // std::collate<wchar_t>::transform returns a different string!
  556. // So as a workaround, we'll truncate the string at the first NULL
  557. // which _seems_ to work....
  558. #if BOOST_WORKAROUND(__BORLANDC__, < 0x580)
  559. result.erase(result.find(charT(0)));
  560. #else
  561. //
  562. // some implementations (Dinkumware) append unnecessary trailing \0's:
  563. while(result.size() && (charT(0) == *result.rbegin()))
  564. result.erase(result.size() - 1);
  565. #endif
  566. BOOST_ASSERT(std::find(result.begin(), result.end(), charT(0)) == result.end());
  567. #ifndef BOOST_NO_EXCEPTIONS
  568. }
  569. catch(...)
  570. {
  571. }
  572. #endif
  573. return result;
  574. }
  575. template <class charT>
  576. typename cpp_regex_traits_implementation<charT>::string_type
  577. cpp_regex_traits_implementation<charT>::lookup_collatename(const charT* p1, const charT* p2) const
  578. {
  579. typedef typename std::map<string_type, string_type>::const_iterator iter_type;
  580. if(m_custom_collate_names.size())
  581. {
  582. iter_type pos = m_custom_collate_names.find(string_type(p1, p2));
  583. if(pos != m_custom_collate_names.end())
  584. return pos->second;
  585. }
  586. #if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\
  587. && !BOOST_WORKAROUND(BOOST_MSVC, < 1300)\
  588. && !BOOST_WORKAROUND(__BORLANDC__, <= 0x0551)
  589. std::string name(p1, p2);
  590. #else
  591. std::string name;
  592. const charT* p0 = p1;
  593. while(p0 != p2)
  594. name.append(1, char(*p0++));
  595. #endif
  596. name = lookup_default_collate_name(name);
  597. #if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\
  598. && !BOOST_WORKAROUND(BOOST_MSVC, < 1300)\
  599. && !BOOST_WORKAROUND(__BORLANDC__, <= 0x0551)
  600. if(name.size())
  601. return string_type(name.begin(), name.end());
  602. #else
  603. if(name.size())
  604. {
  605. string_type result;
  606. typedef std::string::const_iterator iter;
  607. iter b = name.begin();
  608. iter e = name.end();
  609. while(b != e)
  610. result.append(1, charT(*b++));
  611. return result;
  612. }
  613. #endif
  614. if(p2 - p1 == 1)
  615. return string_type(1, *p1);
  616. return string_type();
  617. }
  618. template <class charT>
  619. void cpp_regex_traits_implementation<charT>::init()
  620. {
  621. #ifndef BOOST_NO_STD_MESSAGES
  622. #ifndef __IBMCPP__
  623. typename std::messages<charT>::catalog cat = static_cast<std::messages<char>::catalog>(-1);
  624. #else
  625. typename std::messages<charT>::catalog cat = reinterpret_cast<std::messages<char>::catalog>(-1);
  626. #endif
  627. std::string cat_name(cpp_regex_traits<charT>::get_catalog_name());
  628. if(cat_name.size() && (this->m_pmessages != 0))
  629. {
  630. cat = this->m_pmessages->open(
  631. cat_name,
  632. this->m_locale);
  633. if((int)cat < 0)
  634. {
  635. std::string m("Unable to open message catalog: ");
  636. std::runtime_error err(m + cat_name);
  637. boost::re_detail::raise_runtime_error(err);
  638. }
  639. }
  640. //
  641. // if we have a valid catalog then load our messages:
  642. //
  643. if((int)cat >= 0)
  644. {
  645. //
  646. // Error messages:
  647. //
  648. for(boost::regex_constants::error_type i = static_cast<boost::regex_constants::error_type>(0);
  649. i <= boost::regex_constants::error_unknown;
  650. i = static_cast<boost::regex_constants::error_type>(i + 1))
  651. {
  652. const char* p = get_default_error_string(i);
  653. string_type default_message;
  654. while(*p)
  655. {
  656. default_message.append(1, this->m_pctype->widen(*p));
  657. ++p;
  658. }
  659. string_type s = this->m_pmessages->get(cat, 0, i+200, default_message);
  660. std::string result;
  661. for(std::string::size_type j = 0; j < s.size(); ++j)
  662. {
  663. result.append(1, this->m_pctype->narrow(s[j], 0));
  664. }
  665. m_error_strings[i] = result;
  666. }
  667. //
  668. // Custom class names:
  669. //
  670. #ifndef BOOST_REGEX_BUGGY_CTYPE_FACET
  671. static const char_class_type masks[16] =
  672. {
  673. std::ctype<charT>::alnum,
  674. std::ctype<charT>::alpha,
  675. std::ctype<charT>::cntrl,
  676. std::ctype<charT>::digit,
  677. std::ctype<charT>::graph,
  678. cpp_regex_traits_implementation<charT>::mask_horizontal,
  679. std::ctype<charT>::lower,
  680. std::ctype<charT>::print,
  681. std::ctype<charT>::punct,
  682. std::ctype<charT>::space,
  683. std::ctype<charT>::upper,
  684. cpp_regex_traits_implementation<charT>::mask_vertical,
  685. std::ctype<charT>::xdigit,
  686. cpp_regex_traits_implementation<charT>::mask_blank,
  687. cpp_regex_traits_implementation<charT>::mask_word,
  688. cpp_regex_traits_implementation<charT>::mask_unicode,
  689. };
  690. #else
  691. static const char_class_type masks[16] =
  692. {
  693. ::boost::re_detail::char_class_alnum,
  694. ::boost::re_detail::char_class_alpha,
  695. ::boost::re_detail::char_class_cntrl,
  696. ::boost::re_detail::char_class_digit,
  697. ::boost::re_detail::char_class_graph,
  698. ::boost::re_detail::char_class_horizontal_space,
  699. ::boost::re_detail::char_class_lower,
  700. ::boost::re_detail::char_class_print,
  701. ::boost::re_detail::char_class_punct,
  702. ::boost::re_detail::char_class_space,
  703. ::boost::re_detail::char_class_upper,
  704. ::boost::re_detail::char_class_vertical_space,
  705. ::boost::re_detail::char_class_xdigit,
  706. ::boost::re_detail::char_class_blank,
  707. ::boost::re_detail::char_class_word,
  708. ::boost::re_detail::char_class_unicode,
  709. };
  710. #endif
  711. static const string_type null_string;
  712. for(unsigned int j = 0; j <= 13; ++j)
  713. {
  714. string_type s(this->m_pmessages->get(cat, 0, j+300, null_string));
  715. if(s.size())
  716. this->m_custom_class_names[s] = masks[j];
  717. }
  718. }
  719. #endif
  720. //
  721. // get the collation format used by m_pcollate:
  722. //
  723. m_collate_type = re_detail::find_sort_syntax(this, &m_collate_delim);
  724. }
  725. template <class charT>
  726. typename cpp_regex_traits_implementation<charT>::char_class_type
  727. cpp_regex_traits_implementation<charT>::lookup_classname_imp(const charT* p1, const charT* p2) const
  728. {
  729. #ifndef BOOST_REGEX_BUGGY_CTYPE_FACET
  730. static const char_class_type masks[22] =
  731. {
  732. 0,
  733. std::ctype<char>::alnum,
  734. std::ctype<char>::alpha,
  735. cpp_regex_traits_implementation<charT>::mask_blank,
  736. std::ctype<char>::cntrl,
  737. std::ctype<char>::digit,
  738. std::ctype<char>::digit,
  739. std::ctype<char>::graph,
  740. cpp_regex_traits_implementation<charT>::mask_horizontal,
  741. std::ctype<char>::lower,
  742. std::ctype<char>::lower,
  743. std::ctype<char>::print,
  744. std::ctype<char>::punct,
  745. std::ctype<char>::space,
  746. std::ctype<char>::space,
  747. std::ctype<char>::upper,
  748. cpp_regex_traits_implementation<charT>::mask_unicode,
  749. std::ctype<char>::upper,
  750. cpp_regex_traits_implementation<charT>::mask_vertical,
  751. std::ctype<char>::alnum | cpp_regex_traits_implementation<charT>::mask_word,
  752. std::ctype<char>::alnum | cpp_regex_traits_implementation<charT>::mask_word,
  753. std::ctype<char>::xdigit,
  754. };
  755. #else
  756. static const char_class_type masks[22] =
  757. {
  758. 0,
  759. ::boost::re_detail::char_class_alnum,
  760. ::boost::re_detail::char_class_alpha,
  761. ::boost::re_detail::char_class_blank,
  762. ::boost::re_detail::char_class_cntrl,
  763. ::boost::re_detail::char_class_digit,
  764. ::boost::re_detail::char_class_digit,
  765. ::boost::re_detail::char_class_graph,
  766. ::boost::re_detail::char_class_horizontal_space,
  767. ::boost::re_detail::char_class_lower,
  768. ::boost::re_detail::char_class_lower,
  769. ::boost::re_detail::char_class_print,
  770. ::boost::re_detail::char_class_punct,
  771. ::boost::re_detail::char_class_space,
  772. ::boost::re_detail::char_class_space,
  773. ::boost::re_detail::char_class_upper,
  774. ::boost::re_detail::char_class_unicode,
  775. ::boost::re_detail::char_class_upper,
  776. ::boost::re_detail::char_class_vertical_space,
  777. ::boost::re_detail::char_class_alnum | ::boost::re_detail::char_class_word,
  778. ::boost::re_detail::char_class_alnum | ::boost::re_detail::char_class_word,
  779. ::boost::re_detail::char_class_xdigit,
  780. };
  781. #endif
  782. if(m_custom_class_names.size())
  783. {
  784. typedef typename std::map<std::basic_string<charT>, char_class_type>::const_iterator map_iter;
  785. map_iter pos = m_custom_class_names.find(string_type(p1, p2));
  786. if(pos != m_custom_class_names.end())
  787. return pos->second;
  788. }
  789. std::size_t state_id = 1 + re_detail::get_default_class_id(p1, p2);
  790. BOOST_ASSERT(state_id < sizeof(masks) / sizeof(masks[0]));
  791. return masks[state_id];
  792. }
  #ifdef BOOST_REGEX_BUGGY_CTYPE_FACET
  //
  // isctype:
  // returns true if c belongs to at least one of the classes whose
  // bit is set in mask. The classes are tested one after another and
  // the first one that matches decides.
  //
  template <class charT>
  bool cpp_regex_traits_implementation<charT>::isctype(const charT c, char_class_type mask) const
  {
     //
     // classes that correspond directly to a std::ctype category:
     //
     if((mask & ::boost::re_detail::char_class_space) && (this->m_pctype->is(std::ctype<charT>::space, c)))
        return true;
     if((mask & ::boost::re_detail::char_class_print) && (this->m_pctype->is(std::ctype<charT>::print, c)))
        return true;
     if((mask & ::boost::re_detail::char_class_cntrl) && (this->m_pctype->is(std::ctype<charT>::cntrl, c)))
        return true;
     if((mask & ::boost::re_detail::char_class_upper) && (this->m_pctype->is(std::ctype<charT>::upper, c)))
        return true;
     if((mask & ::boost::re_detail::char_class_lower) && (this->m_pctype->is(std::ctype<charT>::lower, c)))
        return true;
     if((mask & ::boost::re_detail::char_class_alpha) && (this->m_pctype->is(std::ctype<charT>::alpha, c)))
        return true;
     if((mask & ::boost::re_detail::char_class_digit) && (this->m_pctype->is(std::ctype<charT>::digit, c)))
        return true;
     if((mask & ::boost::re_detail::char_class_punct) && (this->m_pctype->is(std::ctype<charT>::punct, c)))
        return true;
     if((mask & ::boost::re_detail::char_class_xdigit) && (this->m_pctype->is(std::ctype<charT>::xdigit, c)))
        return true;
     //
     // classes that we have to work out for ourselves:
     //
     if((mask & ::boost::re_detail::char_class_blank) && (this->m_pctype->is(std::ctype<charT>::space, c)) && !::boost::re_detail::is_separator(c))
        return true;
     if((mask & ::boost::re_detail::char_class_word) && (c == '_'))
        return true;
     if((mask & ::boost::re_detail::char_class_unicode) && ::boost::re_detail::is_extended(c))
        return true;
     if((mask & ::boost::re_detail::char_class_vertical_space) && (is_separator(c) || (c == '\v')))
        return true;
     // horizontal space is any space character which is not vertical space:
     return (mask & ::boost::re_detail::char_class_horizontal_space) && this->m_pctype->is(std::ctype<charT>::space, c) && !(is_separator(c) || (c == '\v'));
  }
  #endif
  814. template <class charT>
  815. inline boost::shared_ptr<const cpp_regex_traits_implementation<charT> > create_cpp_regex_traits(const std::locale& l BOOST_APPEND_EXPLICIT_TEMPLATE_TYPE(charT))
  816. {
  817. cpp_regex_traits_base<charT> key(l);
  818. return ::boost::object_cache<cpp_regex_traits_base<charT>, cpp_regex_traits_implementation<charT> >::get(key, 5);
  819. }
  820. } // re_detail
  821. template <class charT>
  822. class cpp_regex_traits
  823. {
  824. private:
  825. typedef std::ctype<charT> ctype_type;
  826. public:
  827. typedef charT char_type;
  828. typedef std::size_t size_type;
  829. typedef std::basic_string<char_type> string_type;
  830. typedef std::locale locale_type;
  831. typedef boost::uint_least32_t char_class_type;
  832. struct boost_extensions_tag{};
  833. cpp_regex_traits()
  834. : m_pimpl(re_detail::create_cpp_regex_traits<charT>(std::locale()))
  835. { }
  836. static size_type length(const char_type* p)
  837. {
  838. return std::char_traits<charT>::length(p);
  839. }
  840. regex_constants::syntax_type syntax_type(charT c)const
  841. {
  842. return m_pimpl->syntax_type(c);
  843. }
  844. regex_constants::escape_syntax_type escape_syntax_type(charT c) const
  845. {
  846. return m_pimpl->escape_syntax_type(c);
  847. }
  848. charT translate(charT c) const
  849. {
  850. return c;
  851. }
  852. charT translate_nocase(charT c) const
  853. {
  854. return m_pimpl->m_pctype->tolower(c);
  855. }
  856. charT translate(charT c, bool icase) const
  857. {
  858. return icase ? m_pimpl->m_pctype->tolower(c) : c;
  859. }
  860. charT tolower(charT c) const
  861. {
  862. return m_pimpl->m_pctype->tolower(c);
  863. }
  864. charT toupper(charT c) const
  865. {
  866. return m_pimpl->m_pctype->toupper(c);
  867. }
  868. string_type transform(const charT* p1, const charT* p2) const
  869. {
  870. return m_pimpl->transform(p1, p2);
  871. }
  872. string_type transform_primary(const charT* p1, const charT* p2) const
  873. {
  874. return m_pimpl->transform_primary(p1, p2);
  875. }
  876. char_class_type lookup_classname(const charT* p1, const charT* p2) const
  877. {
  878. return m_pimpl->lookup_classname(p1, p2);
  879. }
  880. string_type lookup_collatename(const charT* p1, const charT* p2) const
  881. {
  882. return m_pimpl->lookup_collatename(p1, p2);
  883. }
  884. bool isctype(charT c, char_class_type f) const
  885. {
  886. #ifndef BOOST_REGEX_BUGGY_CTYPE_FACET
  887. typedef typename std::ctype<charT>::mask ctype_mask;
  888. static const ctype_mask mask_base =
  889. static_cast<ctype_mask>(
  890. std::ctype<charT>::alnum
  891. | std::ctype<charT>::alpha
  892. | std::ctype<charT>::cntrl
  893. | std::ctype<charT>::digit
  894. | std::ctype<charT>::graph
  895. | std::ctype<charT>::lower
  896. | std::ctype<charT>::print
  897. | std::ctype<charT>::punct
  898. | std::ctype<charT>::space
  899. | std::ctype<charT>::upper
  900. | std::ctype<charT>::xdigit);
  901. if((f & mask_base)
  902. && (m_pimpl->m_pctype->is(
  903. static_cast<ctype_mask>(f & mask_base), c)))
  904. return true;
  905. else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_unicode) && re_detail::is_extended(c))
  906. return true;
  907. else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_word) && (c == '_'))
  908. return true;
  909. else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_blank)
  910. && m_pimpl->m_pctype->is(std::ctype<charT>::space, c)
  911. && !re_detail::is_separator(c))
  912. return true;
  913. else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_vertical)
  914. && (::boost::re_detail::is_separator(c) || (c == '\v')))
  915. return true;
  916. else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_horizontal)
  917. && this->isctype(c, std::ctype<charT>::space) && !this->isctype(c, re_detail::cpp_regex_traits_implementation<charT>::mask_vertical))
  918. return true;
  919. return false;
  920. #else
  921. return m_pimpl->isctype(c, f);
  922. #endif
  923. }
  924. int toi(const charT*& p1, const charT* p2, int radix)const;
  925. int value(charT c, int radix)const
  926. {
  927. const charT* pc = &c;
  928. return toi(pc, pc + 1, radix);
  929. }
  930. locale_type imbue(locale_type l)
  931. {
  932. std::locale result(getloc());
  933. m_pimpl = re_detail::create_cpp_regex_traits<charT>(l);
  934. return result;
  935. }
  936. locale_type getloc()const
  937. {
  938. return m_pimpl->m_locale;
  939. }
  940. std::string error_string(regex_constants::error_type n) const
  941. {
  942. return m_pimpl->error_string(n);
  943. }
  944. //
  945. // extension:
  946. // set the name of the message catalog in use (defaults to "boost_regex").
  947. //
  948. static std::string catalog_name(const std::string& name);
  949. static std::string get_catalog_name();
  950. private:
  951. boost::shared_ptr<const re_detail::cpp_regex_traits_implementation<charT> > m_pimpl;
  952. //
  953. // catalog name handler:
  954. //
  955. static std::string& get_catalog_name_inst();
  956. #ifdef BOOST_HAS_THREADS
  957. static static_mutex& get_mutex_inst();
  958. #endif
  959. };
  960. template <class charT>
  961. int cpp_regex_traits<charT>::toi(const charT*& first, const charT* last, int radix)const
  962. {
  963. re_detail::parser_buf<charT> sbuf; // buffer for parsing numbers.
  964. std::basic_istream<charT> is(&sbuf); // stream for parsing numbers.
  965. // we do NOT want to parse any thousands separators inside the stream:
  966. last = std::find(first, last, BOOST_USE_FACET(std::numpunct<charT>, is.getloc()).thousands_sep());
  967. sbuf.pubsetbuf(const_cast<charT*>(static_cast<const charT*>(first)), static_cast<std::streamsize>(last-first));
  968. is.clear();
  969. if(std::abs(radix) == 16) is >> std::hex;
  970. else if(std::abs(radix) == 8) is >> std::oct;
  971. else is >> std::dec;
  972. int val;
  973. if(is >> val)
  974. {
  975. first = first + ((last - first) - sbuf.in_avail());
  976. return val;
  977. }
  978. else
  979. return -1;
  980. }
  981. template <class charT>
  982. std::string cpp_regex_traits<charT>::catalog_name(const std::string& name)
  983. {
  984. #ifdef BOOST_HAS_THREADS
  985. static_mutex::scoped_lock lk(get_mutex_inst());
  986. #endif
  987. std::string result(get_catalog_name_inst());
  988. get_catalog_name_inst() = name;
  989. return result;
  990. }
  991. template <class charT>
  992. std::string& cpp_regex_traits<charT>::get_catalog_name_inst()
  993. {
  994. static std::string s_name;
  995. return s_name;
  996. }
  997. template <class charT>
  998. std::string cpp_regex_traits<charT>::get_catalog_name()
  999. {
  1000. #ifdef BOOST_HAS_THREADS
  1001. static_mutex::scoped_lock lk(get_mutex_inst());
  1002. #endif
  1003. std::string result(get_catalog_name_inst());
  1004. return result;
  1005. }
  #ifdef BOOST_HAS_THREADS
  //
  // get_mutex_inst:
  // Returns the mutex that catalog_name() and get_catalog_name() lock
  // while they touch the stored catalog name. It is a function-local
  // static initialised with BOOST_STATIC_MUTEX_INIT.
  //
  template <class charT>
  static_mutex& cpp_regex_traits<charT>::get_mutex_inst()
  {
     static static_mutex catalog_mutex = BOOST_STATIC_MUTEX_INIT;
     return catalog_mutex;
  }
  #endif
  1014. } // boost
  1015. #ifdef BOOST_MSVC
  1016. #pragma warning(pop)
  1017. #endif
  1018. #ifdef BOOST_MSVC
  1019. #pragma warning(push)
  1020. #pragma warning(disable: 4103)
  1021. #endif
  1022. #ifdef BOOST_HAS_ABI_HEADERS
  1023. # include BOOST_ABI_SUFFIX
  1024. #endif
  1025. #ifdef BOOST_MSVC
  1026. #pragma warning(pop)
  1027. #endif
  1028. #endif
  1029. #endif