perl_matcher_common.hpp 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996
  1. /*
  2. *
  3. * Copyright (c) 2002
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE perl_matcher_common.hpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Definitions of perl_matcher member functions that are
  16. * common to both the recursive and non-recursive versions.
  17. */
  18. #ifndef BOOST_REGEX_V4_PERL_MATCHER_COMMON_HPP
  19. #define BOOST_REGEX_V4_PERL_MATCHER_COMMON_HPP
  20. #ifdef BOOST_MSVC
  21. #pragma warning(push)
  22. #pragma warning(disable: 4103)
  23. #endif
  24. #ifdef BOOST_HAS_ABI_HEADERS
  25. # include BOOST_ABI_PREFIX
  26. #endif
  27. #ifdef BOOST_MSVC
  28. #pragma warning(pop)
  29. #endif
  30. #ifdef __BORLANDC__
  31. # pragma option push -w-8008 -w-8066
  32. #endif
  33. #ifdef BOOST_MSVC
  34. # pragma warning(push)
  35. # pragma warning(disable: 4800)
  36. #endif
  37. namespace boost{
  38. namespace re_detail{
  39. template <class BidiIterator, class Allocator, class traits>
  40. void perl_matcher<BidiIterator, Allocator, traits>::construct_init(const basic_regex<char_type, traits>& e, match_flag_type f)
  41. {
  42. typedef typename regex_iterator_traits<BidiIterator>::iterator_category category;
  43. typedef typename basic_regex<char_type, traits>::flag_type expression_flag_type;
  44. if(e.empty())
  45. {
  46. // precondition failure: e is not a valid regex.
  47. std::invalid_argument ex("Invalid regular expression object");
  48. boost::throw_exception(ex);
  49. }
  50. pstate = 0;
  51. m_match_flags = f;
  52. estimate_max_state_count(static_cast<category*>(0));
  53. expression_flag_type re_f = re.flags();
  54. icase = re_f & regex_constants::icase;
  55. if(!(m_match_flags & (match_perl|match_posix)))
  56. {
  57. if((re_f & (regbase::main_option_type|regbase::no_perl_ex)) == 0)
  58. m_match_flags |= match_perl;
  59. else if((re_f & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
  60. m_match_flags |= match_perl;
  61. else if((re_f & (regbase::main_option_type|regbase::literal)) == (regbase::literal))
  62. m_match_flags |= match_perl;
  63. else
  64. m_match_flags |= match_posix;
  65. }
  66. if(m_match_flags & match_posix)
  67. {
  68. m_temp_match.reset(new match_results<BidiIterator, Allocator>());
  69. m_presult = m_temp_match.get();
  70. }
  71. else
  72. m_presult = &m_result;
  73. #ifdef BOOST_REGEX_NON_RECURSIVE
  74. m_stack_base = 0;
  75. m_backup_state = 0;
  76. #endif
  77. // find the value to use for matching word boundaries:
  78. m_word_mask = re.get_data().m_word_mask;
  79. // find bitmask to use for matching '.':
  80. match_any_mask = static_cast<unsigned char>((f & match_not_dot_newline) ? re_detail::test_not_newline : re_detail::test_newline);
  81. }
  82. template <class BidiIterator, class Allocator, class traits>
  83. void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(std::random_access_iterator_tag*)
  84. {
  85. //
  86. // How many states should we allow our machine to visit before giving up?
  87. // This is a heuristic: it takes the greater of O(N^2) and O(NS^2)
  88. // where N is the length of the string, and S is the number of states
  89. // in the machine. It's tempting to up this to O(N^2S) or even O(N^2S^2)
  90. // but these take unreasonably amounts of time to bale out in pathological
  91. // cases.
  92. //
  93. // Calculate NS^2 first:
  94. //
  95. static const std::ptrdiff_t k = 100000;
  96. std::ptrdiff_t dist = boost::re_detail::distance(base, last);
  97. if(dist == 0)
  98. dist = 1;
  99. std::ptrdiff_t states = re.size();
  100. if(states == 0)
  101. states = 1;
  102. states *= states;
  103. if((std::numeric_limits<std::ptrdiff_t>::max)() / dist < states)
  104. {
  105. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  106. return;
  107. }
  108. states *= dist;
  109. if((std::numeric_limits<std::ptrdiff_t>::max)() - k < states)
  110. {
  111. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  112. return;
  113. }
  114. states += k;
  115. max_state_count = states;
  116. //
  117. // Now calculate N^2:
  118. //
  119. states = dist;
  120. if((std::numeric_limits<std::ptrdiff_t>::max)() / dist < states)
  121. {
  122. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  123. return;
  124. }
  125. states *= dist;
  126. if((std::numeric_limits<std::ptrdiff_t>::max)() - k < states)
  127. {
  128. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  129. return;
  130. }
  131. states += k;
  132. //
  133. // N^2 can be a very large number indeed, to prevent things getting out
  134. // of control, cap the max states:
  135. //
  136. if(states > BOOST_REGEX_MAX_STATE_COUNT)
  137. states = BOOST_REGEX_MAX_STATE_COUNT;
  138. //
  139. // If (the possibly capped) N^2 is larger than our first estimate,
  140. // use this instead:
  141. //
  142. if(states > max_state_count)
  143. max_state_count = states;
  144. }
  145. template <class BidiIterator, class Allocator, class traits>
  146. inline void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(void*)
  147. {
  148. // we don't know how long the sequence is:
  149. max_state_count = BOOST_REGEX_MAX_STATE_COUNT;
  150. }
  #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
  //
  // Runs the member function "proc" on this matcher through a
  // concrete_protected_call object and returns what execute() returns.
  //
  template <class BidiIterator, class Allocator, class traits>
  inline bool perl_matcher<BidiIterator, Allocator, traits>::protected_call(
     protected_proc_type proc)
  {
     typedef perl_matcher<BidiIterator, Allocator, traits> self_type;
     ::boost::re_detail::concrete_protected_call<self_type> guarded(this, proc);
     return guarded.execute();
  }
  #endif
  162. template <class BidiIterator, class Allocator, class traits>
  163. inline bool perl_matcher<BidiIterator, Allocator, traits>::match()
  164. {
  165. #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
  166. return protected_call(&perl_matcher<BidiIterator, Allocator, traits>::match_imp);
  167. #else
  168. return match_imp();
  169. #endif
  170. }
  171. template <class BidiIterator, class Allocator, class traits>
  172. bool perl_matcher<BidiIterator, Allocator, traits>::match_imp()
  173. {
  174. // initialise our stack if we are non-recursive:
  175. #ifdef BOOST_REGEX_NON_RECURSIVE
  176. save_state_init init(&m_stack_base, &m_backup_state);
  177. used_block_count = BOOST_REGEX_MAX_BLOCKS;
  178. #if !defined(BOOST_NO_EXCEPTIONS)
  179. try{
  180. #endif
  181. #endif
  182. // reset our state machine:
  183. position = base;
  184. search_base = base;
  185. state_count = 0;
  186. m_match_flags |= regex_constants::match_all;
  187. m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), search_base, last);
  188. m_presult->set_base(base);
  189. m_presult->set_named_subs(this->re.get_named_subs());
  190. if(m_match_flags & match_posix)
  191. m_result = *m_presult;
  192. verify_options(re.flags(), m_match_flags);
  193. if(0 == match_prefix())
  194. return false;
  195. return (m_result[0].second == last) && (m_result[0].first == base);
  196. #if defined(BOOST_REGEX_NON_RECURSIVE) && !defined(BOOST_NO_EXCEPTIONS)
  197. }
  198. catch(...)
  199. {
  200. // unwind all pushed states, apart from anything else this
  201. // ensures that all the states are correctly destructed
  202. // not just the memory freed.
  203. while(unwind(true)){}
  204. throw;
  205. }
  206. #endif
  207. }
  208. template <class BidiIterator, class Allocator, class traits>
  209. inline bool perl_matcher<BidiIterator, Allocator, traits>::find()
  210. {
  211. #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
  212. return protected_call(&perl_matcher<BidiIterator, Allocator, traits>::find_imp);
  213. #else
  214. return find_imp();
  215. #endif
  216. }
  217. template <class BidiIterator, class Allocator, class traits>
  218. bool perl_matcher<BidiIterator, Allocator, traits>::find_imp()
  219. {
  220. static matcher_proc_type const s_find_vtable[7] =
  221. {
  222. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_any,
  223. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_word,
  224. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_line,
  225. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_buf,
  226. &perl_matcher<BidiIterator, Allocator, traits>::match_prefix,
  227. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit,
  228. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit,
  229. };
  230. // initialise our stack if we are non-recursive:
  231. #ifdef BOOST_REGEX_NON_RECURSIVE
  232. save_state_init init(&m_stack_base, &m_backup_state);
  233. used_block_count = BOOST_REGEX_MAX_BLOCKS;
  234. #if !defined(BOOST_NO_EXCEPTIONS)
  235. try{
  236. #endif
  237. #endif
  238. state_count = 0;
  239. if((m_match_flags & regex_constants::match_init) == 0)
  240. {
  241. // reset our state machine:
  242. search_base = position = base;
  243. pstate = re.get_first_state();
  244. m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), base, last);
  245. m_presult->set_base(base);
  246. m_presult->set_named_subs(this->re.get_named_subs());
  247. m_match_flags |= regex_constants::match_init;
  248. }
  249. else
  250. {
  251. // start again:
  252. search_base = position = m_result[0].second;
  253. // If last match was null and match_not_null was not set then increment
  254. // our start position, otherwise we go into an infinite loop:
  255. if(((m_match_flags & match_not_null) == 0) && (m_result.length() == 0))
  256. {
  257. if(position == last)
  258. return false;
  259. else
  260. ++position;
  261. }
  262. // reset $` start:
  263. m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), search_base, last);
  264. //if((base != search_base) && (base == backstop))
  265. // m_match_flags |= match_prev_avail;
  266. }
  267. if(m_match_flags & match_posix)
  268. {
  269. m_result.set_size(re.mark_count(), base, last);
  270. m_result.set_base(base);
  271. }
  272. verify_options(re.flags(), m_match_flags);
  273. // find out what kind of expression we have:
  274. unsigned type = (m_match_flags & match_continuous) ?
  275. static_cast<unsigned int>(regbase::restart_continue)
  276. : static_cast<unsigned int>(re.get_restart_type());
  277. // call the appropriate search routine:
  278. matcher_proc_type proc = s_find_vtable[type];
  279. return (this->*proc)();
  280. #if defined(BOOST_REGEX_NON_RECURSIVE) && !defined(BOOST_NO_EXCEPTIONS)
  281. }
  282. catch(...)
  283. {
  284. // unwind all pushed states, apart from anything else this
  285. // ensures that all the states are correctly destructed
  286. // not just the memory freed.
  287. while(unwind(true)){}
  288. throw;
  289. }
  290. #endif
  291. }
  292. template <class BidiIterator, class Allocator, class traits>
  293. bool perl_matcher<BidiIterator, Allocator, traits>::match_prefix()
  294. {
  295. m_has_partial_match = false;
  296. m_has_found_match = false;
  297. pstate = re.get_first_state();
  298. m_presult->set_first(position);
  299. restart = position;
  300. match_all_states();
  301. if(!m_has_found_match && m_has_partial_match && (m_match_flags & match_partial))
  302. {
  303. m_has_found_match = true;
  304. m_presult->set_second(last, 0, false);
  305. position = last;
  306. if((m_match_flags & match_posix) == match_posix)
  307. {
  308. m_result.maybe_assign(*m_presult);
  309. }
  310. }
  311. #ifdef BOOST_REGEX_MATCH_EXTRA
  312. if(m_has_found_match && (match_extra & m_match_flags))
  313. {
  314. //
  315. // we have a match, reverse the capture information:
  316. //
  317. for(unsigned i = 0; i < m_presult->size(); ++i)
  318. {
  319. typename sub_match<BidiIterator>::capture_sequence_type & seq = ((*m_presult)[i]).get_captures();
  320. std::reverse(seq.begin(), seq.end());
  321. }
  322. }
  323. #endif
  324. if(!m_has_found_match)
  325. position = restart; // reset search postion
  326. return m_has_found_match;
  327. }
  328. template <class BidiIterator, class Allocator, class traits>
  329. bool perl_matcher<BidiIterator, Allocator, traits>::match_literal()
  330. {
  331. unsigned int len = static_cast<const re_literal*>(pstate)->length;
  332. const char_type* what = reinterpret_cast<const char_type*>(static_cast<const re_literal*>(pstate) + 1);
  333. //
  334. // compare string with what we stored in
  335. // our records:
  336. for(unsigned int i = 0; i < len; ++i, ++position)
  337. {
  338. if((position == last) || (traits_inst.translate(*position, icase) != what[i]))
  339. return false;
  340. }
  341. pstate = pstate->next.p;
  342. return true;
  343. }
  344. template <class BidiIterator, class Allocator, class traits>
  345. bool perl_matcher<BidiIterator, Allocator, traits>::match_start_line()
  346. {
  347. if(position == backstop)
  348. {
  349. if((m_match_flags & match_prev_avail) == 0)
  350. {
  351. if((m_match_flags & match_not_bol) == 0)
  352. {
  353. pstate = pstate->next.p;
  354. return true;
  355. }
  356. return false;
  357. }
  358. }
  359. else if(m_match_flags & match_single_line)
  360. return false;
  361. // check the previous value character:
  362. BidiIterator t(position);
  363. --t;
  364. if(position != last)
  365. {
  366. if(is_separator(*t) && !((*t == static_cast<char_type>('\r')) && (*position == static_cast<char_type>('\n'))) )
  367. {
  368. pstate = pstate->next.p;
  369. return true;
  370. }
  371. }
  372. else if(is_separator(*t))
  373. {
  374. pstate = pstate->next.p;
  375. return true;
  376. }
  377. return false;
  378. }
  379. template <class BidiIterator, class Allocator, class traits>
  380. bool perl_matcher<BidiIterator, Allocator, traits>::match_end_line()
  381. {
  382. if(position != last)
  383. {
  384. if(m_match_flags & match_single_line)
  385. return false;
  386. // we're not yet at the end so *first is always valid:
  387. if(is_separator(*position))
  388. {
  389. if((position != backstop) || (m_match_flags & match_prev_avail))
  390. {
  391. // check that we're not in the middle of \r\n sequence
  392. BidiIterator t(position);
  393. --t;
  394. if((*t == static_cast<char_type>('\r')) && (*position == static_cast<char_type>('\n')))
  395. {
  396. return false;
  397. }
  398. }
  399. pstate = pstate->next.p;
  400. return true;
  401. }
  402. }
  403. else if((m_match_flags & match_not_eol) == 0)
  404. {
  405. pstate = pstate->next.p;
  406. return true;
  407. }
  408. return false;
  409. }
  410. template <class BidiIterator, class Allocator, class traits>
  411. bool perl_matcher<BidiIterator, Allocator, traits>::match_wild()
  412. {
  413. if(position == last)
  414. return false;
  415. if(is_separator(*position) && ((match_any_mask & static_cast<const re_dot*>(pstate)->mask) == 0))
  416. return false;
  417. if((*position == char_type(0)) && (m_match_flags & match_not_dot_null))
  418. return false;
  419. pstate = pstate->next.p;
  420. ++position;
  421. return true;
  422. }
  423. template <class BidiIterator, class Allocator, class traits>
  424. bool perl_matcher<BidiIterator, Allocator, traits>::match_word_boundary()
  425. {
  426. bool b; // indcates whether next character is a word character
  427. if(position != last)
  428. {
  429. // prev and this character must be opposites:
  430. #if defined(BOOST_REGEX_USE_C_LOCALE) && defined(__GNUC__) && (__GNUC__ == 2) && (__GNUC_MINOR__ < 95)
  431. b = traits::isctype(*position, m_word_mask);
  432. #else
  433. b = traits_inst.isctype(*position, m_word_mask);
  434. #endif
  435. }
  436. else
  437. {
  438. b = (m_match_flags & match_not_eow) ? true : false;
  439. }
  440. if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
  441. {
  442. if(m_match_flags & match_not_bow)
  443. b ^= true;
  444. else
  445. b ^= false;
  446. }
  447. else
  448. {
  449. --position;
  450. b ^= traits_inst.isctype(*position, m_word_mask);
  451. ++position;
  452. }
  453. if(b)
  454. {
  455. pstate = pstate->next.p;
  456. return true;
  457. }
  458. return false; // no match if we get to here...
  459. }
  460. template <class BidiIterator, class Allocator, class traits>
  461. bool perl_matcher<BidiIterator, Allocator, traits>::match_within_word()
  462. {
  463. if(position == last)
  464. return false;
  465. // both prev and this character must be m_word_mask:
  466. bool prev = traits_inst.isctype(*position, m_word_mask);
  467. {
  468. bool b;
  469. if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
  470. return false;
  471. else
  472. {
  473. --position;
  474. b = traits_inst.isctype(*position, m_word_mask);
  475. ++position;
  476. }
  477. if(b == prev)
  478. {
  479. pstate = pstate->next.p;
  480. return true;
  481. }
  482. }
  483. return false;
  484. }
  485. template <class BidiIterator, class Allocator, class traits>
  486. bool perl_matcher<BidiIterator, Allocator, traits>::match_word_start()
  487. {
  488. if(position == last)
  489. return false; // can't be starting a word if we're already at the end of input
  490. if(!traits_inst.isctype(*position, m_word_mask))
  491. return false; // next character isn't a word character
  492. if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
  493. {
  494. if(m_match_flags & match_not_bow)
  495. return false; // no previous input
  496. }
  497. else
  498. {
  499. // otherwise inside buffer:
  500. BidiIterator t(position);
  501. --t;
  502. if(traits_inst.isctype(*t, m_word_mask))
  503. return false; // previous character not non-word
  504. }
  505. // OK we have a match:
  506. pstate = pstate->next.p;
  507. return true;
  508. }
  509. template <class BidiIterator, class Allocator, class traits>
  510. bool perl_matcher<BidiIterator, Allocator, traits>::match_word_end()
  511. {
  512. if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
  513. return false; // start of buffer can't be end of word
  514. BidiIterator t(position);
  515. --t;
  516. if(traits_inst.isctype(*t, m_word_mask) == false)
  517. return false; // previous character wasn't a word character
  518. if(position == last)
  519. {
  520. if(m_match_flags & match_not_eow)
  521. return false; // end of buffer but not end of word
  522. }
  523. else
  524. {
  525. // otherwise inside buffer:
  526. if(traits_inst.isctype(*position, m_word_mask))
  527. return false; // next character is a word character
  528. }
  529. pstate = pstate->next.p;
  530. return true; // if we fall through to here then we've succeeded
  531. }
  532. template <class BidiIterator, class Allocator, class traits>
  533. bool perl_matcher<BidiIterator, Allocator, traits>::match_buffer_start()
  534. {
  535. if((position != backstop) || (m_match_flags & match_not_bob))
  536. return false;
  537. // OK match:
  538. pstate = pstate->next.p;
  539. return true;
  540. }
  541. template <class BidiIterator, class Allocator, class traits>
  542. bool perl_matcher<BidiIterator, Allocator, traits>::match_buffer_end()
  543. {
  544. if((position != last) || (m_match_flags & match_not_eob))
  545. return false;
  546. // OK match:
  547. pstate = pstate->next.p;
  548. return true;
  549. }
  550. template <class BidiIterator, class Allocator, class traits>
  551. bool perl_matcher<BidiIterator, Allocator, traits>::match_backref()
  552. {
  553. //
  554. // Compare with what we previously matched.
  555. // Note that this succeeds if the backref did not partisipate
  556. // in the match, this is in line with ECMAScript, but not Perl
  557. // or PCRE.
  558. //
  559. int index = static_cast<const re_brace*>(pstate)->index;
  560. if(index >= 10000)
  561. {
  562. named_subexpressions::range_type r = re.get_data().equal_range(index);
  563. BOOST_ASSERT(r.first != r.second);
  564. do
  565. {
  566. index = r.first->index;
  567. ++r.first;
  568. }while((r.first != r.second) && ((*m_presult)[index].matched != true));
  569. }
  570. if((m_match_flags & match_perl) && !(*m_presult)[index].matched)
  571. return false;
  572. BidiIterator i = (*m_presult)[index].first;
  573. BidiIterator j = (*m_presult)[index].second;
  574. while(i != j)
  575. {
  576. if((position == last) || (traits_inst.translate(*position, icase) != traits_inst.translate(*i, icase)))
  577. return false;
  578. ++i;
  579. ++position;
  580. }
  581. pstate = pstate->next.p;
  582. return true;
  583. }
  584. template <class BidiIterator, class Allocator, class traits>
  585. bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set()
  586. {
  587. typedef typename traits::char_class_type char_class_type;
  588. // let the traits class do the work:
  589. if(position == last)
  590. return false;
  591. BidiIterator t = re_is_set_member(position, last, static_cast<const re_set_long<char_class_type>*>(pstate), re.get_data(), icase);
  592. if(t != position)
  593. {
  594. pstate = pstate->next.p;
  595. position = t;
  596. return true;
  597. }
  598. return false;
  599. }
  600. template <class BidiIterator, class Allocator, class traits>
  601. bool perl_matcher<BidiIterator, Allocator, traits>::match_set()
  602. {
  603. if(position == last)
  604. return false;
  605. if(static_cast<const re_set*>(pstate)->_map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
  606. {
  607. pstate = pstate->next.p;
  608. ++position;
  609. return true;
  610. }
  611. return false;
  612. }
  613. template <class BidiIterator, class Allocator, class traits>
  614. bool perl_matcher<BidiIterator, Allocator, traits>::match_jump()
  615. {
  616. pstate = static_cast<const re_jump*>(pstate)->alt.p;
  617. return true;
  618. }
  619. template <class BidiIterator, class Allocator, class traits>
  620. bool perl_matcher<BidiIterator, Allocator, traits>::match_combining()
  621. {
  622. if(position == last)
  623. return false;
  624. if(is_combining(traits_inst.translate(*position, icase)))
  625. return false;
  626. ++position;
  627. while((position != last) && is_combining(traits_inst.translate(*position, icase)))
  628. ++position;
  629. pstate = pstate->next.p;
  630. return true;
  631. }
  632. template <class BidiIterator, class Allocator, class traits>
  633. bool perl_matcher<BidiIterator, Allocator, traits>::match_soft_buffer_end()
  634. {
  635. if(m_match_flags & match_not_eob)
  636. return false;
  637. BidiIterator p(position);
  638. while((p != last) && is_separator(traits_inst.translate(*p, icase)))++p;
  639. if(p != last)
  640. return false;
  641. pstate = pstate->next.p;
  642. return true;
  643. }
  644. template <class BidiIterator, class Allocator, class traits>
  645. bool perl_matcher<BidiIterator, Allocator, traits>::match_restart_continue()
  646. {
  647. if(position == search_base)
  648. {
  649. pstate = pstate->next.p;
  650. return true;
  651. }
  652. return false;
  653. }
  654. template <class BidiIterator, class Allocator, class traits>
  655. bool perl_matcher<BidiIterator, Allocator, traits>::match_backstep()
  656. {
  657. #ifdef BOOST_MSVC
  658. #pragma warning(push)
  659. #pragma warning(disable:4127)
  660. #endif
  661. if( ::boost::is_random_access_iterator<BidiIterator>::value)
  662. {
  663. std::ptrdiff_t maxlen = ::boost::re_detail::distance(backstop, position);
  664. if(maxlen < static_cast<const re_brace*>(pstate)->index)
  665. return false;
  666. std::advance(position, -static_cast<const re_brace*>(pstate)->index);
  667. }
  668. else
  669. {
  670. int c = static_cast<const re_brace*>(pstate)->index;
  671. while(c--)
  672. {
  673. if(position == backstop)
  674. return false;
  675. --position;
  676. }
  677. }
  678. pstate = pstate->next.p;
  679. return true;
  680. #ifdef BOOST_MSVC
  681. #pragma warning(pop)
  682. #endif
  683. }
  684. template <class BidiIterator, class Allocator, class traits>
  685. inline bool perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref()
  686. {
  687. // return true if marked sub-expression N has been matched:
  688. int index = static_cast<const re_brace*>(pstate)->index;
  689. bool result = false;
  690. if(index == 9999)
  691. {
  692. // Magic value for a (DEFINE) block:
  693. return false;
  694. }
  695. else if(index > 0)
  696. {
  697. // Have we matched subexpression "index"?
  698. // Check if index is a hash value:
  699. if(index >= 10000)
  700. {
  701. named_subexpressions::range_type r = re.get_data().equal_range(index);
  702. while(r.first != r.second)
  703. {
  704. if((*m_presult)[r.first->index].matched)
  705. {
  706. result = true;
  707. break;
  708. }
  709. ++r.first;
  710. }
  711. }
  712. else
  713. {
  714. result = (*m_presult)[index].matched;
  715. }
  716. pstate = pstate->next.p;
  717. }
  718. else
  719. {
  720. // Have we recursed into subexpression "index"?
  721. // If index == 0 then check for any recursion at all, otherwise for recursion to -index-1.
  722. int idx = -index-1;
  723. if(idx >= 10000)
  724. {
  725. named_subexpressions::range_type r = re.get_data().equal_range(idx);
  726. int stack_index = recursion_stack.empty() ? -1 : recursion_stack.back().idx;
  727. while(r.first != r.second)
  728. {
  729. result |= (stack_index == r.first->index);
  730. if(result)break;
  731. ++r.first;
  732. }
  733. }
  734. else
  735. {
  736. result = !recursion_stack.empty() && ((recursion_stack.back().idx == idx) || (index == 0));
  737. }
  738. pstate = pstate->next.p;
  739. }
  740. return result;
  741. }
  742. template <class BidiIterator, class Allocator, class traits>
  743. bool perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case()
  744. {
  745. // change our case sensitivity:
  746. this->icase = static_cast<const re_case*>(pstate)->icase;
  747. pstate = pstate->next.p;
  748. return true;
  749. }
  750. template <class BidiIterator, class Allocator, class traits>
  751. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_any()
  752. {
  753. #ifdef BOOST_MSVC
  754. #pragma warning(push)
  755. #pragma warning(disable:4127)
  756. #endif
  757. const unsigned char* _map = re.get_map();
  758. while(true)
  759. {
  760. // skip everything we can't match:
  761. while((position != last) && !can_start(*position, _map, (unsigned char)mask_any) )
  762. ++position;
  763. if(position == last)
  764. {
  765. // run out of characters, try a null match if possible:
  766. if(re.can_be_null())
  767. return match_prefix();
  768. break;
  769. }
  770. // now try and obtain a match:
  771. if(match_prefix())
  772. return true;
  773. if(position == last)
  774. return false;
  775. ++position;
  776. }
  777. return false;
  778. #ifdef BOOST_MSVC
  779. #pragma warning(pop)
  780. #endif
  781. }
  782. template <class BidiIterator, class Allocator, class traits>
  783. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_word()
  784. {
  785. #ifdef BOOST_MSVC
  786. #pragma warning(push)
  787. #pragma warning(disable:4127)
  788. #endif
  789. // do search optimised for word starts:
  790. const unsigned char* _map = re.get_map();
  791. if((m_match_flags & match_prev_avail) || (position != base))
  792. --position;
  793. else if(match_prefix())
  794. return true;
  795. do
  796. {
  797. while((position != last) && traits_inst.isctype(*position, m_word_mask))
  798. ++position;
  799. while((position != last) && !traits_inst.isctype(*position, m_word_mask))
  800. ++position;
  801. if(position == last)
  802. break;
  803. if(can_start(*position, _map, (unsigned char)mask_any) )
  804. {
  805. if(match_prefix())
  806. return true;
  807. }
  808. if(position == last)
  809. break;
  810. } while(true);
  811. return false;
  812. #ifdef BOOST_MSVC
  813. #pragma warning(pop)
  814. #endif
  815. }
  816. template <class BidiIterator, class Allocator, class traits>
  817. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_line()
  818. {
  819. // do search optimised for line starts:
  820. const unsigned char* _map = re.get_map();
  821. if(match_prefix())
  822. return true;
  823. while(position != last)
  824. {
  825. while((position != last) && !is_separator(*position))
  826. ++position;
  827. if(position == last)
  828. return false;
  829. ++position;
  830. if(position == last)
  831. {
  832. if(re.can_be_null() && match_prefix())
  833. return true;
  834. return false;
  835. }
  836. if( can_start(*position, _map, (unsigned char)mask_any) )
  837. {
  838. if(match_prefix())
  839. return true;
  840. }
  841. if(position == last)
  842. return false;
  843. //++position;
  844. }
  845. return false;
  846. }
  847. template <class BidiIterator, class Allocator, class traits>
  848. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_buf()
  849. {
  850. if((position == base) && ((m_match_flags & match_not_bob) == 0))
  851. return match_prefix();
  852. return false;
  853. }
  854. template <class BidiIterator, class Allocator, class traits>
  855. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit()
  856. {
  857. #if 0
  858. if(position == last)
  859. return false; // can't possibly match if we're at the end already
  860. unsigned type = (m_match_flags & match_continuous) ?
  861. static_cast<unsigned int>(regbase::restart_continue)
  862. : static_cast<unsigned int>(re.get_restart_type());
  863. const kmp_info<char_type>* info = access::get_kmp(re);
  864. int len = info->len;
  865. const char_type* x = info->pstr;
  866. int j = 0;
  867. while (position != last)
  868. {
  869. while((j > -1) && (x[j] != traits_inst.translate(*position, icase)))
  870. j = info->kmp_next[j];
  871. ++position;
  872. ++j;
  873. if(j >= len)
  874. {
  875. if(type == regbase::restart_fixed_lit)
  876. {
  877. std::advance(position, -j);
  878. restart = position;
  879. std::advance(restart, len);
  880. m_result.set_first(position);
  881. m_result.set_second(restart);
  882. position = restart;
  883. return true;
  884. }
  885. else
  886. {
  887. restart = position;
  888. std::advance(position, -j);
  889. if(match_prefix())
  890. return true;
  891. else
  892. {
  893. for(int k = 0; (restart != position) && (k < j); ++k, --restart)
  894. {} // dwa 10/20/2000 - warning suppression for MWCW
  895. if(restart != last)
  896. ++restart;
  897. position = restart;
  898. j = 0; //we could do better than this...
  899. }
  900. }
  901. }
  902. }
  903. if((m_match_flags & match_partial) && (position == last) && j)
  904. {
  905. // we need to check for a partial match:
  906. restart = position;
  907. std::advance(position, -j);
  908. return match_prefix();
  909. }
  910. #endif
  911. return false;
  912. }
  913. } // namespace re_detail
  914. } // namespace boost
  915. #ifdef BOOST_MSVC
  916. # pragma warning(pop)
  917. #endif
  918. #ifdef __BORLANDC__
  919. # pragma option pop
  920. #endif
  921. #ifdef BOOST_MSVC
  922. #pragma warning(push)
  923. #pragma warning(disable: 4103)
  924. #endif
  925. #ifdef BOOST_HAS_ABI_HEADERS
  926. # include BOOST_ABI_SUFFIX
  927. #endif
  928. #ifdef BOOST_MSVC
  929. #pragma warning(pop)
  930. #endif
  931. #endif