perl_matcher_recursive.hpp 30 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003
  1. /*
  2. *
  3. * Copyright (c) 2002
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE perl_matcher_common.cpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Definitions of perl_matcher member functions that are
  16. * specific to the recursive implementation.
  17. */
  18. #ifndef BOOST_REGEX_V4_PERL_MATCHER_RECURSIVE_HPP
  19. #define BOOST_REGEX_V4_PERL_MATCHER_RECURSIVE_HPP
  20. #ifdef BOOST_MSVC
  21. #pragma warning(push)
  22. #pragma warning(disable: 4103)
  23. #endif
  24. #ifdef BOOST_HAS_ABI_HEADERS
  25. # include BOOST_ABI_PREFIX
  26. #endif
  27. #ifdef BOOST_MSVC
  28. #pragma warning(pop)
  29. #endif
  30. #ifdef BOOST_MSVC
  31. #pragma warning(push)
  32. #pragma warning(disable: 4800)
  33. #endif
  34. namespace boost{
  35. namespace re_detail{
  36. template <class BidiIterator>
  37. class backup_subex
  38. {
  39. int index;
  40. sub_match<BidiIterator> sub;
  41. public:
  42. template <class A>
  43. backup_subex(const match_results<BidiIterator, A>& w, int i)
  44. : index(i), sub(w[i], false) {}
  45. template <class A>
  46. void restore(match_results<BidiIterator, A>& w)
  47. {
  48. w.set_first(sub.first, index, index == 0);
  49. w.set_second(sub.second, index, sub.matched, index == 0);
  50. }
  51. const sub_match<BidiIterator>& get() { return sub; }
  52. };
  53. template <class BidiIterator, class Allocator, class traits>
  54. bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
  55. {
  56. static matcher_proc_type const s_match_vtable[30] =
  57. {
  58. (&perl_matcher<BidiIterator, Allocator, traits>::match_startmark),
  59. &perl_matcher<BidiIterator, Allocator, traits>::match_endmark,
  60. &perl_matcher<BidiIterator, Allocator, traits>::match_literal,
  61. &perl_matcher<BidiIterator, Allocator, traits>::match_start_line,
  62. &perl_matcher<BidiIterator, Allocator, traits>::match_end_line,
  63. &perl_matcher<BidiIterator, Allocator, traits>::match_wild,
  64. &perl_matcher<BidiIterator, Allocator, traits>::match_match,
  65. &perl_matcher<BidiIterator, Allocator, traits>::match_word_boundary,
  66. &perl_matcher<BidiIterator, Allocator, traits>::match_within_word,
  67. &perl_matcher<BidiIterator, Allocator, traits>::match_word_start,
  68. &perl_matcher<BidiIterator, Allocator, traits>::match_word_end,
  69. &perl_matcher<BidiIterator, Allocator, traits>::match_buffer_start,
  70. &perl_matcher<BidiIterator, Allocator, traits>::match_buffer_end,
  71. &perl_matcher<BidiIterator, Allocator, traits>::match_backref,
  72. &perl_matcher<BidiIterator, Allocator, traits>::match_long_set,
  73. &perl_matcher<BidiIterator, Allocator, traits>::match_set,
  74. &perl_matcher<BidiIterator, Allocator, traits>::match_jump,
  75. &perl_matcher<BidiIterator, Allocator, traits>::match_alt,
  76. &perl_matcher<BidiIterator, Allocator, traits>::match_rep,
  77. &perl_matcher<BidiIterator, Allocator, traits>::match_combining,
  78. &perl_matcher<BidiIterator, Allocator, traits>::match_soft_buffer_end,
  79. &perl_matcher<BidiIterator, Allocator, traits>::match_restart_continue,
  80. // Although this next line *should* be evaluated at compile time, in practice
  81. // some compilers (VC++) emit run-time initialisation which breaks thread
  82. // safety, so use a dispatch function instead:
  83. //(::boost::is_random_access_iterator<BidiIterator>::value ? &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_fast : &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_slow),
  84. &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_dispatch,
  85. &perl_matcher<BidiIterator, Allocator, traits>::match_char_repeat,
  86. &perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat,
  87. &perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat,
  88. &perl_matcher<BidiIterator, Allocator, traits>::match_backstep,
  89. &perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref,
  90. &perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case,
  91. &perl_matcher<BidiIterator, Allocator, traits>::match_recursion,
  92. };
  93. if(state_count > max_state_count)
  94. raise_error(traits_inst, regex_constants::error_complexity);
  95. while(pstate)
  96. {
  97. matcher_proc_type proc = s_match_vtable[pstate->type];
  98. ++state_count;
  99. if(!(this->*proc)())
  100. {
  101. if((m_match_flags & match_partial) && (position == last) && (position != search_base))
  102. m_has_partial_match = true;
  103. return 0;
  104. }
  105. }
  106. return true;
  107. }
  108. template <class BidiIterator, class Allocator, class traits>
  109. bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark()
  110. {
  111. int index = static_cast<const re_brace*>(pstate)->index;
  112. icase = static_cast<const re_brace*>(pstate)->icase;
  113. bool r = true;
  114. switch(index)
  115. {
  116. case 0:
  117. pstate = pstate->next.p;
  118. break;
  119. case -1:
  120. case -2:
  121. {
  122. // forward lookahead assert:
  123. BidiIterator old_position(position);
  124. const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
  125. pstate = pstate->next.p->next.p;
  126. r = match_all_states();
  127. pstate = next_pstate;
  128. position = old_position;
  129. if((r && (index != -1)) || (!r && (index != -2)))
  130. r = false;
  131. else
  132. r = true;
  133. break;
  134. }
  135. case -3:
  136. {
  137. // independent sub-expression:
  138. bool old_independent = m_independent;
  139. m_independent = true;
  140. const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
  141. pstate = pstate->next.p->next.p;
  142. r = match_all_states();
  143. pstate = next_pstate;
  144. m_independent = old_independent;
  145. #ifdef BOOST_REGEX_MATCH_EXTRA
  146. if(r && (m_match_flags & match_extra))
  147. {
  148. //
  149. // our captures have been stored in *m_presult
  150. // we need to unpack them, and insert them
  151. // back in the right order when we unwind the stack:
  152. //
  153. unsigned i;
  154. match_results<BidiIterator, Allocator> tm(*m_presult);
  155. for(i = 0; i < tm.size(); ++i)
  156. (*m_presult)[i].get_captures().clear();
  157. // match everything else:
  158. r = match_all_states();
  159. // now place the stored captures back:
  160. for(i = 0; i < tm.size(); ++i)
  161. {
  162. typedef typename sub_match<BidiIterator>::capture_sequence_type seq;
  163. seq& s1 = (*m_presult)[i].get_captures();
  164. const seq& s2 = tm[i].captures();
  165. s1.insert(
  166. s1.end(),
  167. s2.begin(),
  168. s2.end());
  169. }
  170. }
  171. #endif
  172. break;
  173. }
  174. case -4:
  175. {
  176. // conditional expression:
  177. const re_alt* alt = static_cast<const re_alt*>(pstate->next.p);
  178. BOOST_ASSERT(alt->type == syntax_element_alt);
  179. pstate = alt->next.p;
  180. if(pstate->type == syntax_element_assert_backref)
  181. {
  182. if(!match_assert_backref())
  183. pstate = alt->alt.p;
  184. break;
  185. }
  186. else
  187. {
  188. // zero width assertion, have to match this recursively:
  189. BOOST_ASSERT(pstate->type == syntax_element_startmark);
  190. bool negated = static_cast<const re_brace*>(pstate)->index == -2;
  191. BidiIterator saved_position = position;
  192. const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
  193. pstate = pstate->next.p->next.p;
  194. bool res = match_all_states();
  195. position = saved_position;
  196. if(negated)
  197. res = !res;
  198. if(res)
  199. pstate = next_pstate;
  200. else
  201. pstate = alt->alt.p;
  202. break;
  203. }
  204. }
  205. case -5:
  206. {
  207. // Reset start of $0, since we have a \K escape
  208. backup_subex<BidiIterator> sub(*m_presult, 0);
  209. m_presult->set_first(position, 0, true);
  210. pstate = pstate->next.p;
  211. r = match_all_states();
  212. if(r == false)
  213. sub.restore(*m_presult);
  214. break;
  215. }
  216. default:
  217. {
  218. BOOST_ASSERT(index > 0);
  219. if((m_match_flags & match_nosubs) == 0)
  220. {
  221. backup_subex<BidiIterator> sub(*m_presult, index);
  222. m_presult->set_first(position, index);
  223. pstate = pstate->next.p;
  224. r = match_all_states();
  225. if(r == false)
  226. sub.restore(*m_presult);
  227. #ifdef BOOST_REGEX_MATCH_EXTRA
  228. //
  229. // we have a match, push the capture information onto the stack:
  230. //
  231. else if(sub.get().matched && (match_extra & m_match_flags))
  232. ((*m_presult)[index]).get_captures().push_back(sub.get());
  233. #endif
  234. }
  235. else
  236. {
  237. pstate = pstate->next.p;
  238. }
  239. break;
  240. }
  241. }
  242. return r;
  243. }
  244. template <class BidiIterator, class Allocator, class traits>
  245. bool perl_matcher<BidiIterator, Allocator, traits>::match_alt()
  246. {
  247. bool take_first, take_second;
  248. const re_alt* jmp = static_cast<const re_alt*>(pstate);
  249. // find out which of these two alternatives we need to take:
  250. if(position == last)
  251. {
  252. take_first = jmp->can_be_null & mask_take;
  253. take_second = jmp->can_be_null & mask_skip;
  254. }
  255. else
  256. {
  257. take_first = can_start(*position, jmp->_map, (unsigned char)mask_take);
  258. take_second = can_start(*position, jmp->_map, (unsigned char)mask_skip);
  259. }
  260. if(take_first)
  261. {
  262. // we can take the first alternative,
  263. // see if we need to push next alternative:
  264. if(take_second)
  265. {
  266. BidiIterator oldposition(position);
  267. const re_syntax_base* old_pstate = jmp->alt.p;
  268. pstate = pstate->next.p;
  269. if(!match_all_states())
  270. {
  271. pstate = old_pstate;
  272. position = oldposition;
  273. }
  274. return true;
  275. }
  276. pstate = pstate->next.p;
  277. return true;
  278. }
  279. if(take_second)
  280. {
  281. pstate = jmp->alt.p;
  282. return true;
  283. }
  284. return false; // neither option is possible
  285. }
  286. template <class BidiIterator, class Allocator, class traits>
  287. bool perl_matcher<BidiIterator, Allocator, traits>::match_rep()
  288. {
  289. #ifdef BOOST_MSVC
  290. #pragma warning(push)
  291. #pragma warning(disable:4127 4244)
  292. #endif
  293. const re_repeat* rep = static_cast<const re_repeat*>(pstate);
  294. //
  295. // Always copy the repeat count, so that the state is restored
  296. // when we exit this scope:
  297. //
  298. repeater_count<BidiIterator> r(rep->state_id, &next_count, position);
  299. //
  300. // If we've had at least one repeat already, and the last one
  301. // matched the NULL string then set the repeat count to
  302. // maximum:
  303. //
  304. next_count->check_null_repeat(position, rep->max);
  305. // find out which of these two alternatives we need to take:
  306. bool take_first, take_second;
  307. if(position == last)
  308. {
  309. take_first = rep->can_be_null & mask_take;
  310. take_second = rep->can_be_null & mask_skip;
  311. }
  312. else
  313. {
  314. take_first = can_start(*position, rep->_map, (unsigned char)mask_take);
  315. take_second = can_start(*position, rep->_map, (unsigned char)mask_skip);
  316. }
  317. if(next_count->get_count() < rep->min)
  318. {
  319. // we must take the repeat:
  320. if(take_first)
  321. {
  322. // increase the counter:
  323. ++(*next_count);
  324. pstate = rep->next.p;
  325. return match_all_states();
  326. }
  327. return false;
  328. }
  329. bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
  330. if(greedy)
  331. {
  332. // try and take the repeat if we can:
  333. if((next_count->get_count() < rep->max) && take_first)
  334. {
  335. // store position in case we fail:
  336. BidiIterator pos = position;
  337. // increase the counter:
  338. ++(*next_count);
  339. pstate = rep->next.p;
  340. if(match_all_states())
  341. return true;
  342. // failed repeat, reset posistion and fall through for alternative:
  343. position = pos;
  344. }
  345. if(take_second)
  346. {
  347. pstate = rep->alt.p;
  348. return true;
  349. }
  350. return false; // can't take anything, fail...
  351. }
  352. else // non-greedy
  353. {
  354. // try and skip the repeat if we can:
  355. if(take_second)
  356. {
  357. // store position in case we fail:
  358. BidiIterator pos = position;
  359. pstate = rep->alt.p;
  360. if(match_all_states())
  361. return true;
  362. // failed alternative, reset posistion and fall through for repeat:
  363. position = pos;
  364. }
  365. if((next_count->get_count() < rep->max) && take_first)
  366. {
  367. // increase the counter:
  368. ++(*next_count);
  369. pstate = rep->next.p;
  370. return match_all_states();
  371. }
  372. }
  373. return false;
  374. #ifdef BOOST_MSVC
  375. #pragma warning(pop)
  376. #endif
  377. }
  378. template <class BidiIterator, class Allocator, class traits>
  379. bool perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_slow()
  380. {
  381. #ifdef BOOST_MSVC
  382. #pragma warning(push)
  383. #pragma warning(disable:4127)
  384. #endif
  385. unsigned count = 0;
  386. const re_repeat* rep = static_cast<const re_repeat*>(pstate);
  387. re_syntax_base* psingle = rep->next.p;
  388. // match compulsary repeats first:
  389. while(count < rep->min)
  390. {
  391. pstate = psingle;
  392. if(!match_wild())
  393. return false;
  394. ++count;
  395. }
  396. bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
  397. if(greedy)
  398. {
  399. // normal repeat:
  400. while(count < rep->max)
  401. {
  402. pstate = psingle;
  403. if(!match_wild())
  404. break;
  405. ++count;
  406. }
  407. if((rep->leading) && (count < rep->max))
  408. restart = position;
  409. pstate = rep;
  410. return backtrack_till_match(count - rep->min);
  411. }
  412. else
  413. {
  414. // non-greedy, keep trying till we get a match:
  415. BidiIterator save_pos;
  416. do
  417. {
  418. if((rep->leading) && (rep->max == UINT_MAX))
  419. restart = position;
  420. pstate = rep->alt.p;
  421. save_pos = position;
  422. ++state_count;
  423. if(match_all_states())
  424. return true;
  425. if(count >= rep->max)
  426. return false;
  427. ++count;
  428. pstate = psingle;
  429. position = save_pos;
  430. if(!match_wild())
  431. return false;
  432. }while(true);
  433. }
  434. #ifdef BOOST_MSVC
  435. #pragma warning(pop)
  436. #endif
  437. }
  438. template <class BidiIterator, class Allocator, class traits>
  439. bool perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_fast()
  440. {
  441. #ifdef BOOST_MSVC
  442. #pragma warning(push)
  443. #pragma warning(disable:4127)
  444. #endif
  445. if(m_match_flags & match_not_dot_null)
  446. return match_dot_repeat_slow();
  447. if((static_cast<const re_dot*>(pstate->next.p)->mask & match_any_mask) == 0)
  448. return match_dot_repeat_slow();
  449. //
  450. // start by working out how much we can skip:
  451. //
  452. const re_repeat* rep = static_cast<const re_repeat*>(pstate);
  453. #ifdef BOOST_MSVC
  454. #pragma warning(push)
  455. #pragma warning(disable:4267)
  456. #endif
  457. bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
  458. std::size_t count = (std::min)(static_cast<std::size_t>(::boost::re_detail::distance(position, last)), static_cast<std::size_t>(greedy ? rep->max : rep->min));
  459. if(rep->min > count)
  460. {
  461. position = last;
  462. return false; // not enough text left to match
  463. }
  464. std::advance(position, count);
  465. #ifdef BOOST_MSVC
  466. #pragma warning(pop)
  467. #endif
  468. if((rep->leading) && (count < rep->max) && greedy)
  469. restart = position;
  470. if(greedy)
  471. return backtrack_till_match(count - rep->min);
  472. // non-greedy, keep trying till we get a match:
  473. BidiIterator save_pos;
  474. do
  475. {
  476. while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip))
  477. {
  478. ++position;
  479. ++count;
  480. }
  481. if((rep->leading) && (rep->max == UINT_MAX))
  482. restart = position;
  483. pstate = rep->alt.p;
  484. save_pos = position;
  485. ++state_count;
  486. if(match_all_states())
  487. return true;
  488. if(count >= rep->max)
  489. return false;
  490. if(save_pos == last)
  491. return false;
  492. position = ++save_pos;
  493. ++count;
  494. }while(true);
  495. #ifdef BOOST_MSVC
  496. #pragma warning(pop)
  497. #endif
  498. }
  499. template <class BidiIterator, class Allocator, class traits>
  500. bool perl_matcher<BidiIterator, Allocator, traits>::match_char_repeat()
  501. {
  502. #ifdef BOOST_MSVC
  503. #pragma warning(push)
  504. #pragma warning(disable:4127)
  505. #pragma warning(disable:4267)
  506. #endif
  507. #ifdef __BORLANDC__
  508. #pragma option push -w-8008 -w-8066 -w-8004
  509. #endif
  510. const re_repeat* rep = static_cast<const re_repeat*>(pstate);
  511. BOOST_ASSERT(1 == static_cast<const re_literal*>(rep->next.p)->length);
  512. const char_type what = *reinterpret_cast<const char_type*>(static_cast<const re_literal*>(rep->next.p) + 1);
  513. //
  514. // start by working out how much we can skip:
  515. //
  516. bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
  517. std::size_t count, desired;
  518. if(::boost::is_random_access_iterator<BidiIterator>::value)
  519. {
  520. desired =
  521. (std::min)(
  522. (std::size_t)(greedy ? rep->max : rep->min),
  523. (std::size_t)::boost::re_detail::distance(position, last));
  524. count = desired;
  525. ++desired;
  526. if(icase)
  527. {
  528. while(--desired && (traits_inst.translate_nocase(*position) == what))
  529. {
  530. ++position;
  531. }
  532. }
  533. else
  534. {
  535. while(--desired && (traits_inst.translate(*position) == what))
  536. {
  537. ++position;
  538. }
  539. }
  540. count = count - desired;
  541. }
  542. else
  543. {
  544. count = 0;
  545. desired = greedy ? rep->max : rep->min;
  546. while((count < desired) && (position != last) && (traits_inst.translate(*position, icase) == what))
  547. {
  548. ++position;
  549. ++count;
  550. }
  551. }
  552. if((rep->leading) && (count < rep->max) && greedy)
  553. restart = position;
  554. if(count < rep->min)
  555. return false;
  556. if(greedy)
  557. return backtrack_till_match(count - rep->min);
  558. // non-greedy, keep trying till we get a match:
  559. BidiIterator save_pos;
  560. do
  561. {
  562. while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip))
  563. {
  564. if((traits_inst.translate(*position, icase) == what))
  565. {
  566. ++position;
  567. ++count;
  568. }
  569. else
  570. return false; // counldn't repeat even though it was the only option
  571. }
  572. if((rep->leading) && (rep->max == UINT_MAX))
  573. restart = position;
  574. pstate = rep->alt.p;
  575. save_pos = position;
  576. ++state_count;
  577. if(match_all_states())
  578. return true;
  579. if(count >= rep->max)
  580. return false;
  581. position = save_pos;
  582. if(position == last)
  583. return false;
  584. if(traits_inst.translate(*position, icase) == what)
  585. {
  586. ++position;
  587. ++count;
  588. }
  589. else
  590. {
  591. return false;
  592. }
  593. }while(true);
  594. #ifdef __BORLANDC__
  595. #pragma option pop
  596. #endif
  597. #ifdef BOOST_MSVC
  598. #pragma warning(pop)
  599. #endif
  600. }
  601. template <class BidiIterator, class Allocator, class traits>
  602. bool perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat()
  603. {
  604. #ifdef BOOST_MSVC
  605. #pragma warning(push)
  606. #pragma warning(disable:4127)
  607. #endif
  608. #ifdef __BORLANDC__
  609. #pragma option push -w-8008 -w-8066 -w-8004
  610. #endif
  611. const re_repeat* rep = static_cast<const re_repeat*>(pstate);
  612. const unsigned char* map = static_cast<const re_set*>(rep->next.p)->_map;
  613. unsigned count = 0;
  614. //
  615. // start by working out how much we can skip:
  616. //
  617. bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
  618. std::size_t desired = greedy ? rep->max : rep->min;
  619. if(::boost::is_random_access_iterator<BidiIterator>::value)
  620. {
  621. BidiIterator end = position;
  622. // Move end forward by "desired", preferably without using distance or advance if we can
  623. // as these can be slow for some iterator types.
  624. std::size_t len = (desired == (std::numeric_limits<std::size_t>::max)()) ? 0u : ::boost::re_detail::distance(position, last);
  625. if(desired >= len)
  626. end = last;
  627. else
  628. std::advance(end, desired);
  629. BidiIterator origin(position);
  630. while((position != end) && map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
  631. {
  632. ++position;
  633. }
  634. count = (unsigned)::boost::re_detail::distance(origin, position);
  635. }
  636. else
  637. {
  638. while((count < desired) && (position != last) && map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
  639. {
  640. ++position;
  641. ++count;
  642. }
  643. }
  644. if((rep->leading) && (count < rep->max) && greedy)
  645. restart = position;
  646. if(count < rep->min)
  647. return false;
  648. if(greedy)
  649. return backtrack_till_match(count - rep->min);
  650. // non-greedy, keep trying till we get a match:
  651. BidiIterator save_pos;
  652. do
  653. {
  654. while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip))
  655. {
  656. if(map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
  657. {
  658. ++position;
  659. ++count;
  660. }
  661. else
  662. return false; // counldn't repeat even though it was the only option
  663. }
  664. if((rep->leading) && (rep->max == UINT_MAX))
  665. restart = position;
  666. pstate = rep->alt.p;
  667. save_pos = position;
  668. ++state_count;
  669. if(match_all_states())
  670. return true;
  671. if(count >= rep->max)
  672. return false;
  673. position = save_pos;
  674. if(position == last)
  675. return false;
  676. if(map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
  677. {
  678. ++position;
  679. ++count;
  680. }
  681. else
  682. {
  683. return false;
  684. }
  685. }while(true);
  686. #ifdef __BORLANDC__
  687. #pragma option pop
  688. #endif
  689. #ifdef BOOST_MSVC
  690. #pragma warning(pop)
  691. #endif
  692. }
  693. template <class BidiIterator, class Allocator, class traits>
  694. bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
  695. {
  696. #ifdef BOOST_MSVC
  697. #pragma warning(push)
  698. #pragma warning(disable:4127)
  699. #endif
  700. #ifdef __BORLANDC__
  701. #pragma option push -w-8008 -w-8066 -w-8004
  702. #endif
  703. typedef typename traits::char_class_type char_class_type;
  704. const re_repeat* rep = static_cast<const re_repeat*>(pstate);
  705. const re_set_long<char_class_type>* set = static_cast<const re_set_long<char_class_type>*>(pstate->next.p);
  706. unsigned count = 0;
  707. //
  708. // start by working out how much we can skip:
  709. //
  710. bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
  711. std::size_t desired = greedy ? rep->max : rep->min;
  712. if(::boost::is_random_access_iterator<BidiIterator>::value)
  713. {
  714. BidiIterator end = position;
  715. // Move end forward by "desired", preferably without using distance or advance if we can
  716. // as these can be slow for some iterator types.
  717. std::size_t len = (desired == (std::numeric_limits<std::size_t>::max)()) ? 0u : ::boost::re_detail::distance(position, last);
  718. if(desired >= len)
  719. end = last;
  720. else
  721. std::advance(end, desired);
  722. BidiIterator origin(position);
  723. while((position != end) && (position != re_is_set_member(position, last, set, re.get_data(), icase)))
  724. {
  725. ++position;
  726. }
  727. count = (unsigned)::boost::re_detail::distance(origin, position);
  728. }
  729. else
  730. {
  731. while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re.get_data(), icase)))
  732. {
  733. ++position;
  734. ++count;
  735. }
  736. }
  737. if((rep->leading) && (count < rep->max) && greedy)
  738. restart = position;
  739. if(count < rep->min)
  740. return false;
  741. if(greedy)
  742. return backtrack_till_match(count - rep->min);
  743. // non-greedy, keep trying till we get a match:
  744. BidiIterator save_pos;
  745. do
  746. {
  747. while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip))
  748. {
  749. if(position != re_is_set_member(position, last, set, re.get_data(), icase))
  750. {
  751. ++position;
  752. ++count;
  753. }
  754. else
  755. return false; // counldn't repeat even though it was the only option
  756. }
  757. if((rep->leading) && (rep->max == UINT_MAX))
  758. restart = position;
  759. pstate = rep->alt.p;
  760. save_pos = position;
  761. ++state_count;
  762. if(match_all_states())
  763. return true;
  764. if(count >= rep->max)
  765. return false;
  766. position = save_pos;
  767. if(position == last)
  768. return false;
  769. if(position != re_is_set_member(position, last, set, re.get_data(), icase))
  770. {
  771. ++position;
  772. ++count;
  773. }
  774. else
  775. {
  776. return false;
  777. }
  778. }while(true);
  779. #ifdef __BORLANDC__
  780. #pragma option pop
  781. #endif
  782. #ifdef BOOST_MSVC
  783. #pragma warning(pop)
  784. #endif
  785. }
  786. template <class BidiIterator, class Allocator, class traits>
  787. bool perl_matcher<BidiIterator, Allocator, traits>::backtrack_till_match(std::size_t count)
  788. {
  789. #ifdef BOOST_MSVC
  790. #pragma warning(push)
  791. #pragma warning(disable:4127)
  792. #endif
  793. if((m_match_flags & match_partial) && (position == last))
  794. m_has_partial_match = true;
  795. const re_repeat* rep = static_cast<const re_repeat*>(pstate);
  796. BidiIterator backtrack = position;
  797. if(position == last)
  798. {
  799. if(rep->can_be_null & mask_skip)
  800. {
  801. pstate = rep->alt.p;
  802. if(match_all_states())
  803. return true;
  804. }
  805. if(count)
  806. {
  807. position = --backtrack;
  808. --count;
  809. }
  810. else
  811. return false;
  812. }
  813. do
  814. {
  815. while(count && !can_start(*position, rep->_map, mask_skip))
  816. {
  817. --position;
  818. --count;
  819. ++state_count;
  820. }
  821. pstate = rep->alt.p;
  822. backtrack = position;
  823. if(match_all_states())
  824. return true;
  825. if(count == 0)
  826. return false;
  827. position = --backtrack;
  828. ++state_count;
  829. --count;
  830. }while(true);
  831. #ifdef BOOST_MSVC
  832. #pragma warning(pop)
  833. #endif
  834. }
  835. template <class BidiIterator, class Allocator, class traits>
  836. bool perl_matcher<BidiIterator, Allocator, traits>::match_recursion()
  837. {
  838. BOOST_ASSERT(pstate->type == syntax_element_recurse);
  839. //
  840. // Set new call stack:
  841. //
  842. if(recursion_stack.capacity() == 0)
  843. {
  844. recursion_stack.reserve(50);
  845. }
  846. recursion_stack.push_back(recursion_info<results_type>());
  847. recursion_stack.back().preturn_address = pstate->next.p;
  848. recursion_stack.back().results = *m_presult;
  849. recursion_stack.back().repeater_stack = next_count;
  850. pstate = static_cast<const re_jump*>(pstate)->alt.p;
  851. recursion_stack.back().idx = static_cast<const re_brace*>(pstate)->index;
  852. repeater_count<BidiIterator>* saved = next_count;
  853. repeater_count<BidiIterator> r(&next_count); // resets all repeat counts since we're recursing and starting fresh on those
  854. next_count = &r;
  855. bool result = match_all_states();
  856. next_count = saved;
  857. if(!result)
  858. {
  859. next_count = recursion_stack.back().repeater_stack;
  860. *m_presult = recursion_stack.back().results;
  861. recursion_stack.pop_back();
  862. return false;
  863. }
  864. return true;
  865. }
  866. template <class BidiIterator, class Allocator, class traits>
  867. bool perl_matcher<BidiIterator, Allocator, traits>::match_endmark()
  868. {
  869. int index = static_cast<const re_brace*>(pstate)->index;
  870. icase = static_cast<const re_brace*>(pstate)->icase;
  871. if(index > 0)
  872. {
  873. if((m_match_flags & match_nosubs) == 0)
  874. {
  875. m_presult->set_second(position, index);
  876. }
  877. if(!recursion_stack.empty())
  878. {
  879. if(index == recursion_stack.back().idx)
  880. {
  881. recursion_info<results_type> saved = recursion_stack.back();
  882. recursion_stack.pop_back();
  883. pstate = saved.preturn_address;
  884. repeater_count<BidiIterator>* saved_count = next_count;
  885. next_count = saved.repeater_stack;
  886. *m_presult = saved.results;
  887. if(!match_all_states())
  888. {
  889. recursion_stack.push_back(saved);
  890. next_count = saved_count;
  891. return false;
  892. }
  893. }
  894. }
  895. }
  896. else if((index < 0) && (index != -4))
  897. {
  898. // matched forward lookahead:
  899. pstate = 0;
  900. return true;
  901. }
  902. pstate = pstate ? pstate->next.p : 0;
  903. return true;
  904. }
  905. template <class BidiIterator, class Allocator, class traits>
  906. bool perl_matcher<BidiIterator, Allocator, traits>::match_match()
  907. {
  908. if(!recursion_stack.empty())
  909. {
  910. BOOST_ASSERT(0 == recursion_stack.back().idx);
  911. const re_syntax_base* saved_state = pstate = recursion_stack.back().preturn_address;
  912. *m_presult = recursion_stack.back().results;
  913. recursion_stack.pop_back();
  914. if(!match_all_states())
  915. {
  916. recursion_stack.push_back(recursion_info<results_type>());
  917. recursion_stack.back().preturn_address = saved_state;
  918. recursion_stack.back().results = *m_presult;
  919. return false;
  920. }
  921. return true;
  922. }
  923. if((m_match_flags & match_not_null) && (position == (*m_presult)[0].first))
  924. return false;
  925. if((m_match_flags & match_all) && (position != last))
  926. return false;
  927. if((m_match_flags & regex_constants::match_not_initial_null) && (position == search_base))
  928. return false;
  929. m_presult->set_second(position);
  930. pstate = 0;
  931. m_has_found_match = true;
  932. if((m_match_flags & match_posix) == match_posix)
  933. {
  934. m_result.maybe_assign(*m_presult);
  935. if((m_match_flags & match_any) == 0)
  936. return false;
  937. }
  938. #ifdef BOOST_REGEX_MATCH_EXTRA
  939. if(match_extra & m_match_flags)
  940. {
  941. for(unsigned i = 0; i < m_presult->size(); ++i)
  942. if((*m_presult)[i].matched)
  943. ((*m_presult)[i]).get_captures().push_back((*m_presult)[i]);
  944. }
  945. #endif
  946. return true;
  947. }
  948. } // namespace re_detail
  949. } // namespace boost
  950. #ifdef BOOST_MSVC
  951. #pragma warning(pop)
  952. #endif
  953. #ifdef BOOST_MSVC
  954. #pragma warning(push)
  955. #pragma warning(disable: 4103)
  956. #endif
  957. #ifdef BOOST_HAS_ABI_HEADERS
  958. # include BOOST_ABI_SUFFIX
  959. #endif
  960. #ifdef BOOST_MSVC
  961. #pragma warning(pop)
  962. #endif
  963. #endif