tz_db_base.hpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. #ifndef DATE_TIME_TZ_DB_BASE_HPP__
  2. #define DATE_TIME_TZ_DB_BASE_HPP__
  3. /* Copyright (c) 2003-2005 CrystalClear Software, Inc.
  4. * Subject to the Boost Software License, Version 1.0.
  5. * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
  6. * Author: Jeff Garland, Bart Garst
  7. * $Date: 2013-09-21 13:17:00 -0700 (Sat, 21 Sep 2013) $
  8. */
  9. #include <map>
  10. #include <vector>
  11. #include <string>
  12. #include <sstream>
  13. #include <fstream>
  14. #include <stdexcept>
  15. #include <boost/tokenizer.hpp>
  16. #include <boost/shared_ptr.hpp>
  17. #include <boost/throw_exception.hpp>
  18. #include <boost/date_time/compiler_config.hpp>
  19. #include <boost/date_time/time_zone_names.hpp>
  20. #include <boost/date_time/time_zone_base.hpp>
  21. #include <boost/date_time/time_parsing.hpp>
  22. namespace boost {
  23. namespace date_time {
  24. //! Exception thrown when tz database cannot locate requested data file
  25. class data_not_accessible : public std::logic_error
  26. {
  27. public:
  28. data_not_accessible() :
  29. std::logic_error(std::string("Unable to locate or access the required datafile."))
  30. {}
  31. data_not_accessible(const std::string& filespec) :
  32. std::logic_error(std::string("Unable to locate or access the required datafile. Filespec: " + filespec))
  33. {}
  34. };
  35. //! Exception thrown when tz database locates incorrect field structure in data file
  36. class bad_field_count : public std::out_of_range
  37. {
  38. public:
  39. bad_field_count(const std::string& s) :
  40. std::out_of_range(s)
  41. {}
  42. };
  43. //! Creates a database of time_zones from csv datafile
  44. /*! The csv file containing the zone_specs used by the
  45. * tz_db_base is intended to be customized by the
  46. * library user. When customizing this file (or creating your own) the
  47. * file must follow a specific format.
  48. *
 * The first line is expected to contain column headings; load_from_file()
 * skips it, whereas load_from_stream() processes every line it is given.
  51. *
  52. * Each record (line) must have eleven fields. Some of those fields can
  53. * be empty. Every field (even empty ones) must be enclosed in
  54. * double-quotes.
  55. * Ex:
  56. * @code
  57. * "America/Phoenix" <- string enclosed in quotes
  58. * "" <- empty field
  59. * @endcode
  60. *
  61. * Some fields represent a length of time. The format of these fields
  62. * must be:
  63. * @code
  64. * "{+|-}hh:mm[:ss]" <- length-of-time format
  65. * @endcode
  66. * Where the plus or minus is mandatory and the seconds are optional.
  67. *
  68. * Since some time zones do not use daylight savings it is not always
  69. * necessary for every field in a zone_spec to contain a value. All
  70. * zone_specs must have at least ID and GMT offset. Zones that use
  71. * daylight savings must have all fields filled except:
  72. * STD ABBR, STD NAME, DST NAME. You should take note
  73. * that DST ABBR is mandatory for zones that use daylight savings
  74. * (see field descriptions for further details).
  75. *
  76. * ******* Fields and their description/details *********
  77. *
  78. * ID:
 * Contains the identifying string for the zone_spec. Any string will
 * do as long as it's unique. No two IDs can be the same.
  81. *
  82. * STD ABBR:
  83. * STD NAME:
  84. * DST ABBR:
  85. * DST NAME:
  86. * These four are all the names and abbreviations used by the time
  87. * zone being described. While any string will do in these fields,
  88. * care should be taken. These fields hold the strings that will be
  89. * used in the output of many of the local_time classes.
  90. * Ex:
  91. * @code
  92. * time_zone nyc = tz_db.time_zone_from_region("America/New_York");
  93. * local_time ny_time(date(2004, Aug, 30), IS_DST, nyc);
  94. * cout << ny_time.to_long_string() << endl;
  95. * // 2004-Aug-30 00:00:00 Eastern Daylight Time
  96. * cout << ny_time.to_short_string() << endl;
  97. * // 2004-Aug-30 00:00:00 EDT
  98. * @endcode
  99. *
  100. * NOTE: The exact format/function names may vary - see local_time
  101. * documentation for further details.
  102. *
  103. * GMT offset:
  104. * This is the number of hours added to utc to get the local time
  105. * before any daylight savings adjustments are made. Some examples
  106. * are: America/New_York offset -5 hours, & Africa/Cairo offset +2 hours.
  107. * The format must follow the length-of-time format described above.
  108. *
  109. * DST adjustment:
  110. * The amount of time added to gmt_offset when daylight savings is in
  111. * effect. The format must follow the length-of-time format described
  112. * above.
  113. *
  114. * DST Start Date rule:
 * This is a specially formatted string that describes the day of year
 * on which the transition takes place. It holds three fields of its own,
 * separated by semicolons.
  118. * The first field indicates the "nth" weekday of the month. The possible
  119. * values are: 1 (first), 2 (second), 3 (third), 4 (fourth), 5 (fifth),
  120. * and -1 (last).
  121. * The second field indicates the day-of-week from 0-6 (Sun=0).
  122. * The third field indicates the month from 1-12 (Jan=1).
  123. *
  124. * Examples are: "-1;5;9"="Last Friday of September",
  125. * "2;1;3"="Second Monday of March"
  126. *
  127. * Start time:
 * Start time is the time of day (in 24-hour format), on the day of the
 * start transition, at which the transition takes place, expressed as
 * the time elapsed since midnight. The format must follow the
 * length-of-time format described above with the exception that it
 * must always be positive.
  133. *
  134. * DST End date rule:
  135. * See DST Start date rule. The difference here is this is the day
  136. * daylight savings ends (transition to STD).
  137. *
  138. * End time:
  139. * Same as Start time.
  140. */
  141. template<class time_zone_type, class rule_type>
  142. class tz_db_base {
  143. public:
  144. /* Having CharT as a template parameter created problems
  145. * with posix_time::duration_from_string. Templatizing
  146. * duration_from_string was not possible at this time, however,
  147. * it should be possible in the future (when poor compilers get
  148. * fixed or stop being used).
  149. * Since this class was designed to use CharT as a parameter it
  150. * is simply typedef'd here to ease converting in back to a
  151. * parameter the future */
  152. typedef char char_type;
  153. typedef typename time_zone_type::base_type time_zone_base_type;
  154. typedef typename time_zone_type::time_duration_type time_duration_type;
  155. typedef time_zone_names_base<char_type> time_zone_names;
  156. typedef boost::date_time::dst_adjustment_offsets<time_duration_type> dst_adjustment_offsets;
  157. typedef std::basic_string<char_type> string_type;
  158. //! Constructs an empty database
  159. tz_db_base() {}
  160. //! Process csv data file, may throw exceptions
  161. /*! May throw bad_field_count exceptions */
  162. void load_from_stream(std::istream &in)
  163. {
  164. std::string buff;
  165. while( std::getline(in, buff)) {
  166. parse_string(buff);
  167. }
  168. }
  169. //! Process csv data file, may throw exceptions
  170. /*! May throw data_not_accessible, or bad_field_count exceptions */
  171. void load_from_file(const std::string& pathspec)
  172. {
  173. string_type in_str;
  174. std::string buff;
  175. std::ifstream ifs(pathspec.c_str());
  176. if(!ifs){
  177. boost::throw_exception(data_not_accessible(pathspec));
  178. }
  179. std::getline(ifs, buff); // first line is column headings
  180. this->load_from_stream(ifs);
  181. }
  182. //! returns true if record successfully added to map
  183. /*! Takes a region name in the form of "America/Phoenix", and a
  184. * time_zone object for that region. The id string must be a unique
  185. * name that does not already exist in the database. */
  186. bool add_record(const string_type& region,
  187. boost::shared_ptr<time_zone_base_type> tz)
  188. {
  189. typename map_type::value_type p(region, tz);
  190. return (m_zone_map.insert(p)).second;
  191. }
  192. //! Returns a time_zone object built from the specs for the given region
  193. /*! Returns a time_zone object built from the specs for the given
  194. * region. If region does not exist a local_time::record_not_found
  195. * exception will be thrown */
  196. boost::shared_ptr<time_zone_base_type>
  197. time_zone_from_region(const string_type& region) const
  198. {
  199. // get the record
  200. typename map_type::const_iterator record = m_zone_map.find(region);
  201. if(record == m_zone_map.end()){
  202. return boost::shared_ptr<time_zone_base_type>(); //null pointer
  203. }
  204. return record->second;
  205. }
  206. //! Returns a vector of strings holding the time zone regions in the database
  207. std::vector<std::string> region_list() const
  208. {
  209. typedef std::vector<std::string> vector_type;
  210. vector_type regions;
  211. typename map_type::const_iterator itr = m_zone_map.begin();
  212. while(itr != m_zone_map.end()) {
  213. regions.push_back(itr->first);
  214. ++itr;
  215. }
  216. return regions;
  217. }
  218. private:
  219. typedef std::map<string_type, boost::shared_ptr<time_zone_base_type> > map_type;
  220. map_type m_zone_map;
  221. // start and end rule are of the same type
  222. typedef typename rule_type::start_rule::week_num week_num;
  223. /* TODO: mechanisms need to be put in place to handle different
  224. * types of rule specs. parse_rules() only handles nth_kday
  225. * rule types. */
  226. //! parses rule specs for transition day rules
  227. rule_type* parse_rules(const string_type& sr, const string_type& er) const
  228. {
  229. using namespace gregorian;
  230. // start and end rule are of the same type,
  231. // both are included here for readability
  232. typedef typename rule_type::start_rule start_rule;
  233. typedef typename rule_type::end_rule end_rule;
  234. // these are: [start|end] nth, day, month
  235. int s_nth = 0, s_d = 0, s_m = 0;
  236. int e_nth = 0, e_d = 0, e_m = 0;
  237. split_rule_spec(s_nth, s_d, s_m, sr);
  238. split_rule_spec(e_nth, e_d, e_m, er);
  239. typename start_rule::week_num s_wn, e_wn;
  240. s_wn = get_week_num(s_nth);
  241. e_wn = get_week_num(e_nth);
  242. return new rule_type(start_rule(s_wn, s_d, s_m),
  243. end_rule(e_wn, e_d, e_m));
  244. }
  245. //! helper function for parse_rules()
  246. week_num get_week_num(int nth) const
  247. {
  248. typedef typename rule_type::start_rule start_rule;
  249. switch(nth){
  250. case 1:
  251. return start_rule::first;
  252. case 2:
  253. return start_rule::second;
  254. case 3:
  255. return start_rule::third;
  256. case 4:
  257. return start_rule::fourth;
  258. case 5:
  259. case -1:
  260. return start_rule::fifth;
  261. default:
  262. // shouldn't get here - add error handling later
  263. break;
  264. }
  265. return start_rule::fifth; // silence warnings
  266. }
  267. //! splits the [start|end]_date_rule string into 3 ints
  268. void split_rule_spec(int& nth, int& d, int& m, string_type rule) const
  269. {
  270. typedef boost::char_separator<char_type, std::char_traits<char_type> > char_separator_type;
  271. typedef boost::tokenizer<char_separator_type,
  272. std::basic_string<char_type>::const_iterator,
  273. std::basic_string<char_type> > tokenizer;
  274. typedef boost::tokenizer<char_separator_type,
  275. std::basic_string<char_type>::const_iterator,
  276. std::basic_string<char_type> >::iterator tokenizer_iterator;
  277. const char_type sep_char[] = { ';', '\0'};
  278. char_separator_type sep(sep_char);
  279. tokenizer tokens(rule, sep); // 3 fields
  280. if ( std::distance ( tokens.begin(), tokens.end ()) != 3 ) {
  281. std::ostringstream msg;
  282. msg << "Expecting 3 fields, got "
  283. << std::distance ( tokens.begin(), tokens.end ())
  284. << " fields in line: " << rule;
  285. boost::throw_exception(bad_field_count(msg.str()));
  286. }
  287. tokenizer_iterator tok_iter = tokens.begin();
  288. nth = std::atoi(tok_iter->c_str()); ++tok_iter;
  289. d = std::atoi(tok_iter->c_str()); ++tok_iter;
  290. m = std::atoi(tok_iter->c_str());
  291. }
  292. //! Take a line from the csv, turn it into a time_zone_type.
  293. /*! Take a line from the csv, turn it into a time_zone_type,
  294. * and add it to the map. Zone_specs in csv file are expected to
  295. * have eleven fields that describe the time zone. Returns true if
  296. * zone_spec successfully added to database */
  297. bool parse_string(string_type& s)
  298. {
  299. std::vector<string_type> result;
  300. typedef boost::token_iterator_generator<boost::escaped_list_separator<char_type>, string_type::const_iterator, string_type >::type token_iter_type;
  301. token_iter_type i = boost::make_token_iterator<string_type>(s.begin(), s.end(),boost::escaped_list_separator<char_type>());
  302. token_iter_type end;
  303. while (i != end) {
  304. result.push_back(*i);
  305. i++;
  306. }
  307. enum db_fields { ID, STDABBR, STDNAME, DSTABBR, DSTNAME, GMTOFFSET,
  308. DSTADJUST, START_DATE_RULE, START_TIME, END_DATE_RULE,
  309. END_TIME, FIELD_COUNT };
  310. //take a shot at fixing gcc 4.x error
  311. const unsigned int expected_fields = static_cast<unsigned int>(FIELD_COUNT);
  312. if (result.size() != expected_fields) {
  313. std::ostringstream msg;
  314. msg << "Expecting " << FIELD_COUNT << " fields, got "
  315. << result.size() << " fields in line: " << s;
  316. boost::throw_exception(bad_field_count(msg.str()));
  317. BOOST_DATE_TIME_UNREACHABLE_EXPRESSION(return false); // should never reach
  318. }
  319. // initializations
  320. bool has_dst = true;
  321. if(result[DSTABBR] == std::string()){
  322. has_dst = false;
  323. }
  324. // start building components of a time_zone
  325. time_zone_names names(result[STDNAME], result[STDABBR],
  326. result[DSTNAME], result[DSTABBR]);
  327. time_duration_type utc_offset =
  328. str_from_delimited_time_duration<time_duration_type,char_type>(result[GMTOFFSET]);
  329. dst_adjustment_offsets adjust(time_duration_type(0,0,0),
  330. time_duration_type(0,0,0),
  331. time_duration_type(0,0,0));
  332. boost::shared_ptr<rule_type> rules;
  333. if(has_dst){
  334. adjust = dst_adjustment_offsets(
  335. str_from_delimited_time_duration<time_duration_type,char_type>(result[DSTADJUST]),
  336. str_from_delimited_time_duration<time_duration_type,char_type>(result[START_TIME]),
  337. str_from_delimited_time_duration<time_duration_type,char_type>(result[END_TIME])
  338. );
  339. rules =
  340. boost::shared_ptr<rule_type>(parse_rules(result[START_DATE_RULE],
  341. result[END_DATE_RULE]));
  342. }
  343. string_type id(result[ID]);
  344. boost::shared_ptr<time_zone_base_type> zone(new time_zone_type(names, utc_offset, adjust, rules));
  345. return (add_record(id, zone));
  346. }
  347. };
  348. } } // namespace
  349. #endif // DATE_TIME_TZ_DB_BASE_HPP__