[tz] data not represented in tzfiles

Paul Eggert eggert at cs.ucla.edu
Mon Sep 9 06:56:47 UTC 2013


Zefram wrote:
> Two periods of DST each year can't be expressed, or two-stage onset
> of DST.  Anything involving more than two Rule entries being applicable
> up to max_year.

Good point.  I have pushed what I think is a merge of your change
into the experimental version on github (see attached).

In the current tz data this change affects only Asia/Tehran,
and does not affect the output of 'zdump -v Asia/Tehran'.
I assume that makes sense; if not, please let me know.

From cd270af1583e13bf70331d78d8aecc56052190b0 Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert at cs.ucla.edu>
Date: Sun, 8 Sep 2013 23:53:35 -0700
Subject: [PATCH] * zic.c: Tweak 400-years-hack to handle some edge
 cases better.

Derived from Zefram's patch mentioned in
<http://mm.icann.org/pipermail/tz/2013-July/019470.html>.
With the current tz data, this affects only the Asia/Tehran file,
and it doesn't affect zdump output.
(YEAR_BY_YEAR_ZONE): New constant.
(stringzone): Return it when applicable.
(outzone): Search through a couple of extra years when extending.
Extend when we ran past 2037 in the data, too.
---
 zic.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 65 insertions(+), 8 deletions(-)

diff --git a/zic.c b/zic.c
index dcab3aa..9939195 100644
--- a/zic.c
+++ b/zic.c
@@ -1873,6 +1873,8 @@ rule_cmp(struct rule const *a, struct rule const *b)
 	return a->r_dayofmonth - b->r_dayofmonth;
 }
 
+enum { YEAR_BY_YEAR_ZONE = 1 };
+
 static int
 stringzone(char *result, const struct zone *const zpfirst, const int zonecount)
 {
@@ -1925,7 +1927,7 @@ stringzone(char *result, const struct zone *const zpfirst, const int zonecount)
 		** do not try to apply a rule to the zone.
 		*/
 		if (stdrp != NULL && stdrp->r_hiyear == 2037)
-			return -1;
+			return YEAR_BY_YEAR_ZONE;
 
 		if (stdrp != NULL && stdrp->r_stdoff != 0) {
 			/* Perpetual DST.  */
@@ -2006,6 +2008,7 @@ outzone(const struct zone * const zpfirst, const int zonecount)
 	register int			max_envvar_len;
 	register int			prodstic; /* all rules are min to max */
 	register int			compat;
+	register int			do_extend;
 
 	max_abbr_len = 2 + max_format_len + max_abbrvar_len;
 	max_envvar_len = 2 * max_abbr_len + 5 * 9;
@@ -2055,7 +2058,8 @@ outzone(const struct zone * const zpfirst, const int zonecount)
 	** Generate lots of data if a rule can't cover all future times.
 	*/
 	compat = stringzone(envvar, zpfirst, zonecount);
-	if (noise && compat != 0) {
+	do_extend = compat < 0 || compat == YEAR_BY_YEAR_ZONE;
+	if (noise && compat != 0 && compat != YEAR_BY_YEAR_ZONE) {
 		if (compat < 0)
 			warning("%s %s",
 				_("no POSIX environment variable for zone"),
@@ -2069,12 +2073,27 @@ outzone(const struct zone * const zpfirst, const int zonecount)
 				zpfirst->z_name, compat);
 		}
 	}
-	if (envvar[0] == '\0') {
-		if (min_year >= ZIC_MIN + YEARSPERREPEAT)
-			min_year -= YEARSPERREPEAT;
+	if (do_extend) {
+		/*
+		** Search through a couple of extra years past the obvious
+		** 400, to avoid edge cases.  For example, suppose a non-POSIX
+		** rule applies from 2012 onwards and has transitions in March
+		** and September, plus some one-off transitions in November
+		** 2013.  If zic looked only at the last 400 years, it would
+		** set max_year=2413, with the intent that the 400 years 2014
+		** through 2413 will be repeated.  The last transition listed
+		** in the tzfile would be in 2413-09, less than 400 years
+		** after the last one-off transition in 2013-11.  Two years
+		** might be overkill, but with the kind of edge cases
+		** available we're not sure that one year would suffice.
+		*/
+		enum { years_of_observations = YEARSPERREPEAT + 2 };
+
+		if (min_year >= ZIC_MIN + years_of_observations)
+			min_year -= years_of_observations;
 		else	min_year = ZIC_MIN;
-		if (max_year <= ZIC_MAX - YEARSPERREPEAT)
-			max_year += YEARSPERREPEAT;
+		if (max_year <= ZIC_MAX - years_of_observations)
+			max_year += years_of_observations;
 		else	max_year = ZIC_MAX;
 		/*
 		** Regardless of any of the above,
@@ -2084,7 +2103,7 @@ outzone(const struct zone * const zpfirst, const int zonecount)
 		*/
 		if (prodstic) {
 			min_year = 1900;
-			max_year = min_year + YEARSPERREPEAT;
+			max_year = min_year + years_of_observations;
 		}
 	}
 	/*
@@ -2250,6 +2269,44 @@ error(_("can't determine time zone abbreviation to use just after until time"));
 				starttime = tadd(starttime, -gmtoff);
 		}
 	}
+	if (do_extend) {
+		/*
+		** If we're extending the explicitly listed observations
+		** for 400 years because we can't fill the POSIX-TZ field,
+		** check whether we actually ended up explicitly listing
+		** observations through that period.  If there aren't any
+		** near the end of the 400-year period, add a redundant
+		** one at the end of the final year, to make it clear
+		** that we are claiming to have definite knowledge of
+		** the lack of transitions up to that point.
+		*/
+		struct rule xr;
+		struct attype *lastat;
+		xr.r_month = TM_JANUARY;
+		xr.r_dycode = DC_DOM;
+		xr.r_dayofmonth = 1;
+		xr.r_tod = 0;
+		for (lastat = &attypes[0], i = 1; i < timecnt; i++)
+			if (attypes[i].at > lastat->at)
+				lastat = &attypes[i];
+		if (lastat->at < rpytime(&xr, max_year - 1)) {
+			/*
+			** Create new type code for the redundant entry,
+			** to prevent it being optimised away.
+			*/
+			if (typecnt >= TZ_MAX_TYPES) {
+				error(_("too many local time types"));
+				exit(EXIT_FAILURE);
+			}
+			gmtoffs[typecnt] = gmtoffs[lastat->type];
+			isdsts[typecnt] = isdsts[lastat->type];
+			ttisstds[typecnt] = ttisstds[lastat->type];
+			ttisgmts[typecnt] = ttisgmts[lastat->type];
+			abbrinds[typecnt] = abbrinds[lastat->type];
+			++typecnt;
+			addtt(rpytime(&xr, max_year + 1), typecnt-1);
+		}
+	}
 	writezone(zpfirst->z_name, envvar);
 	free(startbuf);
 	free(ab);
-- 
1.8.1.2




More information about the tz mailing list