[tz] [PROPOSED] Use type 0 for timestamps before first transition

Paul Eggert eggert at cs.ucla.edu
Sun Jun 3 19:26:50 UTC 2018


I ran into this problem when helping to draft:
https://tools.ietf.org/html/draft-murchison-tzdist-tzif-04
Its section 3.2 gives a complex four-step procedure for
determining the type of timestamps before the first transition,
derived from 2018e localtime.c.  However, I just now checked, and
glibc does only the last two steps.  Furthermore, in practice
nowadays only the last step is needed in almost all zones, because
type 0 is almost always the right type to use; the only exceptions
are the legacy zones EST5EDT, CST6CDT, MST7MDT, PST8PDT, CET, MET,
and EET.  As the tzif-04 section 3.2 procedure is only for
backward compatibility with older TZif files, now is a good time
to clean up by specifying a simple way to deal with timestamps
before the first transition, namely, to use time type 0.
So, this patch changes zic so that the first-used standard time type
(if there is one) is always time type 0.  This is compatible with
glibc and with any other program that uses the heuristic of using
the lowest-numbered standard-time time type for timestamps before
the first transition, since it merely changes zic to output
a standard-time type 0.
* NEWS, tzfile.5: Document this.
* localtime.c (tzparse): Make standard time type 0, if standard
time is in use.  This should help future-proof this program
if we ever get rid of the defaulttype member and just use 0.
* zic.c (swaptypes): New function.
(writezone): Reorder types to make type 0 the default type.
Negate the local variable writetype into omittype, so that
strlen can be used on it, and enlarge it by 1 to make room for a
sentinel.  Use memset where appropriate.
---
 NEWS        | 13 +++++++++
 localtime.c | 43 ++++++++++++++++++++++--------
 tzfile.5    | 17 ++++++++----
 zic.c       | 77 +++++++++++++++++++++++++++++++++++++++--------------
 4 files changed, 114 insertions(+), 36 deletions(-)

diff --git a/NEWS b/NEWS
index 0ef70a1..ce009e7 100644
--- a/NEWS
+++ b/NEWS
@@ -17,6 +17,16 @@ Unreleased, experimental changes
     observed DST in 1942/79, not 1961/80, and there were several
     errors for transition times and dates.  (Thanks to P Chan.)
 
+  Changes to code
+
+    zic now always generates TZif files where time type 0 is used for
+    timestamps before the first transition.  This simplifies the
+    reading of TZif files and should not affect behavior of existing
+    TZif readers because the same set of time types is used; only
+    their internal indexes may have changed.  This affects only the
+    legacy zones EST5EDT, CST6CDT, MST7MDT, PST8PDT, CET, MET, and
+    EET, which previously used nonzero types for these timestamps.
+
   Changes to documentation
 
     New restrictions: A Rule name must start with a character that
@@ -25,6 +35,9 @@ Unreleased, experimental changes
     The latter restriction makes room for future extensions (a
     possibility noted by Tom Lane).
 
+    tzfile.5 now documents what time types apply before the first and
+    after the last transition, if any.
+
     The name "TZif" is now used for the tz binary data format.
 
     tz-link.htm now mentions the A0 TimeZone Migration utilities.
diff --git a/localtime.c b/localtime.c
index 5b5a5b1..21160c0 100644
--- a/localtime.c
+++ b/localtime.c
@@ -131,7 +131,11 @@ struct state {
 	char		chars[BIGGEST(BIGGEST(TZ_MAX_CHARS + 1, sizeof gmt),
 				(2 * (MY_TZNAME_MAX + 1)))];
 	struct lsinfo	lsis[TZ_MAX_LEAPS];
-	int		defaulttype; /* for early times or if no transitions */
+
+	/* The time type to use for early times or if no transitions.
+	   It is always zero for recent tzdb releases.
+	   It might be nonzero for data from tzdb 2018e or earlier.  */
+	int defaulttype;
 };
 
 enum r_type {
@@ -657,6 +661,17 @@ tzloadbody(char const *name, struct state *sp, bool doextend,
 					break;
 		}
 	}
+
+	/* Infer sp->defaulttype from the data.  Although this default
+	   type is always zero for data from recent tzdb releases,
+	   things are trickier for data from tzdb 2018e or earlier.
+
+	   The first set of heuristics work around bugs in 32-bit data
+	   generated by tzdb 2013c or earlier.  The workaround is for
+	   zones like Australia/Macquarie where timestamps before the
+	   first transition have a time type that is not the earliest
+	   standard-time type.  See:
+	   https://mm.icann.org/pipermail/tz/2013-May/019368.html */
 	/*
 	** If type 0 is unused in transitions,
 	** it's the type to use for early times.
@@ -678,6 +693,9 @@ tzloadbody(char const *name, struct state *sp, bool doextend,
 			if (!sp->ttis[i].tt_isdst)
 				break;
 	}
+	/* The next heuristics are for data generated by tzdb 2018e or
+	   earlier, for zones like EST5EDT where the first transition
+	   is to DST.  */
 	/*
 	** If no result yet, find the first standard type.
 	** If there is none, punt to type zero.
@@ -690,7 +708,12 @@ tzloadbody(char const *name, struct state *sp, bool doextend,
 				break;
 			}
 	}
+	/* A simple 'sp->defaulttype = 0;' would suffice here if we
+	   didn't have to worry about 2018e-or-earlier data.  Even
+	   simpler would be to remove the defaulttype member and just
+	   use 0 in its place.  */
 	sp->defaulttype = i;
+
 	return 0;
 }
 
@@ -1115,8 +1138,8 @@ tzparse(const char *name, struct state *sp, bool lastditch)
 			/*
 			** Two transitions per year, from EPOCH_YEAR forward.
 			*/
-			init_ttinfo(&sp->ttis[0], -dstoffset, true, stdlen + 1);
-			init_ttinfo(&sp->ttis[1], -stdoffset, false, 0);
+			init_ttinfo(&sp->ttis[0], -stdoffset, false, 0);
+			init_ttinfo(&sp->ttis[1], -dstoffset, true, stdlen + 1);
 			sp->defaulttype = 0;
 			timecnt = 0;
 			janfirst = 0;
@@ -1157,17 +1180,14 @@ tzparse(const char *name, struct state *sp, bool lastditch)
 					if (! increment_overflow_time
 					    (&sp->ats[timecnt],
 					     janoffset + starttime))
-					  sp->types[timecnt++] = reversed;
-					else if (janoffset)
-					  sp->defaulttype = reversed;
+					  sp->types[timecnt++] = !reversed;
 					sp->ats[timecnt] = janfirst;
 					if (! increment_overflow_time
 					    (&sp->ats[timecnt],
 					     janoffset + endtime)) {
-					  sp->types[timecnt++] = !reversed;
+					  sp->types[timecnt++] = reversed;
 					  yearlim = year + YEARSPERREPEAT + 1;
-					} else if (janoffset)
-					  sp->defaulttype = !reversed;
+					}
 				}
 				if (increment_overflow_time
 				    (&janfirst, janoffset + yearsecs))
@@ -1175,9 +1195,10 @@ tzparse(const char *name, struct state *sp, bool lastditch)
 				janoffset = 0;
 			}
 			sp->timecnt = timecnt;
-			if (! timecnt)
+			if (! timecnt) {
+				sp->ttis[0] = sp->ttis[1];
 				sp->typecnt = 1;	/* Perpetual DST.  */
-			else if (YEARSPERREPEAT < year - yearbeg)
+			} else if (YEARSPERREPEAT < year - yearbeg)
 				sp->goback = sp->goahead = true;
 		} else {
 			register int_fast32_t	theirstdoffset;
diff --git a/tzfile.5 b/tzfile.5
index 530397f..13213d2 100644
--- a/tzfile.5
+++ b/tzfile.5
@@ -70,7 +70,9 @@ at which the rules for computing local time change.
 one-byte unsigned integer values;
 each one tells which of the different types of local time types
 described in the file is associated with the time period
-starting with the same-indexed transition time.
+starting with the same-indexed transition time
+and continuing up to but not including the next transition time
+(or continuing for one second, if this is the last transition).
 These values serve as indices into the next field.
 .IP *
 .I tzh_typecnt
@@ -159,15 +161,20 @@ eight bytes are used for each transition time or leap second time.
 After the second header and data comes a newline-enclosed,
 POSIX-TZ-environment-variable-style string for use in handling instants
 after the last transition time stored in the file
-(with nothing between the newlines if there is no POSIX representation for
-such instants).
-The POSIX-style string must agree with the local time type after
-both data's last transition times; for example, given the string
+or for all instants if the file has no transitions.
+The POSIX-style TZ string is empty (i.e., nothing between the newlines)
+if there is no POSIX representation for such instants.
+If nonempty, the POSIX-style TZ string must agree with the local time
+type after both data's last transition times if present;
+for example, given the string
 .q "WET0WEST,M3.5.0,M10.5.0/3"
 then if a last transition time is in July, the transition's local time
 type must specify a daylight-saving time abbreviated
 .q "WEST"
 that is one hour east of UT.
+Also, if there is at least one transition, time type 0 is associated
+with the time period from the indefinite past up to but not including
+the earliest transition time.
 .SS Version 3 format
 For version-3-format time zone files, the POSIX-TZ-style string may
 use two minor extensions to the POSIX TZ format, as described in
diff --git a/zic.c b/zic.c
index 679b3e2..ce9ccaf 100644
--- a/zic.c
+++ b/zic.c
@@ -1759,6 +1759,17 @@ is32(const zic_t x)
 	return INT32_MIN <= x && x <= INT32_MAX;
 }
 
+static void
+swaptypes(int i, int j)
+{
+  { zic_t t = gmtoffs[i]; gmtoffs[i] = gmtoffs[j]; gmtoffs[j] = t; }
+  { char t = isdsts[i]; isdsts[i] = isdsts[j]; isdsts[j] = t; }
+  { unsigned char t = abbrinds[i]; abbrinds[i] = abbrinds[j];
+    abbrinds[j] = t; }
+  { bool t = ttisstds[i]; ttisstds[i] = ttisstds[j]; ttisstds[j] = t; }
+  { bool t = ttisgmts[i]; ttisgmts[i] = ttisgmts[j]; ttisgmts[j] = t; }
+}
+
 static void
 writezone(const char *const name, const char *const string, char version)
 {
@@ -1904,7 +1915,8 @@ writezone(const char *const name, const char *const string, char version)
 	for (pass = 1; pass <= 2; ++pass) {
 		register ptrdiff_t thistimei, thistimecnt, thistimelim;
 		register int	thisleapi, thisleapcnt, thisleaplim;
-		int		writetype[TZ_MAX_TYPES];
+		int old0, new0;
+		char		omittype[TZ_MAX_TYPES + 1];
 		int		typemap[TZ_MAX_TYPES];
 		register int	thistypecnt;
 		char		thischars[TZ_MAX_CHARS];
@@ -1929,25 +1941,46 @@ writezone(const char *const name, const char *const string, char version)
 		  error(_("too many transition times"));
 		thistimelim = thistimei + thistimecnt;
 		thisleaplim = thisleapi + thisleapcnt;
-		for (i = 0; i < typecnt; ++i)
-			writetype[i] = thistimecnt == timecnt;
+		memset(omittype, thistimecnt != timecnt, typecnt);
 		if (thistimecnt == 0) {
 			/*
 			** No transition times fall in the current
 			** (32- or 64-bit) window.
 			*/
 			if (typecnt != 0)
-				writetype[typecnt - 1] = true;
+			  omittype[typecnt - 1] = false;
 		} else {
 			for (i = thistimei - 1; i < thistimelim; ++i)
 				if (i >= 0)
-					writetype[types[i]] = true;
+				  omittype[types[i]] = false;
 			/*
 			** For America/Godthab and Antarctica/Palmer
 			*/
 			if (thistimei == 0)
-				writetype[0] = true;
+			  omittype[0] = false;
 		}
+
+		/* Reorder types to make type 0 the first-used standard type
+		   if one is used, otherwise the first-used DST type if one
+		   is used, otherwise no reordering.  Use TYPEMAP to swap
+		   OLD0 and NEW0 so that NEW0 appears as type 0 in the output
+		   instead of OLD0.  TYPEMAP also omits unused types.  */
+		omittype[typecnt] = false;  /* strlen sentinel */
+		old0 = strlen(omittype);
+		new0 = -1;
+		for (i = 0; i < timecnt; i++)
+		  if (!omittype[types[i]]) {
+		    if (! isdsts[types[i]]) {
+		      new0 = types[i];
+		      break;
+		    }
+		    if (new0 < 0)
+		      new0 = types[i];
+		  }
+		if (new0 < 0)
+		  new0 = old0;
+		swaptypes(old0, new0);
+
 #ifndef LEAVE_SOME_PRE_2011_SYSTEMS_IN_THE_LURCH
 		/*
 		** For some pre-2011 systems: if the last-to-be-written
@@ -1965,8 +1998,8 @@ writezone(const char *const name, const char *const string, char version)
 				if (isdsts[types[i]])
 					mrudst = types[i];
 				else	mrustd = types[i];
-			for (i = 0; i < typecnt; ++i)
-				if (writetype[i]) {
+			for (i = old0; i < typecnt; i++)
+				if (!omittype[i]) {
 					if (isdsts[i])
 						hidst = i;
 					else	histd = i;
@@ -1980,7 +2013,7 @@ writezone(const char *const name, const char *const string, char version)
 						ttisstds[mrudst],
 						ttisgmts[mrudst]);
 					isdsts[mrudst] = 1;
-					writetype[type] = true;
+					omittype[type] = false;
 			}
 			if (histd >= 0 && mrustd >= 0 && histd != mrustd &&
 				gmtoffs[histd] != gmtoffs[mrustd]) {
@@ -1991,20 +2024,23 @@ writezone(const char *const name, const char *const string, char version)
 						ttisstds[mrustd],
 						ttisgmts[mrustd]);
 					isdsts[mrustd] = 0;
-					writetype[type] = true;
+					omittype[type] = false;
 			}
 		}
 #endif /* !defined LEAVE_SOME_PRE_2011_SYSTEMS_IN_THE_LURCH */
 		thistypecnt = 0;
-		for (i = 0; i < typecnt; ++i)
-			typemap[i] = writetype[i] ?  thistypecnt++ : -1;
+		for (i = old0; i < typecnt; i++)
+		  if (!omittype[i])
+		    typemap[i == old0 ? new0 : i == new0 ? old0 : i]
+		      = thistypecnt++;
+
 		for (i = 0; i < sizeof indmap / sizeof indmap[0]; ++i)
 			indmap[i] = -1;
 		thischarcnt = 0;
-		for (i = 0; i < typecnt; ++i) {
+		for (i = old0; i < typecnt; i++) {
 			register char *	thisabbr;
 
-			if (!writetype[i])
+			if (omittype[i])
 				continue;
 			if (indmap[abbrinds[i]] >= 0)
 				continue;
@@ -2053,8 +2089,8 @@ writezone(const char *const name, const char *const string, char version)
 			uc = typemap[types[i]];
 			fwrite(&uc, sizeof uc, 1, fp);
 		}
-		for (i = 0; i < typecnt; ++i)
-			if (writetype[i]) {
+		for (i = old0; i < typecnt; i++)
+			if (!omittype[i]) {
 				puttzcode(gmtoffs[i], fp);
 				putc(isdsts[i], fp);
 				putc((unsigned char) indmap[abbrinds[i]], fp);
@@ -2087,12 +2123,13 @@ writezone(const char *const name, const char *const string, char version)
 			else	puttzcode64(todo, fp);
 			puttzcode(corr[i], fp);
 		}
-		for (i = 0; i < typecnt; ++i)
-			if (writetype[i])
+		for (i = old0; i < typecnt; i++)
+			if (!omittype[i])
 				putc(ttisstds[i], fp);
-		for (i = 0; i < typecnt; ++i)
-			if (writetype[i])
+		for (i = old0; i < typecnt; i++)
+			if (!omittype[i])
 				putc(ttisgmts[i], fp);
+		swaptypes(old0, new0);
 	}
 	fprintf(fp, "\n%s\n", string);
 	close_file(fp, directory, name);
-- 
2.17.1



More information about the tz mailing list