[tz] [PROPOSED 3/3] New zic -b option to shrink data and test y2038

Paul Eggert eggert at cs.ucla.edu
Fri Jun 14 00:01:39 UTC 2019


* Makefile (ZFLAGS): Mention -b in comment.
* NEWS: Mention this.
* zic.c (usage): Mention -b.
(bloat): New static var.
(want_bloat): New static function.
(main): Parse new -b option.
(writezone): In slim output, suppress QTBUG-53071 workaround,
pre-2011 bug workaround, all-zero isstdcnt and isutcnt indicators,
and 32-bit data (except for header and the single type that RFC
8536 requires).
(outzone): In slim output, suppress redundant transitions back to
1900 or forward through 2038, and suppress redundant transitions
just before the TZ string takes over.
* zic.8: Document -b.  Sort options alphabetically.
---
 Makefile | 12 ++++++--
 NEWS     | 14 +++++++++
 zic.8    | 47 ++++++++++++++++++++++++------
 zic.c    | 89 ++++++++++++++++++++++++++++++++++++++++++++------------
 4 files changed, 133 insertions(+), 29 deletions(-)

diff --git a/Makefile b/Makefile
index 25f1d35..35db07d 100644
--- a/Makefile
+++ b/Makefile
@@ -362,9 +362,17 @@ LEAPSECONDS=
 zic=		./zic
 ZIC=		$(zic) $(ZFLAGS)
 
-# To shrink the size of installed TZif files,
+# Append "-b fat" to install larger TZif files that work around
+# incompatibilities and bugs in some TZif readers, notably readers that
+# mishandle 64-bit data in TZif files.  Append "-b slim" to install
+# smaller TZif files that test for these year-2038 bugs.  If no -b
+# option is given, the current default is "-b fat", but this is
+# intended to change as buggy readers often mishandle timestamps
+# after 2038 anyway.
+#
+# To shrink the size of installed TZif files even further,
 # append "-r @N" to omit data before N-seconds-after-the-Epoch.
-# See the zic man page for more about -r.
+# See the zic man page for more about -b and -r.
 ZFLAGS=
 
 # How to use zic to install TZif files.
diff --git a/NEWS b/NEWS
index 56bbc7f..0e4c688 100644
--- a/NEWS
+++ b/NEWS
@@ -41,6 +41,20 @@ Unreleased, experimental changes
 
   Changes to code
 
+    zic's new -b option supports a way to control data bloat and to
+    test for year-2038 bugs in software that reads TZif files.
+    'zic -b fat' and 'zic -b slim' generate larger and smaller output;
+    for example, changing from fat to slim shrinks the Europe/London
+    file from 3648 to 1625 bytes, saving about 55%.  Fat and slim
+    files represent the same time data and use the same TZif format as
+    documented in tzfile(5) and in Internet RFC 8536.  Fat format
+    attempts to work around bugs or incompatibilities in older
+    software that reads TZif files, notably software that mishandles
+    64-bit TZif data.  Slim format is more efficient and tests for
+    these bugs.  Currently zic defaults to fat format, although this
+    is intended to change in future zic versions, as the buggy
+    software typically mishandles post-2038 timestamps anyway.
+
     zic no longer treats a set of rules ending in 2037 specially.
     Previously, zic assumed that such a ruleset meant that future
     timestamps could not be predicted, and therefore omitted a
diff --git a/zic.8 b/zic.8
index 89ea2d9..d02994f 100644
--- a/zic.8
+++ b/zic.8
@@ -45,6 +45,32 @@ Output version information and exit.
 .B \*-\*-help
 Output short usage message and exit.
 .TP
+.BI "\*-b " bloat
+Output backward-compatibility data as specified by
+.IR bloat .
+If
+.I bloat
+is
+.BR fat ,
+generate additional data entries that work around potential bugs or
+incompatibilities in older software, such as software that mishandles
+the 64-bit generated data.
+If
+.I bloat
+is
+.BR slim ,
+keep the output files small; this can help check for the bugs
+and incompatibilities.
+Although the default is currently
+.BR fat ,
+this is intended to change in future
+.B zic
+versions, as software that mishandles the 64-bit data typically
+mishandles timestamps after the year 2038 anyway.
+Also see the
+.B \*-r
+option for another way to shrink output size.
+.TP
 .BI "\*-d " directory
 Create time conversion information files in the named directory rather than
 in the standard directory named below.
@@ -59,6 +85,11 @@ will act as if the input contained a link line of the form
 .ti +.5i
 Link	\fItimezone\fP		localtime
 .TP
+.BI "\*-L " leapsecondfilename
+Read leap second information from the file with the given name.
+If this option is not used,
+no leap second information appears in output files.
+.TP
 .BI "\*-p " timezone
 Use
 .IR timezone 's
@@ -70,15 +101,6 @@ will act as if the input contained a link line of the form
 .ti +.5i
 Link	\fItimezone\fP		posixrules
 .TP
-.BI "\*-t " file
-When creating local time information, put the configuration link in
-the named file rather than in the standard location.
-.TP
-.BI "\*-L " leapsecondfilename
-Read leap second information from the file with the given name.
-If this option is not used,
-no leap second information appears in output files.
-.TP
 .BR "\*-r " "[\fB@\fP\fIlo\fP][\fB/@\fP\fIhi\fP]"
 Reduce the size of output files by limiting their applicability
 to timestamps in the range from
@@ -102,6 +124,13 @@ On platforms with GNU
 .BR date ,
 .q "zic \-r @$(date +%s)"
 omits data intended for past timestamps.
+Also see the
+.B "\*-b slim"
+option for another way to shrink output size.
+.TP
+.BI "\*-t " file
+When creating local time information, put the configuration link in
+the named file rather than in the standard location.
 .TP
 .B \*-v
 Be more verbose, and complain about the following situations:
diff --git a/zic.c b/zic.c
index d43cc0d..34228a5 100644
--- a/zic.c
+++ b/zic.c
@@ -574,8 +574,9 @@ usage(FILE *stream, int status)
 {
   fprintf(stream,
 	  _("%s: usage is %s [ --version ] [ --help ] [ -v ] \\\n"
-	    "\t[ -l localtime ] [ -p posixrules ] [ -d directory ] \\\n"
-	    "\t[ -t localtime-link ] [ -L leapseconds ] [ -r '[@lo][/@hi]' ] \\\n"
+	    "\t[ -b {slim|fat} ] [ -d directory ] [ -l localtime ]"
+	    " [ -L leapseconds ] \\\n"
+	    "\t[ -p posixrules ] [ -r '[@lo][/@hi]' ] [ -t localtime-link ] \\\n"
 	    "\t[ filename ... ]\n\n"
 	    "Report bugs to %s.\n"),
 	  progname, progname, REPORT_BUGS_TO);
@@ -650,6 +651,17 @@ static const char *	leapsec;
 static const char *	tzdefault;
 static const char *	yitcommand;
 
+/* -1 if the TZif output file should be slim, 0 if default, 1 if the
+   output should be fat for backward compatibility.  Currently the
+   default is fat, although this may change.  */
+static int bloat;
+
+static bool
+want_bloat(void)
+{
+  return 0 <= bloat;
+}
+
 int
 main(int argc, char **argv)
 {
@@ -681,10 +693,22 @@ main(int argc, char **argv)
 		} else if (strcmp(argv[k], "--help") == 0) {
 			usage(stdout, EXIT_SUCCESS);
 		}
-	while ((c = getopt(argc, argv, "d:l:L:p:r:st:vy:")) != EOF && c != -1)
+	while ((c = getopt(argc, argv, "b:d:l:L:p:r:st:vy:")) != EOF && c != -1)
 		switch (c) {
 			default:
 				usage(stderr, EXIT_FAILURE);
+			case 'b':
+				if (strcmp(optarg, "slim") == 0) {
+				  if (0 < bloat)
+				    error(_("incompatible -b options"));
+				  bloat = -1;
+				} else if (strcmp(optarg, "fat") == 0) {
+				  if (bloat < 0)
+				    error(_("incompatible -b options"));
+				  bloat = 1;
+				} else
+				  error(_("invalid option: -b '%s'"), optarg);
+				break;
 			case 'd':
 				if (directory == NULL)
 					directory = optarg;
@@ -1921,7 +1945,7 @@ writezone(const char *const name, const char *const string, char version,
 	   seconds, as the idea is to insert a transition just before
 	   32-bit time_t rolls around, and this occurs at a slightly
 	   different moment if transitions are leap-second corrected.  */
-	if (WORK_AROUND_QTBUG_53071 && timecnt != 0
+	if (WORK_AROUND_QTBUG_53071 && timecnt != 0 && want_bloat()
 	    && ats[timecnt - 1] < y2038_boundary - 1 && strchr(string, '<')) {
 	  ats[timecnt] = y2038_boundary - 1;
 	  types[timecnt] = types[timecnt - 1];
@@ -1970,7 +1994,7 @@ writezone(const char *const name, const char *const string, char version,
 		int old0;
 		char		omittype[TZ_MAX_TYPES];
 		int		typemap[TZ_MAX_TYPES];
-		register int	thistypecnt;
+		int		thistypecnt, stdcnt, utcnt;
 		char		thischars[TZ_MAX_CHARS];
 		int		thischarcnt;
 		bool		toomanytimes;
@@ -2053,7 +2077,7 @@ writezone(const char *const name, const char *const string, char version,
 		** (to help get global "altzone" and "timezone" variables
 		** set correctly).
 		*/
-		{
+		if (want_bloat()) {
 			register int	mrudst, mrustd, hidst, histd, type;
 
 			hidst = histd = mrudst = mrustd = -1;
@@ -2100,12 +2124,16 @@ writezone(const char *const name, const char *const string, char version,
 
 		for (i = 0; i < sizeof indmap / sizeof indmap[0]; ++i)
 			indmap[i] = -1;
-		thischarcnt = 0;
+		thischarcnt = stdcnt = utcnt = 0;
 		for (i = old0; i < typecnt; i++) {
 			register char *	thisabbr;
 
 			if (omittype[i])
 				continue;
+			if (ttisstds[i])
+			  stdcnt = thistypecnt;
+			if (ttisgmts[i])
+			  utcnt = thistypecnt;
 			if (indmap[abbrinds[i]] >= 0)
 				continue;
 			thisabbr = &chars[abbrinds[i]];
@@ -2118,12 +2146,18 @@ writezone(const char *const name, const char *const string, char version,
 			}
 			indmap[abbrinds[i]] = j;
 		}
+		if (pass == 1 && !want_bloat()) {
+		  utcnt = stdcnt = thisleapcnt = 0;
+		  thistimecnt = - locut - hicut;
+		  thistypecnt = thischarcnt = 1;
+		  thistimelim = thistimei;
+		}
 #define DO(field)	fwrite(tzh.field, sizeof tzh.field, 1, fp)
 		tzh = tzh0;
 		memcpy(tzh.tzh_magic, TZ_MAGIC, sizeof tzh.tzh_magic);
 		tzh.tzh_version[0] = version;
-		convert(thistypecnt, tzh.tzh_ttisgmtcnt);
-		convert(thistypecnt, tzh.tzh_ttisstdcnt);
+		convert(utcnt, tzh.tzh_ttisgmtcnt);
+		convert(stdcnt, tzh.tzh_ttisstdcnt);
 		convert(thisleapcnt, tzh.tzh_leapcnt);
 		convert(locut + thistimecnt + hicut, tzh.tzh_timecnt);
 		convert(thistypecnt, tzh.tzh_typecnt);
@@ -2138,6 +2172,15 @@ writezone(const char *const name, const char *const string, char version,
 		DO(tzh_typecnt);
 		DO(tzh_charcnt);
 #undef DO
+		if (pass == 1 && !want_bloat()) {
+		  /* Output a minimal data block with just one time type.  */
+		  puttzcode(0, fp);	/* utoff */
+		  putc(0, fp);		/* dst */
+		  putc(0, fp);		/* index of abbreviation */
+		  putc(0, fp);		/* empty-string abbreviation */
+		  continue;
+		}
+
 		/* Output a LO_TIME transition if needed; see limitrange.
 		   But do not go below the minimum representable value
 		   for this pass.  */
@@ -2193,10 +2236,12 @@ writezone(const char *const name, const char *const string, char version,
 			puttzcodepass(todo, fp, pass);
 			puttzcode(corr[i], fp);
 		}
-		for (i = old0; i < typecnt; i++)
+		if (stdcnt != 0)
+		  for (i = old0; i < typecnt; i++)
 			if (!omittype[i])
 				putc(ttisstds[i], fp);
-		for (i = old0; i < typecnt; i++)
+		if (utcnt != 0)
+		  for (i = old0; i < typecnt; i++)
 			if (!omittype[i])
 				putc(ttisgmts[i], fp);
 		swaptypes(old0, thisdefaulttype);
@@ -2643,16 +2688,18 @@ outzone(const struct zone *zpfirst, ptrdiff_t zonecount)
 			max_year = min_year + years_of_observations;
 		}
 	}
-	/*
-	** For the benefit of older systems,
-	** generate data from 1900 through 2038.
-	*/
-	if (min_year > 1900)
-		min_year = 1900;
 	max_year0 = max_year;
-	if (max_year < 2038)
+	if (want_bloat()) {
+	  /* For the benefit of older systems,
+	     generate data from 1900 through 2038.  */
+	  if (min_year > 1900)
+		min_year = 1900;
+	  if (max_year < 2038)
 		max_year = 2038;
+	}
+
 	for (i = 0; i < zonecount; ++i) {
+		struct rule *prevrp = NULL;
 		/*
 		** A guess that may well be corrected later.
 		*/
@@ -2788,6 +2835,11 @@ outzone(const struct zone *zpfirst, ptrdiff_t zonecount)
 				doabbr(ab, zp, rp->r_abbrvar,
 				       rp->r_isdst, rp->r_stdoff, false);
 				offset = oadd(zp->z_gmtoff, rp->r_stdoff);
+				if (!want_bloat() && !useuntil && !do_extend
+				    && prevrp
+				    && rp->r_hiyear == ZIC_MAX
+				    && prevrp->r_hiyear == ZIC_MAX)
+				  break;
 				type = addtype(offset, ab, rp->r_isdst,
 					rp->r_todisstd, rp->r_todisgmt);
 				if (defaulttype < 0 && !rp->r_isdst)
@@ -2797,6 +2849,7 @@ outzone(const struct zone *zpfirst, ptrdiff_t zonecount)
 					  && ktime < attypes[lastatmax].at))
 				  lastatmax = timecnt;
 				addtt(ktime, type);
+				prevrp = rp;
 			}
 		}
 		if (usestart) {
-- 
2.21.0



More information about the tz mailing list