[tz] version information in tz binary file?

Paul Eggert eggert at cs.ucla.edu
Tue Sep 10 20:18:27 UTC 2013


Thanks for those suggestions.  I've pushed this patch to implement
most of them.  I didn't add documentation for future names
as it's not clear to me what they'll be or what we'll need.
And I didn't suggest putting the program-generator version
into the data, as my experience is that this costs more
regression hassles than it's worth.  I dislike compilers
that scribble their names into the object code.  Of course,
people are free to add -o options to do that, and if there's
sufficient interest we should probably standardize it.

From fe2db73d5f454db17a1fa4e646a79a0e5e2d9756 Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert at cs.ucla.edu>
Date: Tue, 10 Sep 2013 12:49:45 -0700
Subject: [PATCH] Improve tzfile format as suggested by Zefram and Alan
 Barrett.

* tzfile.5: Be a bit more careful about wording for integers.
Use binary terminators for the byte strings, not newlines.
Add a size field.  Terminate by another NUL.  Mention where
future extensions are expected to go.
* zic.c (writezone): Implement this.
(addgenoption): Omit namelen check; no longer needed.
(writevalue): Remove.
---
 tzfile.5 | 73 ++++++++++++++++++++++++++++++++--------------------------------
 zic.c    | 49 +++++++++++++++----------------------------
 2 files changed, 54 insertions(+), 68 deletions(-)

diff --git a/tzfile.5 b/tzfile.5
index d9477eb..edfa475 100644
--- a/tzfile.5
+++ b/tzfile.5
@@ -12,9 +12,8 @@ time zone information files,
 followed by a character identifying the version of the file's format
 (as of 2013, either an ASCII NUL, or '2', or '3')
 followed by fifteen bytes containing zeroes reserved for future use,
-followed by six four-byte values of type
-.BR long ,
-written in a ``standard'' byte order
+followed by six four-byte integer values
+written in a "standard" byte order
 (the high-order byte of the value is written first).
 These values are,
 in order:
@@ -42,18 +41,15 @@ stored in the file.
 .PP
 The above header is followed by
 .I tzh_timecnt
-four-byte values of type
-.BR long ,
-sorted in ascending order.
-These values are written in ``standard'' byte order.
+four-byte signed integer values sorted in ascending order.
+These values are written in "standard" byte order.
 Each is used as a transition time (as returned by
 .IR time (2))
 at which the rules for computing local time change.
 Next come
 .I tzh_timecnt
-one-byte values of type
-.BR "unsigned char" ;
-each one tells which of the different types of ``local time'' types
+one-byte unsigned integer values;
+each one tells which of the different types of "local time" types
 described in the file is associated with the same-indexed transition time.
 These values serve as indices into an array of
 .I ttinfo
@@ -64,19 +60,17 @@ these structures are defined as follows:
 .in +.5i
 .sp
 .nf
-.ta .5i +\w'unsigned int\0\0'u
+.ta .5i +\w'unsigned char\0\0'u
 struct ttinfo {
-	long	tt_gmtoff;
-	int	tt_isdst;
-	unsigned int	tt_abbrind;
+	int32_t	tt_gmtoff;
+	unsigned char	tt_isdst;
+	unsigned char	tt_abbrind;
 };
 .in -.5i
 .fi
 .sp
-Each structure is written as a four-byte value for
-.I tt_gmtoff
-of type
-.BR long ,
+Each structure is written as a four-byte signed integer value for
+.IR tt_gmtoff ,
 in a standard byte order, followed by a one-byte value for
 .I tt_isdst
 and a one-byte value for
@@ -140,36 +134,43 @@ For version-2-format time zone files,
 the above header and data are followed by a second header and data,
 identical in format except that
 eight bytes are used for each transition time or leap second time.
-After the second header and data comes a newline-enclosed,
+After the second header and data,
+and just before the end of the file, comes a newline-enclosed,
 POSIX-TZ-environment-variable-style string for use in handling instants
 after the last transition time stored in the file
 (with nothing between the newlines if there is no POSIX representation for
 such instants).
 .PP
-For version-3-format time zone files, the POSIX-TZ-style string may
-use two minor extensions to the POSIX TZ format, as described in
+Version-3-format time zone files have the following additions:
+.IP
+The POSIX-TZ-style string may use two minor extensions to the
+POSIX TZ format, as described in
 .IR newtzset (3).
 First, the hours part of its transition times may be signed and range from
 \(mi167 through 167 instead of the POSIX-required unsigned values
 from 0 through 24.  Second, DST is in effect all year if it starts
 January 1 at 00:00 and ends December 31 at 24:00 plus the difference
 between daylight saving and standard time.
+.IP
+The newline-enclosed POSIX-TZ-style string is preceded by a section
+containing auxiliary meta-information that is not needed to process
+time stamps.  This section consists of another copy of the
+newline-enclosed POSIX-TZ-style string (this is for the benefit of
+version-2-only clients), followed by a four-byte integer size value,
+followed by zero or more NUL-terminated byte strings, followed by an
+additional NUL.  The size value is the total number of bytes in all
+the byte strings, including the trailing NULs at the end of the
+strings, but not including the additional NUL.  Each byte string
+consists of a name-value pair separated by "=".  Names consist of
+ASCII letters, digits and underscores, and start with a letter;
+duplicate names are not allowed.  Two common names are "name", the
+Zone name for the data, and "version", the data's version number.
+Values can contain any bytes except NUL.
 .PP
-Also, for version-3-format time zone files, the version-2 header and
-data are optionally followed by a section containing auxiliary
-meta-information that is not needed to process time stamps.  This
-section, if present, consists of the four magic bytes "=TZ\en",
-followed by zero or more newline-terminated byte strings, followed by
-another copy of the newline-enclosed POSIX-TZ-style string (this last
-is for the benefit of any older clients that look for the TZ string at
-the very end of the file).  Each newline-terminated byte string
-consists of a name-value pair separated by "=" and terminated by
-newline.  Names consist of ASCII letters, digits and underscores, and
-start with a letter; duplicate names are not allowed.  Two common
-names are "name", the Zone name for the data, and "version", the
-version number.  Values consist of any bytes except NUL, newline, and
-backslash; however, newline and backslash can represented via the
-two-byte strings "\en" and "\e\e" respectively.
+Future additions to the format may insert more data just before the
+newline-enclosed POSIX-TZ-style string at the end of the file, so
+clients should not assume that this string immediately follows
+the auxiliary meta-information.
 .SH SEE ALSO
 newctime(3), newtzset(3)
 .\" This file is in the public domain, so clarified as of
diff --git a/zic.c b/zic.c
index 17b9e0e..e59a15f 100644
--- a/zic.c
+++ b/zic.c
@@ -463,13 +463,11 @@ addgenoption(char const *option)
 		if (! (isascii (*o) && (isalnum(*o) || *o == '_')))
 			return 0;
 	namelen = o - option;
-	if (INT_MAX < namelen)
-		return 0; /* fprintf won't work.  */
 	if (namelen == sizeof "name" - 1
 	    && memcmp(option, "name", namelen) == 0)
 		return 0;
 	for (i = 0; i < genoptions; i++)
-		if (strncmp(genoption[i], option, namelen  + 1) == 0)
+		if (strncmp(genoption[i], option, namelen + 1) == 0)
 			return 0;
 	genoption = erealloc(genoption, (genoptions + 1) * sizeof *genoption);
 	genoption[genoptions++] = option;
@@ -1427,22 +1425,6 @@ is32(const zic_t x)
 }
 
 static void
-writevalue(FILE *fp, char const *v)
-{
-	fputc('=', fp);
-
-	for (; *v; v++)
-		if (*v == '\n')
-			fprintf(fp, "\\n");
-		else if (*v == '\\')
-			fprintf(fp, "\\\\");
-		else
-			fputc(*v, fp);
-
-	fputc('\n', fp);
-}
-
-static void
 writezone(const char *const name, const char *const string)
 {
 	register FILE *			fp;
@@ -1450,6 +1432,7 @@ writezone(const char *const name, const char *const string)
 	register int			leapcnt32, leapi32;
 	register int			timecnt32, timei32;
 	register int			pass;
+	register int_fast32_t		genlen;
 	static char *			fullname;
 	static const struct tzhead	tzh0;
 	static struct tzhead		tzh;
@@ -1765,20 +1748,22 @@ writezone(const char *const name, const char *const string)
 				(void) putc(ttisgmts[i], fp);
 	}
 	(void) fprintf(fp, "\n%s\n", string);
-	if (genname || genoptions) {
-		fprintf(fp, "=TZ\n");
-		if (genname) {
-			fprintf(fp, "name");
-			writevalue(fp, name);
-		}
-		for (i = 0; i < genoptions; i++) {
-			register char const *v = genoption[i];
-			register int namelen = strchr(v, '=') - v;
-			fprintf(fp, "%.*s", namelen, v);
-			writevalue(fp, v + namelen + 1);
-		}
-		fprintf(fp, "\n%s\n", string);
+
+	genlen = 0;
+	if (genname)
+		genlen += sizeof "name=" + strlen (name);
+	for (i = 0; i < genoptions; i++)
+		genlen += strlen (genoption[i]) + 1;
+	puttzcode(genlen, fp);
+
+	if (genname)
+		fprintf(fp, "name=%s%c", name, 0);
+	for (i = 0; i < genoptions; i++) {
+		register char const *v = genoption[i];
+		register int namelen = strchr(v, '=') - v;
+		fprintf(fp, "%s%c", v, 0);
 	}
+	fprintf(fp, "%c\n%s\n", 0, string);
 	if (ferror(fp) || fclose(fp)) {
 		(void) fprintf(stderr, _("%s: Error writing %s\n"),
 			progname, fullname);
-- 
1.8.1.2




More information about the tz mailing list