From 15048fede07edc0eb0f7f66d5821f1d74f3b6008 Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Sun, 21 Aug 2016 14:24:16 -0700
Subject: [PROPOSED PATCH 3/3] zdump -i: use colons and tabs in output

Based on comments by Jon Skeet and Tim Parenti in thread starting at:
http://mm.icann.org/pipermail/tz/2016-June/023763.html
* NEWS: Add thank-yous.
* zdump.8 (INTERVAL FORMAT): Columns are now tab-separated, not
space-separated.  UT offsets use colons and no longer have a
special case for outlandish offsets.  An empty line now precedes
the first TZ line too.
* zdump.c (main): Output an empty line before the first TZ= line, too.
Use tabs for separators, not spaces.
(format_utc_offset): Separate hh mm ss fields with colons.
(istrftime): Remove %o; no longer needed.
%Q now outputs the UTC offset (formerly %o) at start, and uses tab separators.
---
 NEWS    |  2 ++
 zdump.8 | 60 +++++++++++++++++++++++++++++++-----------------------------
 zdump.c | 60 ++++++++++++++++++++++++++++++------------------------------
 3 files changed, 63 insertions(+), 59 deletions(-)

diff --git a/NEWS b/NEWS
index 8f7d98d..3d01581 100644
--- a/NEWS
+++ b/NEWS
@@ -25,6 +25,8 @@ Unreleased, experimental changes
     zdump has a new -i option to generate transitions in a
     more-compact but still human-readable format.  This option is
     experimental, and the output format may change in future versions.
+    (Thanks to Jon Skeet for suggesting that an option was needed,
+    and thanks to Tim Parenti and Chris Rovick for further comments.)
 
   Changes affecting documentation and commentary
 
diff --git a/zdump.8 b/zdump.8
index 16ec454..14ca60b 100644
--- a/zdump.8
+++ b/zdump.8
@@ -94,7 +94,8 @@ the cutoff's lower bound is exclusive and its upper bound is inclusive.
 .I "This format is experimental: it may change in future versions."
 .PP
 The interval format is a compact text representation that is intended
-to be both human- and machine-readable.  It consists of a first line
+to be both human- and machine-readable.  It consists of an empty line,
+then a line
 .q "TZ=\fIstring\fP"
 where
 .I string
@@ -104,7 +105,7 @@ describing the time interval before the first transition if any, and
 zero or more following lines
 .q "\fIdate time interval\fP",
 one line for each transition time and following interval.  Fields are
-separated by single spaces.
+separated by single tabs.
 .PP
 Dates are in
 .IR yyyy - mm - dd
@@ -114,7 +115,7 @@ format where
 .IR hh <24.
 Times are in local time immediately after the transition.  A
 time interval description consists of a UT offset in signed
-.RI \(+- hhmmss
+.RI \(+- hh : mm : ss
 format, a time zone abbreviation, and an isdst flag.  An abbreviation
 that equals the UT offset is omitted; other abbreviations are
 double-quoted strings unless they consist of one or more alphabetic
@@ -122,12 +123,12 @@ characters.  An isdst flag is omitted for standard time, and otherwise
 is a decimal integer that is unsigned and positive (typically 1) for
 daylight saving time and negative for unknown.
 .PP
-In times and in UT offsets with absolute value less than 100 hours,
-the seconds are omitted if they are zero, and the minutes are also
-omitted if they are also zero.  Positive UT offsets are east of
-Greenwich.  The UT offset \*-00 denotes a UT placeholder in areas
-where the actual offset is unspecified; by convention, this occurs
-when the UT offset is zero and the time zone abbreviation begins with
+In times and UT offsets, the seconds are omitted if they are zero, and
+the minutes are also omitted if they are also zero.  Positive UT
+offsets are east of Greenwich.  The UT offset \*-00 denotes a UT
+placeholder in areas where the actual offset is unspecified; by
+convention, this occurs when the UT offset is zero and the time zone
+abbreviation begins with
 .q "\*-"
 or is
 .q "zzz".
@@ -140,20 +141,24 @@ programming language.  E.g., the double-quoted string
 "\e\*(rq.\""
 .PP
 .ne 9
-Here is an example:
+Here is an example of the output, with the leading empty line omitted.
+(This example is shown with tab stops set far enough apart so that the
+tabbed columns line up.)
 .nf
 .sp
 .if \n(.g .ft CW
 .if t .in +.5i
 .if n .in +2
+.nr w \w'1896-01-13 'u
+.ta \nwu +\nwu +\nwu +\nwu
 TZ="Pacific/Honolulu"
-- - -103126 LMT
-1896-01-13 12:01:26 -1030 HST
-1933-04-30 03 -0930 HDT 1
-1933-05-21 11 -1030 HST
-1942-02-09 03 -0930 HDT 1
-1945-09-30 01 -1030 HST
-1947-06-08 02:30 -10 HST
+-	-	-10:31:26	LMT
+1896-01-13	12:01:26	-10:30	HST
+1933-04-30	03	-09:30	HDT	1
+1933-05-21	11	-10:30	HST
+1942-02-09	03	-09:30	HDT	1
+1945-09-30	01	-10:30	HST
+1947-06-08	02:30	-10	HST
 .in
 .if \n(.g .ft
 .sp
@@ -177,24 +182,21 @@ Here are excerpts from another example:
 .if t .in +.5i
 .if n .in +2
 TZ="Europe/Astrakhan"
-- - +031212 LMT
-1924-04-30 23:47:48 +03
-1930-06-21 01 +04
-1981-04-01 01 +05 1
-1981-09-30 23 +04
+-	-	+03:12:12	LMT
+1924-04-30	23:47:48	+03
+1930-06-21	01	+04
+1981-04-01	01	+05		1
+1981-09-30	23	+04
 \&...
-2014-10-26 01 +03
-2016-03-27 03 +04
+2014-10-26	01	+03
+2016-03-27	03	+04
 .in
 .if \n(.g .ft
 .sp
 .fi
 This time zone is east of UT, so its UT offsets are positive.  Also,
-many of its time zone abbreviations omitted since they duplicate the
-text of the UT offset.
-.PP
-If multiple zones are present, their representations are separated
-by empty lines.
+many of its time zone abbreviations are omitted since they duplicate
+the text of the UT offset.
 .SH LIMITATIONS
 Time discontinuities are found by sampling the results returned by localtime
 at twelve-hour intervals.
diff --git a/zdump.c b/zdump.c
index f420896..694b04f 100644
--- a/zdump.c
+++ b/zdump.c
@@ -739,8 +739,8 @@ main(int argc, char *argv[])
 		if (tm_ok) {
 		  ab = saveabbr(&abbrev, &abbrevsize, &tm);
 		  if (iflag) {
-		    showtrans(&"\nTZ=%f"[i == optind], &tm, t, ab, argv[i]);
-		    showtrans("- - %o%Q", &tm, t, ab, argv[i]);
+		    showtrans("\nTZ=%f", &tm, t, ab, argv[i]);
+		    showtrans("-\t-\t%Q", &tm, t, ab, argv[i]);
 		  }
 		}
 		while (t < cuthitime) {
@@ -759,7 +759,7 @@ main(int argc, char *argv[])
 		    newtmp = localtime_rz(tz, &newt, &newtm);
 		    newtm_ok = newtmp != NULL;
 		    if (iflag)
-		      showtrans("%Y-%m-%d %L %o%Q", newtmp, newt,
+		      showtrans("%Y-%m-%d\t%L\t%Q", newtmp, newt,
 				newtm_ok ? abbr(&newtm) : NULL, argv[i]);
 		    else {
 		      show(tz, argv[i], newt - 1, true);
@@ -992,15 +992,14 @@ format_local_time(char *buf, size_t size, struct tm const *tm)
 
 /* Store into BUF, of size SIZE, a formatted UTC offset for the
    localtime *TM corresponding to time T.  Use ISO 8601 format
-   +HHMMSS, or -HHMMSS for time stamps west of Greenwich; if the time
-   stamp represents an unknown UTC offset, use the format -00.  If the
-   hour needs more than two digits to represent, HH contains three or
-   more digits.  Otherwise, omit SS if SS is zero, and omit MM too if
-   MM is also zero.
-
-   Return the length of the resulting string.  If the string does not
-   fit, return the length that the string would have been if it had
-   fit; do not overrun the output buffer.  */
+   +HH:MM:SS, or -HH:MM:SS for time stamps west of Greenwich.  Omit
+   :SS if :SS is zero, and omit :MM too if :MM is also zero.  If the
+   time stamp represents an unknown UTC offset, use the format -00.
+
+   Return the length of the resulting string, or -1 if the result is
+   not representable as a string.  If the string does not fit, return
+   the length that the string would have been if it had fit; do not
+   overrun the output buffer.  */
 static int
 format_utc_offset(char *buf, size_t size, struct tm const *tm, time_t t)
 {
@@ -1020,10 +1019,10 @@ format_utc_offset(char *buf, size_t size, struct tm const *tm, time_t t)
   ss = off % 60;
   mm = off / 60 % 60;
   hh = off / 60 / 60;
-  return (ss || 100 <= hh
-	  ? snprintf(buf, size, "%c%02ld%02d%02d", sign, hh, mm, ss)
+  return (ss
+	  ? snprintf(buf, size, "%c%02ld:%02d:%02d", sign, hh, mm, ss)
 	  : mm
-	  ? snprintf(buf, size, "%c%02ld%02d", sign, hh, mm)
+	  ? snprintf(buf, size, "%c%02ld:%02d", sign, hh, mm)
 	  : snprintf(buf, size, "%c%02ld", sign, hh));
 }
 
@@ -1067,10 +1066,10 @@ format_quoted_string(char *buf, size_t size, char const *p)
 
    %f zone name
    %L local time as per format_local_time
-   %o UTC offset as for format_utc_offset
-   %Q like " %Z D" where D is the isdst flag; except omit " D" if zero,
-      omit " %Z" if %Z=%o, and quote and escape %Z if it contains
-      nonalphabetics.  */
+   %Q like "U\t%Z\tD" where U is the UTC offset as for format_utc_offset
+      and D is the isdst flag; except omit D if it is zero, omit %Z if
+      it equals U, quote and escape %Z if it contains nonalphabetics,
+      and omit any trailing tabs.  */
 
 static bool
 istrftime(char *buf, size_t size, char const *time_fmt,
@@ -1085,8 +1084,7 @@ istrftime(char *buf, size_t size, char const *time_fmt,
       p++;
     else if (!*p
 	     || (*p == '%'
-		 && (p[1] == 'f' || p[1] == 'L'
-		     || p[1] == 'o' || p[1] == 'Q'))) {
+		 && (p[1] == 'f' || p[1] == 'L' || p[1] == 'Q'))) {
       size_t formatted_len;
       size_t f_prefix_len = p - f;
       size_t f_prefix_copy_size = p - f + 2;
@@ -1111,19 +1109,20 @@ istrftime(char *buf, size_t size, char const *time_fmt,
       case 'L':
 	formatted_len = format_local_time(b, s, tm);
 	break;
-      case 'o':
-	formatted_len = format_utc_offset(b, s, tm, t);
-	break;
       case 'Q':
 	{
-	  char offbuf[INT_STRLEN_MAXIMUM(long) + sizeof "+mmss"];
-	  format_utc_offset(offbuf, sizeof offbuf, tm, t);
-	  if (strcmp(offbuf, ab) != 0) {
+	  bool show_abbr;
+	  int offlen = format_utc_offset(b, s, tm, t);
+	  if (! (0 <= offlen && offlen < s))
+	    return false;
+	  show_abbr = strcmp(b, ab) != 0;
+	  b += offlen, s -= offlen;
+	  if (show_abbr) {
 	    char const *abp;
 	    size_t len;
 	    if (s <= 1)
 	      return false;
-	    *b++ = ' ', s--;
+	    *b++ = '\t', s--;
 	    for (abp = ab; is_alpha(*abp); abp++)
 	      continue;
 	    len = (!*abp && *ab
@@ -1133,8 +1132,9 @@ istrftime(char *buf, size_t size, char const *time_fmt,
 	      return false;
 	    b += len, s -= len;
 	  }
-	  formatted_len
-	    = tm->tm_isdst ? snprintf(b, s, " %d", tm->tm_isdst) : 0;
+	  formatted_len = (tm->tm_isdst
+			   ? snprintf(b, s, &"\t\t%d"[show_abbr], tm->tm_isdst)
+			   : 0);
 	}
 	break;
       }
-- 
2.5.5