[tz] [PROPOSED] Use %z in vanguard form

Paul Eggert eggert at cs.ucla.edu
Fri Jul 22 15:25:16 UTC 2022


* europe (Atlantic/Azores, Atlantic/Madeira):
Add comments containing a vanguard equivalent.
* ziguard.awk (get_minutes, offset_abbr): New functions.
Support the special case for the Azores and Madeira.
Convert to and from %z form.
---
 NEWS        |  9 ++++++
 europe      | 12 ++++++--
 ziguard.awk | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 102 insertions(+), 2 deletions(-)

diff --git a/NEWS b/NEWS
index 4904819..33261ee 100644
--- a/NEWS
+++ b/NEWS
@@ -6,6 +6,7 @@ Unreleased, experimental changes
     Iran no longer observes DST after 2022.
     Rename Europe/Kiev to Europe/Kyiv.
     New zic -R option
+    Vanguard form now uses %z.
 
   Changes to future timestamps
 
@@ -61,6 +62,14 @@ Unreleased, experimental changes
 
   Changes to build procedure
 
+    Source data in vanguard form now uses the %z notation, introduced
+    in release 2015f.  For example, for America/Sao_Paulo vanguard
+    form contains the zone continuation line "-3:00 Brazil %z", which
+    is simpler and more reliable than the line "-3:00 Brazil -03/-02"
+    used in main and rearguard forms.  The plan is for the main form
+    to use %z eventually; in the meantime maintainers of zi parsers
+    are encouraged to test the parsers on vanguard.zi.
+
     'make install' now defaults LOCALTIME to Factory not GMT,
     which means the default abbreviation is now "-00" not "GMT".
 
diff --git a/europe b/europe
index 0c008b4..776504d 100644
--- a/europe
+++ b/europe
@@ -2399,9 +2399,13 @@ Zone	Europe/Lisbon	-0:36:45 -	LMT	1884
 			 0:00	W-Eur	WE%sT	1992 Sep 27  1:00s
 			 1:00	EU	CE%sT	1996 Mar 31  1:00u
 			 0:00	EU	WE%sT
-# This Zone can be simplified once we assume zic %z.
 Zone Atlantic/Azores	-1:42:40 -	LMT	1884        # Ponta Delgada
 			-1:54:32 -	HMT	1912 Jan  1  2:00u # Horta MT
+# Vanguard section, for zic and other parsers that support %z.
+#			-2:00	Port	%z	1966 Apr  3  2:00
+#			-1:00	Port	%z	1983 Sep 25  1:00s
+#			-1:00	W-Eur	%z	1992 Sep 27  1:00s
+# Rearguard section, for parsers lacking %z; see ziguard.awk.
 			-2:00	Port	-02/-01	1942 Apr 25 22:00s
 			-2:00	Port	+00	1942 Aug 15 22:00s
 			-2:00	Port	-02/-01	1943 Apr 17 22:00s
@@ -2413,11 +2417,14 @@ Zone Atlantic/Azores	-1:42:40 -	LMT	1884        # Ponta Delgada
 			-2:00	Port	-02/-01	1966 Apr  3  2:00
 			-1:00	Port	-01/+00	1983 Sep 25  1:00s
 			-1:00	W-Eur	-01/+00	1992 Sep 27  1:00s
+# End of rearguard section.
 			 0:00	EU	WE%sT	1993 Mar 28  1:00u
 			-1:00	EU	-01/+00
-# This Zone can be simplified once we assume zic %z.
 Zone Atlantic/Madeira	-1:07:36 -	LMT	1884        # Funchal
 			-1:07:36 -	FMT	1912 Jan  1  1:00u # Funchal MT
+# Vanguard section, for zic and other parsers that support %z.
+#			-1:00	Port	%z
+# Rearguard section, for parsers lacking %z; see ziguard.awk.
 			-1:00	Port	-01/+00	1942 Apr 25 22:00s
 			-1:00	Port	+01	1942 Aug 15 22:00s
 			-1:00	Port	-01/+00	1943 Apr 17 22:00s
@@ -2427,6 +2434,7 @@ Zone Atlantic/Madeira	-1:07:36 -	LMT	1884        # Funchal
 			-1:00	Port	-01/+00	1945 Apr 21 22:00s
 			-1:00	Port	+01	1945 Aug 25 22:00s
 			-1:00	Port	-01/+00	1966 Apr  3  2:00
+# End of rearguard section.
 			 0:00	Port	WE%sT	1983 Sep 25  1:00s
 			 0:00	EU	WE%sT
 
diff --git a/ziguard.awk b/ziguard.awk
index 25de617..91f6896 100644
--- a/ziguard.awk
+++ b/ziguard.awk
@@ -23,6 +23,33 @@
 # of the input data as best it can within the constraints of the
 # rearguard format.
 
+# Given a FIELD like "-0:30", return a minute count like -30.
+function get_minutes(field, \
+		     sign, hours, minutes)  # sign, hours, minutes are locals
+{
+  sign = field ~ /^-/ ? -1 : 1  # kept apart from hours: "-0" alone loses the sign
+  hours = +field  # numeric value of the leading part, before any ":"
+  if (field ~ /:/) {
+    minutes = field
+    sub(/[^:]*:/, "", minutes)  # drop everything through the first ":"
+  }
+  return 60 * hours + sign * minutes  # minutes is unset (counts as 0) without ":"
+}
+
+# Given an OFFSET, which is a minute count like 300 or 330,
+# return a %z-style abbreviation like "+05" or "+0530".
+function offset_abbr(offset, \
+		     hours, minutes, sign)  # locals; sign is not used in the body
+{
+  hours = int(offset / 60)  # whole hours; int() truncates toward zero
+  minutes = offset % 60  # leftover minutes; nonzero only off the hour
+  if (minutes) {
+    return sprintf("%+.4d", hours * 100 + minutes);  # signed, >= 4 digits: +hhmm
+  } else {
+    return sprintf("%+.2d", hours)  # signed, >= 2 digits: +hh
+  }
+}
+
 BEGIN {
   dataform_type["vanguard"] = 1
   dataform_type["main"] = 1
@@ -85,6 +112,16 @@ DATAFORM != "main" {
     }
   }
 
+  # If this line should differ due to Portugal benefiting from %z if supported,
+  # uncomment the desired version and comment out the undesired one.
+  if (/^#?[\t ]+-[12]:00[\t ]+Port[\t ]+[%+-]/) {
+    if (/%z/ == (DATAFORM == "vanguard")) {
+      uncomment = in_comment
+    } else {
+      comment_out = !in_comment
+    }
+  }
+
   if (uncomment) {
     sub(/^#/, "")
   }
@@ -92,6 +129,52 @@ DATAFORM != "main" {
     sub(/^/, "#")
   }
 
+  # Prefer %z in vanguard form, explicit abbreviations otherwise.
+  if (DATAFORM == "vanguard") {
+    sub(/^(Zone[\t ]+[^\t ]+)?[\t ]+[^\t ]+[\t ]+[^\t ]+[\t ]+[-+][^\t ]+/, \
+	"&CHANGE-TO-%z")
+    sub(/-00CHANGE-TO-%z/, "-00")
+    sub(/[-+][^\t ]+CHANGE-TO-/, "")
+  } else {
+    if (/^[^#]*%z/) {
+      stdoff_column = 2 * /^Zone/ + 1
+      rules_column = stdoff_column + 1
+      stdoff = get_minutes($stdoff_column)
+      rules = $rules_column
+      stdabbr = offset_abbr(stdoff)
+      if (rules == "-") {
+	abbr = stdabbr
+      } else {
+	dstabbr_only = rules ~ /^[+0-9-]/
+	if (dstabbr_only) {
+	  dstoff = get_minutes(rules)
+	} else {
+	  # The DST offset is normally an hour, but there are special cases.
+	  if (rules == "Morocco" && NF == 3) {
+	    dstoff = -60
+	  } else if (rules == "NBorneo") {
+	    dstoff = 20
+	  } else if (((rules == "Cook" || rules == "LH") && NF == 3) \
+		     || (rules == "Uruguay" \
+			 && /[\t ](1942 Dec 14|1960|1970|1974 Dec 22)$/)) {
+	    dstoff = 30
+	  } else if (rules == "Uruguay" && /[\t ]1974 Mar 10$/) {
+	    dstoff = 90
+	  } else {
+	    dstoff = 60
+	  }
+	}
+	dstabbr = offset_abbr(stdoff + dstoff)
+	if (dstabbr_only) {
+	  abbr = dstabbr
+	} else {
+	  abbr = stdabbr "/" dstabbr
+	}
+      }
+      sub(/%z/, abbr)
+    }
+  }
+
   if (DATAFORM == "rearguard") {
 
     # In rearguard form, change the Japan rule line with "Sat>=8 25:00"
-- 
2.36.1



More information about the tz mailing list