[tz] [PROPOSED] Generalize data format upgrade procedure
Paul Eggert
eggert at cs.ucla.edu
Mon Feb 5 05:27:19 UTC 2018
* Makefile (XDST): Remove, replacing with the more-general ...
(DATAFORM): ... new macro. All uses changed.
(fulldata.zi, pdstdata.zi): Remove, replacing with the more-general ...
(vanguard.zi, main.zi, rearguard.zi): ... new targets.
All uses changed.
(check_zishrink): Fix bug that caused this test to fail when
PACKRATDATA was nonempty and testing vanguard or rearguard data.
* NEWS, europe: Mention changes.
* ziguard.awk: Handle fractional seconds as well as negative DST offsets.
Rename from zidst.awk, since it now handles issues other than just DST.
All uses changed.
---
Makefile | 69 ++++++++++++++++++-----------------------------
NEWS | 64 ++++++++++++++++++++++++++------------------
europe | 29 +++++++++-----------
zidst.awk | 50 ----------------------------------
ziguard.awk | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 166 insertions(+), 135 deletions(-)
delete mode 100644 zidst.awk
create mode 100644 ziguard.awk
diff --git a/Makefile b/Makefile
index 92ddb80..659a5b0 100644
--- a/Makefile
+++ b/Makefile
@@ -10,25 +10,14 @@ VERSION= unknown
# Email address for bug reports.
BUGEMAIL= tz at iana.org
-# To install the full data, which can contain daylight saving time
-# offsets that are negative (relative to standard time), use
-# XDST= full
-# To install data containing only positive daylight saving time
-# offsets, but otherwise as close to the full data as practical, use
-# XDST= pdst
-XDST= pdst
-# Parsers requiring DST offsets to be positive should use the file
-# pdstdata.zi, which contains almost all the data of 'africa' etc.,
-# except with positive DST offsets. This works around a problem that
-# was discovered in January 2018 with negative DST in tests for ICU
-# and OpenJDK. See:
-# https://mm.icann.org/pipermail/tz/2018-January/025825.html
-# https://mm.icann.org/pipermail/tz/2018-January/025822.html
-# Currently the 'africa' etc. files use pdst form if comments are
-# ignored, to ease transition for parsers that do not support
-# negative DST offsets. This is intended to change to full form at
-# some point, so that full-featured zi parsers that use the 'africa'
-# files will get the full data without changing anything.
+# Choose source data features. To get new features right away, use:
+# DATAFORM= vanguard
+# To wait a while before using new features, to give downstream users
+# time to upgrade zic (the default), use:
+# DATAFORM= main
+# To wait even longer for new features, use:
+# DATAFORM= rearguard
+DATAFORM= main
# Change the line below for your time zone (after finding the zone you want in
# the time zone files, or adding it to a time zone file).
@@ -483,8 +472,8 @@ TDATA= $(YDATA) $(NDATA) $(BACKWARD)
ZONETABLES= zone1970.tab zone.tab
TABDATA= iso3166.tab $(TZDATA_TEXT) $(ZONETABLES)
LEAP_DEPS= leapseconds.awk leap-seconds.list
-TZDATA_ZI_DEPS= zidst.awk zishrink.awk version $(TDATA) $(PACKRATDATA)
-DSTDATA_ZI_DEPS= zidst.awk $(TDATA) $(PACKRATDATA)
+TZDATA_ZI_DEPS= ziguard.awk zishrink.awk version $(TDATA) $(PACKRATDATA)
+DSTDATA_ZI_DEPS= ziguard.awk $(TDATA) $(PACKRATDATA)
DATA= $(TDATA_TO_CHECK) backzone iso3166.tab leap-seconds.list \
leapseconds yearistype.sh $(ZONETABLES)
AWK_SCRIPTS= checklinks.awk checktab.awk leapseconds.awk zishrink.awk
@@ -522,7 +511,7 @@ VERSION_DEPS= \
SHELL= /bin/sh
all: tzselect yearistype zic zdump libtz.a $(TABDATA) \
- fulldata.zi pdstdata.zi
+ vanguard.zi main.zi rearguard.zi
ALL: all date $(ENCHILADA)
@@ -558,14 +547,14 @@ version: $(VERSION_DEPS)
mv $@.out $@
# These files can be tailored by setting BACKWARD, PACKRATDATA, etc.
-fulldata.zi pdstdata.zi: $(DSTDATA_ZI_DEPS)
- $(AWK) -v outfile='$@' -f zidst.awk $(TDATA) $(PACKRATDATA) \
+vanguard.zi main.zi rearguard.zi: $(DSTDATA_ZI_DEPS)
+ $(AWK) -v outfile='$@' -f ziguard.awk $(TDATA) $(PACKRATDATA) \
>$@.out
mv $@.out $@
-tzdata.zi: $(XDST)data.zi version
+tzdata.zi: $(DATAFORM).zi version
version=`sed 1q version` && \
LC_ALL=C $(AWK) -v version="$$version" -f zishrink.awk \
- $(XDST)data.zi >$@.out
+ $(DATAFORM).zi >$@.out
mv $@.out $@
version.h: version
@@ -747,31 +736,25 @@ check_tzs: $(TZS) $(TZS_NEW)
check_web: tz-how-to.html
$(VALIDATE_ENV) $(VALIDATE) $(VALIDATE_FLAGS) tz-how-to.html
-# The format of the source files, either full or pdst.
-# Currently they are in pdst format, but this is expected to change.
-SDST = pdst
-
-# Check that zishrink.awk does not alter the data, and that zidst.awk
-# preserves $(SDST) data.
+# Check that zishrink.awk does not alter the data, and that ziguard.awk
+# preserves main-format data.
check_zishrink: zic leapseconds $(PACKRATDATA) $(TDATA) \
- $(XDST)data.zi tzdata.zi
+ $(DATAFORM).zi tzdata.zi
for type in posix right; do \
- mkdir -p time_t.dir/$$type time_t.dir/$$type-$(SDST) \
+ mkdir -p time_t.dir/$$type time_t.dir/$$type-t \
time_t.dir/$$type-shrunk && \
case $$type in \
right) leap='-L leapseconds';; \
*) leap=;; \
esac && \
- $(ZIC) $$leap -d time_t.dir/$$type $(XDST)data.zi && \
- $(AWK) '/^Rule/' $(XDST)data.zi | \
- $(ZIC) $$leap -d time_t.dir/$$type - $(PACKRATDATA) && \
- case $(XDST) in \
- $(SDST)) \
- $(ZIC) $$leap -d time_t.dir/$$type-$(SDST) $(TDATA) && \
+ $(ZIC) $$leap -d time_t.dir/$$type $(DATAFORM).zi && \
+ case $(DATAFORM) in \
+ main) \
+ $(ZIC) $$leap -d time_t.dir/$$type-t $(TDATA) && \
$(AWK) '/^Rule/' $(TDATA) | \
- $(ZIC) $$leap -d time_t.dir/$$type-$(SDST) \
- $(XDST)data.zi && \
- diff -r time_t.dir/$$type time_t.dir/$$type-$(SDST);; \
+ $(ZIC) $$leap -d time_t.dir/$$type-t - \
+ $(PACKRATDATA) && \
+ diff -r time_t.dir/$$type time_t.dir/$$type-t;; \
esac && \
$(ZIC) $$leap -d time_t.dir/$$type-shrunk tzdata.zi && \
diff -r time_t.dir/$$type time_t.dir/$$type-shrunk || exit; \
diff --git a/NEWS b/NEWS
index adc9814..b13c356 100644
--- a/NEWS
+++ b/NEWS
@@ -3,8 +3,8 @@ News for the tz database
Unreleased, experimental changes
Briefly:
- Support zi parsers that mishandle negative DST offsets
- Add fractional seconds to source data format.
+ Add support for vanguard and rearguard data consumers.
+ Add fractional seconds to source data format and to vanguard data.
Changes to past time stamps
@@ -16,30 +16,41 @@ Unreleased, experimental changes
Changes to build procedure
- The new XDST macro in the Makefile lets the installer choose
- XDST=full, which allows arbitrary DST offsets in the data, or
- XDST=pdst, which allows only positive DST offsets. Choosing
- XDST=full is arguably more correct for Ireland, which observes
- Irish Standard Time (IST, UTC+01) in summer and GMT (UTC) in
- winter. Choosing XDST=pdst is better for zoneinfo parsers that do
- not work well with negative DST offsets, notably OpenJDK+CLDR.
- On platforms using tzcode or similar APIs, XDST should not affect
- any behavior other than that depending on the tm_isdst flag.
-
- For now this change does not affect client-visible behavior by
- default, as the Makefile defaults to XDST=pdst and uncommented
- parts of the data source files contain only pdst-format data.
- After a bit of time for testing, XDST=full and full-format source
- files are planned to become the default, so that parsers that
- support negative DST offsets can get full data without changing
- their build procedures. Parsers requiring positive DST offsets
- should use the new file pdstdata.zi instead of tzdata.zi or the
- source files 'africa' etc.: pdstdata.zi is pdst-compatible, it is
- automatically built from the data source files, and it will
- continue to be pdst-compatible regardless of XDST. To get
- full-format data now, use the new file fulldata.zi, which will
- continue to be full-format regardless of XDST. To get the format
- selected by XDST, use tzdata.zi.
+ The new DATAFORM macro in the Makefile lets the installer choose
+ among three source data formats. The idea is to lessen downstream
+ disruption when data formats are improved.
+
+ * DATAFORM=vanguard installs from the latest, bleeding-edge
+ format. DATAFORM=main (the default) installs from the format
+ used in the 'africa' etc. files. DATAFORM=rearguard installs
+ from a trailing-edge format. Eventually, elements of today's
+ vanguard format should move to the main format, and similarly
+ the main format's features should eventually move to the
+ rearguard format.
+
+ * In the current version, the main and rearguard formats are
+ identical and match that of 2018c, so this change does not
+ affect default behavior. The vanguard format contains two
+ features not in the main format: fractional seconds and negative
+ DST offsets. Fractional seconds were added in this release,
+ where they affect only zic input (output is unaffected).
+ Negative DST offsets improve support for Ireland, which uses
+ Irish Standard Time (IST, UTC+01) in summer and GMT (UTC) in
+ winter. tzcode has supported negative DST offsets for decades,
+ and this feature should move to the main format soon. However,
+ it will not move to the rearguard format for quite some time
+ because some downstream parsers do not support it.
+
+ * The build procedure constructs three files vanguard.zi, main.zi,
+ and rearguard.zi, one for each format. The files represent the
+ same data as closely as the formats allow. These three files
+ are intended for downstream data consumers and are not
+ installed. Zoneinfo parsers that require positive DST offsets
+ should start using rearguard.zi, so that they will be unaffected
+ when the negative-DST feature moves from vanguard to main.
+ Bleeding-edge Zoneinfo parsers that support the new features
+ already can use vanguard.zi; in this respect, current tzcode is
+ bleeding-edge.
Changes to code
@@ -48,6 +59,7 @@ Unreleased, experimental changes
zic currently rounds these fractions to the nearest integer
(breaking ties to the even integer), the fractions may be useful
to applications requiring more precision in historical timestamps.
+ This extension is currently used only in vanguard.zi.
The code is a bit more portable to MS-Windows. (Thanks to Manuela
Friedrich).
diff --git a/europe b/europe
index 76cbb5d..8aab26e 100644
--- a/europe
+++ b/europe
@@ -514,21 +514,19 @@ Link Europe/London Europe/Isle_of_Man
# https://mm.icann.org/pipermail/tz/2018-January/025825.html
# and with tests for OpenJDK:
# https://mm.icann.org/pipermail/tz/2018-January/025822.html
-# To work around this problem, zidst.awk translates the following data
-# lines into two forms. First, fulldata.zi contains the full data,
-# which includes negative DST offsets. Second, pdstdata.zi uses a
-# traditional approximation for Irish time stamps after 1971-10-31
-# 02:00 UTC; although this approximation has tm_isdst flags that are
-# the reverse of the full data, its UTC offsets are correct and this
-# suffices for ICU and OpenJDK. Although this source file currently
-# has pdstdata.zi lines active and fulldata.zi lines commented out,
-# this is intended to change in the near future and downstream code
-# should not rely on it.
+#
+# To work around this problem, the build procedure can translate the
+# following data into two forms, one with negative DST offsets and the
+# other form with a traditional approximation for Irish time stamps
+# after 1971-10-31 02:00 UTC; although this approximation has tm_isdst
+# flags that are reversed, its UTC offsets are correct and this often
+# suffices. This source file currently uses only positive DST
+# offsets, but this is intended to change and downstream code should
+# not rely on it.
#
# The following is like GB-Eire and EU, except with standard time in
-# summer and negative daylight saving time in winter.
-# This rule set is active in fulldata.zi and is commented out in
-# pdstdata.zi.
+# summer and negative daylight saving time in winter. It is for when
+# negative DST offsets are used.
# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
#Rule Eire 1971 only - Oct 31 2:00u -1:00 GMT
#Rule Eire 1972 1980 - Mar Sun>=16 2:00u 0 IST
@@ -549,10 +547,9 @@ Zone Europe/Dublin -0:25:00 - LMT 1880 Aug 2
0:00 1:00 IST 1947 Nov 2 2:00s
0:00 - GMT 1948 Apr 18 2:00s
0:00 GB-Eire GMT/IST 1968 Oct 27
-# The next line is active in fulldata.zi and commented out in pdstdata.zi.
+# The next line is for when negative DST offsets are used.
# 1:00 Eire IST/GMT
-# These three lines are active in pdstdata.zi and commented out in
-# fulldata.zi.
+# These three lines are for when positive DST offsets are used.
1:00 - IST 1971 Oct 31 2:00u
0:00 GB-Eire GMT/IST 1996
0:00 EU GMT/IST
diff --git a/zidst.awk b/zidst.awk
deleted file mode 100644
index 7885e9a..0000000
--- a/zidst.awk
+++ /dev/null
@@ -1,50 +0,0 @@
-# Convert tzdata source into full or positive-DST form
-
-# Contributed by Paul Eggert. This file is in the public domain.
-
-# This is not a general-purpose converter; it is designed for current tzdata.
-#
-# When converting to full form, the output can use negative DST offsets.
-#
-# When converting to positive-DST form, the output uses only positive
-# DST offsets. The idea is for the output data to simulate the
-# behavior of the input data as best it can within the constraints of
-# positive DST offsets.
-#
-# In the input, lines requiring the full format are commented #[full]
-# and the positive DST near-equivalents are commented #[pdst].
-
-BEGIN {
- dst_type["full"] = 1
- dst_type["pdst"] = 1
-
- # The command line should set OUTFILE to the name of the output file,
- # which should start with either "full" or "pdst".
- todst = substr(outfile, 1, 4)
- if (!dst_type[todst]) exit 1
-}
-
-/^Zone/ { zone = $2 }
-
-{
- in_comment = /^#/
-
- # Test whether this line should differ between the full and the pdst versions.
- Rule_Eire = /^#?Rule[\t ]+Eire[\t ]/
- Zone_Dublin_post_1968 \
- = (zone == "Europe/Dublin" && /^#?[\t ]+[01]:00[\t ]/ \
- && (!$(in_comment + 4) || 1968 < $(in_comment + 4)))
-
- # If so, uncomment the desired version and comment out the undesired one.
- if (Rule_Eire || Zone_Dublin_post_1968) {
- if ((Rule_Eire \
- || (Zone_Dublin_post_1968 && $(in_comment + 3) == "IST/GMT")) \
- == (todst == "full")) {
- sub(/^#/, "")
- } else if (/^[^#]/) {
- sub(/^/, "#")
- }
- }
-}
-
-{ print }
diff --git a/ziguard.awk b/ziguard.awk
new file mode 100644
index 0000000..795b4ef
--- /dev/null
+++ b/ziguard.awk
@@ -0,0 +1,89 @@
+# Convert tzdata source into vanguard or rearguard form.
+
+# Contributed by Paul Eggert. This file is in the public domain.
+
+# This is not a general-purpose converter; it is designed for current tzdata.
+#
+# When converting to vanguard form, the output can use fractional seconds
+# and negative DST offsets.
+#
+# When converting to rearguard form, the output omits fractional
+# seconds and uses only positive DST offsets. The idea is for the
+# output data to simulate the behavior of the input data as best it
+# can within the constraints of the rearguard format.
+
+BEGIN {
+ dst_type["vanguard.zi"] = 1
+ dst_type["main.zi"] = 1
+ dst_type["rearguard.zi"] = 1
+
+ # The command line should set the awk variable 'outfile' to the output file name.
+ if (!dst_type[outfile]) exit 1
+ vanguard = outfile == "vanguard.zi"
+
+ # List non-integer standard times more accurately if known.
+ # This list does not attempt to record every UT offset that is
+ # not an integral multiple of 1 s; it merely records those that
+ # do not appear to be just LMT.
+ frac["-5:36:13"] = "-5:36:13.3" # America/Costa_Rica before 1921
+ frac["-5:07:10"] = "-5:07:10.41" # America/Jamaica before 1912
+ frac["-4:16:48"] = "-4:16:48.25" # America/Cordoba etc. 1894-1920
+ frac["-0:36:45"] = "-0:36:44.68" # Europe/Lisbon before 1912
+ frac["-0:25:21"] = "-0:25:21.1" # Europe/Dublin 1880-1916
+ frac["0:19:32"] = "0:19:32.13" # Europe/Amsterdam before 1937
+ frac["1:39:49"] = "1:39:49.2" # Europe/Helsinki before 1921
+ frac["2:05:09"] = "2:05:08.9" # Africa/Cairo before 1900
+ frac["4:37:11"] = "4:37:10.8" # Asia/Tashkent before 1924
+ frac["7:06:30"] = "7:06:30.1333" #... Asia/Ho_Chi_Minh 1906-1911
+ frac["7:07:12"] = "7:07:12.5" # Asia/Jakarta before 1923
+ frac["7:36:42"] = "7:36:41.7" # Asia/Hong_Kong before 1904
+ frac["8:05:43"] = "8:05:43.2" # Asia/Shanghai before 1901
+}
+
+/^Zone/ { zone = $2 }
+
+outfile != "main.zi" {
+ in_comment = /^#/
+
+ # If this line should differ due to Ireland using negative DST offsets,
+ # uncomment the desired version and comment out the undesired one.
+ Rule_Eire = /^#?Rule[\t ]+Eire[\t ]/
+ Zone_Dublin_post_1968 \
+ = (zone == "Europe/Dublin" && /^#?[\t ]+[01]:00[\t ]/ \
+ && (!$(in_comment + 4) || 1968 < $(in_comment + 4)))
+ if (Rule_Eire || Zone_Dublin_post_1968) {
+ if ((Rule_Eire \
+ || (Zone_Dublin_post_1968 && $(in_comment + 3) == "IST/GMT")) \
+ == vanguard) {
+ sub(/^#/, "")
+ } else if (/^[^#]/) {
+ sub(/^/, "#")
+ }
+ }
+
+ # Add or remove fractional seconds as needed.
+ f = $1 == "Zone" ? 3 : 1
+ for (rounded in frac) {
+ original = frac[rounded]
+ if ($f == rounded || $f == original) {
+ $f = vanguard ? original : rounded
+ }
+ }
+}
+
+# If a Link line is followed by a Zone line for the same data, comment
+# out the Link line. This can happen if backzone overrides a Link
+# with a Zone.
+/^Link/ {
+ linkline[$3] = NR
+}
+/^Zone/ {
+ sub(/^Link/, "#Link", line[linkline[$2]])
+}
+
+{ line[NR] = $0 }
+
+END {
+ for (i = 1; i <= NR; i++)
+ print line[i]
+}
--
2.14.3
More information about the tz mailing list