[tz] [PROPOSED] Support zi parsers that mishandle negative DST offsets
Paul Eggert
eggert at cs.ucla.edu
Tue Jan 30 08:49:12 UTC 2018
This is intended to provide a way to support both clients that require
data to have only positive DST offsets, and clients that do not have
this restriction.
* Makefile (XDST, SDST): New macros.
(TZDATA_ZI_DEPS): Add zidst.awk.
(DSTDATA_ZI_DEPS): New macro.
(all): Depend on fulldata.zi and pdstdata.zi.
(fulldata.zi pdstdata.zi): New rule.
(tzdata.zi): Use $(XDST)data.zi instead of reading original source.
(check_zishrink): Check zidst.awk, too.
(clean): Remove all *.zi files, not just tzdata.zi.
* NEWS, europe: Mention this.
* zidst.awk: New file.
---
Makefile | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
NEWS | 30 ++++++++++++++++++++++++++++++
europe | 39 ++++++++++++++++++++++-----------------
zidst.awk | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 154 insertions(+), 28 deletions(-)
create mode 100644 zidst.awk
diff --git a/Makefile b/Makefile
index 8c84cd9..92ddb80 100644
--- a/Makefile
+++ b/Makefile
@@ -10,6 +10,26 @@ VERSION= unknown
# Email address for bug reports.
BUGEMAIL= tz at iana.org
+# To install the full data, which can contain daylight saving time
+# offsets that are negative (relative to standard time), use
+# XDST= full
+# To install data containing only positive daylight saving time
+# offsets, but otherwise as close to the full data as practical, use
+# XDST= pdst
+XDST= pdst
+# Parsers requiring DST offsets to be positive should use the file
+# pdstdata.zi, which contains almost all the data of 'africa' etc.,
+# except with positive DST offsets. This works around a problem that
+# was discovered in January 2018 with negative DST in tests for ICU
+# and OpenJDK. See:
+# https://mm.icann.org/pipermail/tz/2018-January/025825.html
+# https://mm.icann.org/pipermail/tz/2018-January/025822.html
+# Currently the 'africa' etc. files use pdst form if comments are
+# ignored, to ease transition for parsers that do not support
+# negative DST offsets. This is intended to change to full form at
+# some point, so that full-featured zi parsers that use the 'africa'
+# files will get the full data without changing anything.
+
# Change the line below for your time zone (after finding the zone you want in
# the time zone files, or adding it to a time zone file).
# Alternately, if you discover you've got the wrong time zone, you can just
@@ -463,7 +483,8 @@ TDATA= $(YDATA) $(NDATA) $(BACKWARD)
ZONETABLES= zone1970.tab zone.tab
TABDATA= iso3166.tab $(TZDATA_TEXT) $(ZONETABLES)
LEAP_DEPS= leapseconds.awk leap-seconds.list
-TZDATA_ZI_DEPS= zishrink.awk version $(TDATA) $(PACKRATDATA)
+TZDATA_ZI_DEPS= zidst.awk zishrink.awk version $(TDATA) $(PACKRATDATA)
+DSTDATA_ZI_DEPS= zidst.awk $(TDATA) $(PACKRATDATA)
DATA= $(TDATA_TO_CHECK) backzone iso3166.tab leap-seconds.list \
leapseconds yearistype.sh $(ZONETABLES)
AWK_SCRIPTS= checklinks.awk checktab.awk leapseconds.awk zishrink.awk
@@ -500,7 +521,8 @@ VERSION_DEPS= \
SHELL= /bin/sh
-all: tzselect yearistype zic zdump libtz.a $(TABDATA)
+all: tzselect yearistype zic zdump libtz.a $(TABDATA) \
+ fulldata.zi pdstdata.zi
ALL: all date $(ENCHILADA)
@@ -535,11 +557,15 @@ version: $(VERSION_DEPS)
printf '%s\n' "$$V" >$@.out
mv $@.out $@
-# This file can be tailored by setting BACKWARD, PACKRATDATA, etc.
-tzdata.zi: $(TZDATA_ZI_DEPS)
+# These files can be tailored by setting BACKWARD, PACKRATDATA, etc.
+fulldata.zi pdstdata.zi: $(DSTDATA_ZI_DEPS)
+ $(AWK) -v outfile='$@' -f zidst.awk $(TDATA) $(PACKRATDATA) \
+ >$@.out
+ mv $@.out $@
+tzdata.zi: $(XDST)data.zi version
version=`sed 1q version` && \
LC_ALL=C $(AWK) -v version="$$version" -f zishrink.awk \
- $(TDATA) $(PACKRATDATA) >$@.out
+ $(XDST)data.zi >$@.out
mv $@.out $@
version.h: version
@@ -721,17 +747,32 @@ check_tzs: $(TZS) $(TZS_NEW)
check_web: tz-how-to.html
$(VALIDATE_ENV) $(VALIDATE) $(VALIDATE_FLAGS) tz-how-to.html
-# Check that tzdata.zi generates the same binary data that its sources do.
-check_zishrink: tzdata.zi zic leapseconds $(PACKRATDATA) $(TDATA)
+# The format of the source files, either full or pdst.
+# Currently they are in pdst format, but this is expected to change.
+SDST = pdst
+
+# Check that zishrink.awk does not alter the data, and that zidst.awk
+# preserves $(SDST) data (by comparing against zic output of the raw sources).
+check_zishrink: zic leapseconds $(PACKRATDATA) $(TDATA) \
+ $(XDST)data.zi tzdata.zi
for type in posix right; do \
- mkdir -p time_t.dir/$$type time_t.dir/$$type-shrunk && \
+ mkdir -p time_t.dir/$$type time_t.dir/$$type-$(SDST) \
+ time_t.dir/$$type-shrunk && \
case $$type in \
right) leap='-L leapseconds';; \
*) leap=;; \
esac && \
- $(ZIC) $$leap -d time_t.dir/$$type $(TDATA) && \
- $(AWK) '/^Rule/' $(TDATA) | \
+ $(ZIC) $$leap -d time_t.dir/$$type $(XDST)data.zi && \
+ $(AWK) '/^Rule/' $(XDST)data.zi | \
$(ZIC) $$leap -d time_t.dir/$$type - $(PACKRATDATA) && \
+ case $(XDST) in \
+ $(SDST)) \
+ $(ZIC) $$leap -d time_t.dir/$$type-$(SDST) $(TDATA) && \
+ $(AWK) '/^Rule/' $(TDATA) | \
+ $(ZIC) $$leap -d time_t.dir/$$type-$(SDST) \
+ - $(PACKRATDATA) && \
+ diff -r time_t.dir/$$type time_t.dir/$$type-$(SDST);; \
+ esac && \
$(ZIC) $$leap -d time_t.dir/$$type-shrunk tzdata.zi && \
diff -r time_t.dir/$$type time_t.dir/$$type-shrunk || exit; \
done
@@ -741,7 +782,7 @@ clean_misc:
rm -f core *.o *.out \
date tzselect version.h zdump zic yearistype libtz.a
clean: clean_misc
- rm -fr *.dir tzdata.zi tzdb-*/ $(TZS_NEW)
+ rm -fr *.dir *.zi tzdb-*/ $(TZS_NEW)
maintainer-clean: clean
@echo 'This command is intended for maintainers to use; it'
diff --git a/NEWS b/NEWS
index 4f763c0..c455f3c 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,36 @@ News for the tz database
Unreleased, experimental changes
+ Briefly:
+ Support zi parsers that mishandle negative DST offsets
+
+ Changes to build procedure
+
+ The new XDST macro in the Makefile lets the installer choose
+ XDST=full, which allows arbitrary DST offsets in the data, or
+ XDST=pdst, which allows only positive DST offsets. Choosing
+ XDST=full is arguably more correct for Ireland, which observes
+ Irish Standard Time (IST, UTC+01) in summer and GMT (UTC) in
+ winter. Choosing XDST=pdst is better for zoneinfo parsers that do
+ not work well with negative DST offsets, notably OpenJDK+CLDR.
+ On platforms using tzcode or similar APIs, XDST should not affect
+ any behavior other than that depending on the tm_isdst flag.
+
+ For now this change does not affect client-visible behavior by
+ default, as the Makefile defaults to XDST=pdst and uncommented
+ parts of the data source files contain only pdst-format data.
+ After a bit of time for testing, XDST=full and full-format source
+ files are planned to become the default, so that parsers that
+ support negative DST offsets can get full data without changing
+ their build procedures. Parsers requiring positive DST offsets
+ should use the new file pdstdata.zi instead of tzdata.zi or the
+ source files 'africa' etc.: pdstdata.zi is pdst-compatible, it is
+ automatically built from the data source files, and it will
+ continue to be pdst-compatible regardless of XDST. To get
+ full-format data now, use the new file fulldata.zi, which will
+ continue to be full-format regardless of XDST. To get the format
+ selected by XDST, use tzdata.zi.
+
Changes to code
The code is a bit more portable to MS-Windows. (Thanks to Manuela
diff --git a/europe b/europe
index 6c1ccbe..5aeda33 100644
--- a/europe
+++ b/europe
@@ -508,11 +508,27 @@ Link Europe/London Europe/Jersey
Link Europe/London Europe/Guernsey
Link Europe/London Europe/Isle_of_Man
-# From Paul Eggert (2018-01-19):
+# From Paul Eggert (2018-01-30):
+# In January 2018 we discovered that the negative DST offsets in the
+# Eire rules cause problems with tests for ICU:
+# https://mm.icann.org/pipermail/tz/2018-January/025825.html
+# and with tests for OpenJDK:
+# https://mm.icann.org/pipermail/tz/2018-January/025822.html
+# To work around this problem, zidst.awk translates the following data
+# lines into two forms. First, fulldata.zi contains the full data,
+# which includes negative DST offsets. Second, pdstdata.zi uses a
+# traditional approximation for Irish time stamps after 1971-10-31
+# 02:00 UTC; although this approximation has tm_isdst flags that are
+# the reverse of the full data, its UTC offsets are correct and this
+# suffices for ICU and OpenJDK. Although this source file currently
+# has pdstdata.zi lines active and fulldata.zi lines commented out,
+# this is intended to change in the near future and downstream code
+# should not rely on it.
+#
# The following is like GB-Eire and EU, except with standard time in
# summer and negative daylight saving time in winter.
-# Although currently commented out, this will need to become uncommented
-# once the ICU/OpenJDK workaround is removed; see below.
+# This rule set is active in fulldata.zi and is commented out in
+# pdstdata.zi.
# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
#Rule Eire 1971 only - Oct 31 2:00u -1:00 GMT
#Rule Eire 1972 1980 - Mar Sun>=16 2:00u 0 IST
@@ -533,24 +549,13 @@ Zone Europe/Dublin -0:25:00 - LMT 1880 Aug 2
0:00 1:00 IST 1947 Nov 2 2:00s
0:00 - GMT 1948 Apr 18 2:00s
0:00 GB-Eire GMT/IST 1968 Oct 27
-# From Paul Eggert (2018-01-18):
-# The next line should look like this:
+# The next line is active in fulldata.zi and commented out in pdstdata.zi.
# 1:00 Eire IST/GMT
-# However, in January 2018 we discovered that the Eire rules cause
-# problems with tests for ICU:
-# https://mm.icann.org/pipermail/tz/2018-January/025825.html
-# and with tests for OpenJDK:
-# https://mm.icann.org/pipermail/tz/2018-January/025822.html
-# To work around this problem, use a traditional approximation for
-# time stamps after 1971-10-31 02:00 UTC, to give ICU and OpenJDK
-# developers breathing room to fix bugs. This approximation has
-# correct UTC offsets, but results in tm_isdst flags are the reverse
-# of what they should be. This workaround is temporary and should be
-# removed reasonably soon.
+# These three lines are active in pdstdata.zi and commented out in
+# fulldata.zi.
1:00 - IST 1971 Oct 31 2:00u
0:00 GB-Eire GMT/IST 1996
0:00 EU GMT/IST
-# End of workaround for ICU and OpenJDK bugs.
###############################################################################
diff --git a/zidst.awk b/zidst.awk
new file mode 100644
index 0000000..7885e9a
--- /dev/null
+++ b/zidst.awk
@@ -0,0 +1,50 @@
+# Convert tzdata source into full or positive-DST form
+
+# Contributed by Paul Eggert. This file is in the public domain.
+
+# This is not a general-purpose converter; it is designed for current tzdata.
+#
+# When converting to full form, the output can use negative DST offsets.
+#
+# When converting to positive-DST form, the output uses only positive
+# DST offsets. The idea is for the output data to simulate the
+# behavior of the input data as best it can within the constraints of
+# positive DST offsets.
+#
+# Lines that differ are recognized by pattern, not by markers: Eire rules and
+# the Dublin IST/GMT line are full-only; other post-1968 Dublin lines are pdst.
+
+BEGIN {
+ dst_type["full"] = 1
+ dst_type["pdst"] = 1
+
+ # The command line should set OUTFILE to the name of the output file,
+ # which should start with either "full" or "pdst".
+ todst = substr(outfile, 1, 4)
+ if (!dst_type[todst]) exit 1
+}
+
+/^Zone/ { zone = $2 }
+
+{
+ in_comment = /^#/
+
+ # Test whether this line should differ between the full and the pdst versions.
+ Rule_Eire = /^#?Rule[\t ]+Eire[\t ]/
+ Zone_Dublin_post_1968 \
+ = (zone == "Europe/Dublin" && /^#?[\t ]+[01]:00[\t ]/ \
+ && (!$(in_comment + 4) || 1968 < $(in_comment + 4)))
+
+ # If so, uncomment the desired version and comment out the undesired one.
+ if (Rule_Eire || Zone_Dublin_post_1968) {
+ if ((Rule_Eire \
+ || (Zone_Dublin_post_1968 && $(in_comment + 3) == "IST/GMT")) \
+ == (todst == "full")) {
+ sub(/^#/, "")
+ } else if (/^[^#]/) {
+ sub(/^/, "#")
+ }
+ }
+}
+
+{ print }
--
2.14.3
More information about the tz mailing list