[tz] [PATCH] New file 'pre1970' for zones that differ only in pre-1970 time stamps.

Paul Eggert eggert at cs.ucla.edu
Fri Aug 30 08:09:13 UTC 2013


This lets us preserve information about pre-1970 time stamps when
we change a Zone to a Link to another zone whose time stamps agree
after 1970.  This should address concerns about some recent
changes that removed this information.  This implementation is a
stripped-down version of a suggestion by Andrew Main (Zefram) in
<http://mm.icann.org/pipermail/tz/2013-August/019615.html> and
<http://mm.icann.org/pipermail/tz/2013-August/019639.html> to
allow filtering tz data by date range.  Unlike Zefram's
suggestion, this implementation supports only two date ranges,
namely 1970 on, using 'make BACKWARD=backward'; and all dates,
using 'make BACKWARD="pre1970 back-pre1970"'.  At some point I'd
like to improve it to support arbitrary date ranges, but at least
we've now restored the data whose loss was of some concern.
* .gitignore: Add back-pre1970.
* Makefile (BACKWARD): New macro.
(YDATA): Use it instead of 'backward'.
(AWK_SCRIPTS): New macro, adding back-pre1970.awk and zone-time.awk.
(MISC): Use it.
(back-pre1970): New rule.
(clean_misc): Clean back-pre1970.  Also clean time.tab, while we're
at it.
(check_public): Don't require pre1970 to stand alone.
* pre1970, back-pre1970.awk: New files.
---
 .gitignore       |   1 +
 Makefile         |  32 +++++-
 back-pre1970.awk |  18 ++++
 pre1970          | 291 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 337 insertions(+), 5 deletions(-)
 create mode 100644 back-pre1970.awk
 create mode 100644 pre1970

diff --git a/.gitignore b/.gitignore
index 18dbbcc..28b1bc9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@
 *.txt
 *~
 ChangeLog
+back-pre1970
 date
 leapseconds
 time.tab
diff --git a/Makefile b/Makefile
index a74d1a7..ffddb08 100644
--- a/Makefile
+++ b/Makefile
@@ -49,6 +49,22 @@ POSIXRULES=	America/New_York
 
 ZONETABTYPE=	zone
 
+# How to support obsolescent time zones in a backward-compatible way.
+# This variable affects only pre-1970 time stamps, on hosts that support them.
+# It has two possible values, 'backward' and 'pre1970 back-pre1970'.
+#
+# 'backward' is the traditional approach, and is simpler and more efficient;
+# it is designed to generate one zone for each region where clocks have agreed
+# since 1970.
+#
+# 'pre1970 back-pre1970' can generate more than one zone in that situation,
+# which means it can preserve a bit of pre-1970 data that 'backward' does not;
+# almost all pre-1970 data is missing, though, so don't get your hopes up.
+#
+# Sometimes 'backward' is more compatible with earlier versions of this database,
+# and sometimes 'pre1970 back-pre1970' is; it depends on the situation.
+BACKWARD=	backward
+
 # Also see TZDEFRULESTRING below, which takes effect only
 # if the time zone files cannot be accessed.
 
@@ -322,7 +338,7 @@ COMMON=		Makefile
 DOCS=		README Theory $(MANS) date.1
 PRIMARY_YDATA=	africa antarctica asia australasia \
 		europe northamerica southamerica
-YDATA=		$(PRIMARY_YDATA) pacificnew etcetera backward
+YDATA=		$(PRIMARY_YDATA) pacificnew etcetera $(BACKWARD)
 NDATA=		systemv factory
 SDATA=		solar87 solar88 solar89
 TDATA=		$(YDATA) $(NDATA) $(SDATA)
@@ -330,9 +346,10 @@ TABDATA=	iso3166.tab time.tab zone.tab
 DATA=		$(YDATA) $(NDATA) $(SDATA) $(TABDATA) \
 			leap-seconds.list yearistype.sh
 WEB_PAGES=	tz-art.htm tz-link.htm
+AWK_SCRIPTS=	back-pre1970.awk checktab.awk leapseconds.awk zone-time.awk
 MISC=		usno1988 usno1989 usno1989a usno1995 usno1997 usno1998 \
-			$(WEB_PAGES) checktab.awk leapseconds.awk workman.sh \
-			zoneinfo2tdf.pl
+			$(WEB_PAGES) $(AWK_SCRIPTS) \
+			workman.sh zoneinfo2tdf.pl
 ENCHILADA=	$(COMMON) $(DOCS) $(SOURCES) $(DATA) $(MISC)
 
 # And for the benefit of csh users on systems that assume the user
@@ -423,6 +440,9 @@ zones:		$(REDO)
 time.tab:	$(YDATA) zone.tab zone-time.awk
 		$(AWK) -f zone-time.awk $(YDATA) >$@
 
+back-pre1970:	pre1970 backward back-pre1970.awk
+		$(AWK) -v pre1970=pre1970 -f $@.awk backward >$@
+
 $(TZLIB):	$(LIBOBJS)
 		-mkdir $(TOPDIR) $(LIBDIR)
 		ar ru $@ $(LIBOBJS)
@@ -457,6 +477,7 @@ check_web:	$(WEB_PAGES)
 
 clean_misc:
 		rm -f core *.o *.out \
+		  back-pre1970 time.tab \
 		  date leapseconds tzselect version.h zdump zic yearistype
 clean:		clean_misc
 		rm -f -r tzpublic
@@ -488,7 +509,7 @@ set-timestamps:
 		  $$cmd || exit; \
 		done
 
-# The zics below ensure that each data file can stand on its own.
+# The zics below ensure that each non-pre1970 data file can stand on its own.
 # We also do an all-files run to catch links to links.
 
 check_public:	$(ENCHILADA)
@@ -496,7 +517,8 @@ check_public:	$(ENCHILADA)
 		make "CFLAGS=$(GCC_DEBUG_FLAGS)"
 		mkdir tzpublic
 		for i in $(TDATA) ; do \
-		  $(zic) -v -d tzpublic $$i 2>&1 || exit; \
+		  test $$i = pre1970 || $(zic) -v -d tzpublic $$i 2>&1 \
+		    || exit; \
 		done
 		$(zic) -v -d tzpublic $(TDATA)
 		rm -f -r tzpublic
diff --git a/back-pre1970.awk b/back-pre1970.awk
new file mode 100644
index 0000000..f7c54fc
--- /dev/null
+++ b/back-pre1970.awk
@@ -0,0 +1,18 @@
+# Generate 'back-pre1970' from the two input files 'pre1970' and 'backward'.
+# The output consists of all lines in 'backward' except Link lines whose
+# link name is defined as a Zone in 'pre1970'; i.e., 'backward' minus 'pre1970'.
+
+# The 'backward' file is the input.
+# The awk variable 'pre1970' contains the name of the pre1970 file.
+
+# This file is in the public domain.
+
+# Contributed by Paul Eggert.
+
+BEGIN {
+    while ((getline <pre1970) == 1)	# read each line of the pre1970 file
+	if ($1 == "Zone")
+	    pre1970_zone[$2] = 1	# remember each zone name it defines
+}
+# Pass every line except a Link whose link name ($3) is one of those zones.
+! (/^Link/ && ($3 in pre1970_zone)) { print }
diff --git a/pre1970 b/pre1970
new file mode 100644
index 0000000..d8b8f34
--- /dev/null
+++ b/pre1970
@@ -0,0 +1,291 @@
+# Pre-1970 data
+
+# This file is in the public domain.
+
+# This file contains zones that were formerly in other source files,
+# but were later removed or replaced by backward-compatibility links
+# as they differ from other zones only in pre-1970 time stamps.
+
+# Although the tz database focuses on post-1970 time stamps, these
+# entries are retained here as they may be of some use to people
+# interested in pre-1970 time stamps, even though they cover only a
+# tiny sliver of pre-1970 data and are unreliable for that data.
+# Also, these entries can help with backward compatibility with some
+# old versions of the tz database.  They are incompatible with other
+# old versions of the database, though; it depends on which old
+# version you're interested in.
+
+# Entries are sorted by Zone name.  Each entry is preceded by the name
+# of the country that the entry is in, along with any other commentary
+# and rules associated with the entry.  Some rules, e.g., 'Canada',
+# are defined by other source files; this file is not intended to be
+# used without those other files.
+
+# Zone	NAME		GMTOFF	RULES	FORMAT	[UNTIL]
+
+# Mali
+# no longer different from Bamako, but too famous to omit
+Zone	Africa/Timbuktu	-0:12:04 -	LMT	1912
+			 0:00	-	GMT
+
+# Anguilla
+Zone America/Anguilla	-4:12:16 -	LMT	1912 Mar 2
+			-4:00	-	AST
+
+# Antigua and Barbuda
+Zone	America/Antigua	-4:07:12 -	LMT	1912 Mar 2
+			-5:00	-	EST	1951
+			-4:00	-	AST
+
+# Argentina
+# Chubut (CH)
+# The name "Comodoro Rivadavia" exceeds the 14-byte POSIX limit.
+Zone America/Argentina/ComodRivadavia -4:30:00 - LMT	1894 Oct 31
+			-4:16:48 -	CMT	1920 May
+			-4:00	-	ART	1930 Dec
+			-4:00	Arg	AR%sT	1969 Oct  5
+			-3:00	Arg	AR%sT	1991 Mar  3
+			-4:00	-	WART	1991 Oct 20
+			-3:00	Arg	AR%sT	1999 Oct  3
+			-4:00	Arg	AR%sT	2000 Mar  3
+			-3:00	-	ART	2004 Jun  1
+			-4:00	-	WART	2004 Jun 20
+			-3:00	-	ART
+
+# Aruba
+Zone	America/Aruba	-4:40:24 -	LMT	1912 Feb 12	# Oranjestad
+			-4:30	-	ANT	1965 # Netherlands Antilles Time
+			-4:00	-	AST
+
+# Canada
+
+Zone America/Atikokan	-6:06:28 -	LMT	1895
+			-6:00	Canada	C%sT	1940 Sep 29
+			-6:00	1:00	CDT	1942 Feb  9 2:00s
+			-6:00	Canada	C%sT	1945 Sep 30 2:00
+			-5:00	-	EST
+
+Zone America/Blanc-Sablon -3:48:28 -	LMT	1884
+			-4:00	Canada	A%sT	1970
+			-4:00	-	AST
+
+# Cayman Is
+Zone	America/Cayman	-5:25:32 -	LMT	1890		# Georgetown
+			-5:07:12 -	KMT	1912 Feb    # Kingston Mean Time
+			-5:00	-	EST
+
+# Canada
+Zone America/Coral_Harbour -5:32:40 -	LMT	1884
+			-5:00	NT_YK	E%sT	1946
+			-5:00	-	EST
+
+# Curacao
+Zone	America/Curacao	-4:35:47 -	LMT	1912 Feb 12	# Willemstad
+			-4:30	-	ANT	1965 # Netherlands Antilles Time
+			-4:00	-	AST
+
+# Dominica
+Zone America/Dominica	-4:05:36 -	LMT	1911 Jul 1 0:01		# Roseau
+			-4:00	-	AST
+
+# Mexico
+Zone America/Ensenada	-7:46:28 -	LMT	1922 Jan  1  0:13:32
+			-8:00	-	PST	1927 Jun 10 23:00
+			-7:00	-	MST	1930 Nov 16
+			-8:00	-	PST	1942 Apr
+			-7:00	-	MST	1949 Jan 14
+			-8:00	-	PST	1996
+			-8:00	Mexico	P%sT
+
+# US
+Zone America/Fort_Wayne -5:00	US	E%sT	1946
+			-5:00	-	EST	# Always EST as of 1986
+
+# Grenada
+Zone	America/Grenada	-4:07:00 -	LMT	1911 Jul	# St George's
+			-4:00	-	AST
+
+# Guadeloupe
+Zone America/Guadeloupe	-4:06:08 -	LMT	1911 Jun 8	# Pointe a Pitre
+			-4:00	-	AST
+
+# Canada
+# Rule	NAME	FROM	TO	TYPE	IN	ON	AT	SAVE	LETTER/S
+Rule	Mont	1917	only	-	Mar	25	2:00	1:00	D
+Rule	Mont	1917	only	-	Apr	24	0:00	0	S
+Rule	Mont	1919	only	-	Mar	31	2:30	1:00	D
+Rule	Mont	1919	only	-	Oct	25	2:30	0	S
+Rule	Mont	1920	only	-	May	 2	2:30	1:00	D
+Rule	Mont	1920	1922	-	Oct	Sun>=1	2:30	0	S
+Rule	Mont	1921	only	-	May	 1	2:00	1:00	D
+Rule	Mont	1922	only	-	Apr	30	2:00	1:00	D
+Rule	Mont	1924	only	-	May	17	2:00	1:00	D
+Rule	Mont	1924	1926	-	Sep	lastSun	2:30	0	S
+Rule	Mont	1925	1926	-	May	Sun>=1	2:00	1:00	D
+# The 1927-to-1937 rules can be expressed more simply as
+# Rule	Mont	1927	1937	-	Apr	lastSat	24:00	1:00	D
+# Rule	Mont	1927	1937	-	Sep	lastSat	24:00	0	S
+# The rules below avoid use of 24:00
+# (which pre-1998 versions of zic cannot handle).
+Rule	Mont	1927	only	-	May	1	0:00	1:00	D
+Rule	Mont	1927	1932	-	Sep	lastSun	0:00	0	S
+Rule	Mont	1928	1931	-	Apr	lastSun	0:00	1:00	D
+Rule	Mont	1932	only	-	May	1	0:00	1:00	D
+Rule	Mont	1933	1940	-	Apr	lastSun	0:00	1:00	D
+Rule	Mont	1933	only	-	Oct	1	0:00	0	S
+Rule	Mont	1934	1939	-	Sep	lastSun	0:00	0	S
+Rule	Mont	1946	1973	-	Apr	lastSun	2:00	1:00	D
+Rule	Mont	1945	1948	-	Sep	lastSun	2:00	0	S
+Rule	Mont	1949	1950	-	Oct	lastSun	2:00	0	S
+Rule	Mont	1951	1956	-	Sep	lastSun	2:00	0	S
+Rule	Mont	1957	1973	-	Oct	lastSun	2:00	0	S
+# Zone	NAME		GMTOFF	RULES	FORMAT	[UNTIL]
+Zone America/Montreal	-4:54:16 -	LMT	1884
+			-5:00	Mont	E%sT	1918
+			-5:00	Canada	E%sT	1919
+			-5:00	Mont	E%sT	1942 Feb  9 2:00s
+			-5:00	Canada	E%sT	1946
+			-5:00	Mont	E%sT	1974
+			-5:00	Canada	E%sT
+
+# Montserrat
+Zone America/Montserrat	-4:08:52 -	LMT	1911 Jul 1 0:01   # Cork Hill
+			-4:00	-	AST
+
+# Bahamas
+# Rule	NAME	FROM	TO	TYPE	IN	ON	AT	SAVE	LETTER/S
+Rule	Bahamas	1964	1975	-	Oct	lastSun	2:00	0	S
+Rule	Bahamas	1964	1975	-	Apr	lastSun	2:00	1:00	D
+# Zone	NAME		GMTOFF	RULES	FORMAT	[UNTIL]
+Zone	America/Nassau	-5:09:30 -	LMT	1912 Mar 2
+			-5:00	Bahamas	E%sT	1976
+			-5:00	US	E%sT
+
+# Trinidad and Tobago
+Zone America/Port_of_Spain -4:06:04 -	LMT	1912 Mar 2
+			-4:00	-	AST
+
+# Brazil
+# Rio_Branco is too ambiguous, since there's a Rio Branco in Uruguay too.
+Zone America/Porto_Acre	-4:31:12 -	LMT	1914
+			-5:00	Brazil	AC%sT	1988 Sep 12
+			-5:00	-	ACT
+
+# Argentina
+# Santa Fe (SF), Entre Rios (ER), Corrientes (CN), Misiones (MN), Chaco (CC),
+# Formosa (FM), La Pampa (LP), Chubut (CH)
+Zone America/Rosario	-4:02:40 -	LMT	1894 Nov
+			-4:16:44 -	CMT	1920 May
+			-4:00	-	ART	1930 Dec
+			-4:00	Arg	AR%sT	1969 Oct  5
+			-3:00	Arg	AR%sT	1991 Jul
+			-3:00	-	ART	1999 Oct  3 0:00
+			-4:00	Arg	AR%sT	2000 Mar  3 0:00
+			-3:00	-	ART
+
+# St Kitts-Nevis
+Zone America/St_Kitts	-4:10:52 -	LMT	1912 Mar 2	# Basseterre
+			-4:00	-	AST
+
+# St Lucia
+Zone America/St_Lucia	-4:04:00 -	LMT	1890		# Castries
+			-4:04:00 -	CMT	1912	    # Castries Mean Time
+			-4:00	-	AST
+
+# Virgin Is
+Zone America/St_Thomas	-4:19:44 -	LMT	1911 Jul    # Charlotte Amalie
+			-4:00	-	AST
+
+# St Vincent and the Grenadines
+Zone America/St_Vincent	-4:04:56 -	LMT	1890		# Kingstown
+			-4:04:56 -	KMT	1912	   # Kingstown Mean Time
+			-4:00	-	AST
+
+# British Virgin Is
+Zone America/Tortola	-4:18:28 -	LMT	1911 Jul    # Road Town
+			-4:00	-	AST
+
+# McMurdo, Ross Island, since 1955-12
+Zone Antarctica/McMurdo	0	-	zzz	1956
+			12:00	NZAQ	NZ%sT
+
+# Japan
+Zone	Asia/Ishigaki	8:16:36	-	LMT	1896
+			8:00	-	CST
+
+# Israel
+Zone	Asia/Tel_Aviv	2:19:04 -	LMT	1880
+			2:21	-	JMT	1918
+			2:00	Zion	I%sT
+
+# Russia
+Zone Asia/Tomsk		 5:39:52 -	LMT	1924 May  2
+			 6:00	-	TSK	1957 Mar
+			 7:00	Russia	TS%s	1991 Mar 31 2:00s
+			 6:00	1:00	TSD	1991 Sep 29 2:00s
+			 6:00	-	TSK	1992 Jan 19 2:00s
+			 7:00	Russia	TS%s
+
+# Svalbard & Jan Mayen
+Zone Atlantic/Jan_Mayen	-1:00	-	EGT
+
+# Australia
+Zone Australia/Canberra	 9:56:32 -	LMT	1895 Feb
+			10:00	-	EST	1917 Jan  1 0:01
+			10:00	Aus	EST	1971 Oct 31 2:00
+			10:00	AN	EST	1981 Oct 25 2:00
+			10:00	1:00	EST	1982 Apr  4 3:00
+			10:00	AN	EST
+
+# UK
+Zone	Europe/Belfast	-0:23:40 -	LMT	1880 Aug  2
+			-0:25:21 -	DMT	1916 May 21 2:00 # Dublin/Dunsink MT
+			-0:25:21 1:00	IST	1916 Oct  1 2:00s   # Irish Summer Time
+			 0:00	GB-Eire	%s	1968 Oct 27
+			 1:00	-	BST	1971 Oct 31 2:00u
+			 0:00	GB-Eire	%s	1996
+			 0:00	EU	GMT/BST
+
+# Slovenia
+Zone Europe/Ljubljana	0:58:04	-	LMT	1884
+			1:00	-	CET	1941 Apr 18 23:00
+			1:00	C-Eur	CE%sT	1945 May  8  2:00s
+			1:00	1:00	CEST	1945 Sep 16  2:00s
+			1:00	-	CET	1982 Nov 27
+			1:00	EU	CE%sT
+
+# Bosnia and Herzegovina
+Zone	Europe/Sarajevo	1:13:40	-	LMT	1884
+			1:00	-	CET	1941 Apr 18 23:00
+			1:00	C-Eur	CE%sT	1945 May  8  2:00s
+			1:00	1:00	CEST	1945 Sep 16  2:00s
+			1:00	-	CET	1982 Nov 27
+			1:00	EU	CE%sT
+
+# Macedonia
+Zone	Europe/Skopje	1:25:44	-	LMT	1884
+			1:00	-	CET	1941 Apr 18 23:00
+			1:00	C-Eur	CE%sT	1945 May  8  2:00s
+			1:00	1:00	CEST	1945 Sep 16  2:00s
+			1:00	-	CET	1982 Nov 27
+			1:00	EU	CE%sT
+
+# Moldova
+Zone	Europe/Tiraspol	1:58:32	-	LMT	1880
+			1:55	-	CMT	1918 Feb 15 # Chisinau MT
+			1:44:24	-	BMT	1931 Jul 24 # Bucharest MT
+			2:00	Romania	EE%sT	1940 Aug 15
+			2:00	1:00	EEST	1941 Jul 17
+			1:00	C-Eur	CE%sT	1944 Aug 24
+			3:00	Russia	MSK/MSD	1991 Mar 31 2:00
+			2:00	Russia	EE%sT	1992 Jan 19 2:00
+			3:00	Russia	MSK/MSD
+
+# Croatia
+# Zone	NAME		GMTOFF	RULES	FORMAT	[UNTIL]
+Zone	Europe/Zagreb	1:03:52	-	LMT	1884
+			1:00	-	CET	1941 Apr 18 23:00
+			1:00	C-Eur	CE%sT	1945 May  8  2:00s
+			1:00	1:00	CEST	1945 Sep 16  2:00s
+			1:00	-	CET	1982 Nov 27
+			1:00	EU	CE%sT
-- 
1.8.1.2





More information about the tz mailing list