[tz] [PROPOSED 1/2] zic now supports links to links

Paul Eggert eggert at cs.ucla.edu
Mon Oct 24 06:08:35 UTC 2022


* NEWS, backward, zic.8: Mention this.
* zic.c (make_links):
New function, which supports links to later links.
In this function, improve quality of warnings about
links to links.
(main): Use it.
---
 NEWS     | 14 ++++++++
 backward |  6 ++--
 zic.8    | 29 ++++++++++++++---
 zic.c    | 97 ++++++++++++++++++++++++++++++++++++++++++++++++--------
 4 files changed, 126 insertions(+), 20 deletions(-)

diff --git a/NEWS b/NEWS
index 64a2d0c..f400d71 100644
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,7 @@ Unreleased, experimental changes
   Briefly:
     Move links to 'backward'.
     In vanguard form, GMT is now a Zone and Etc/GMT a link.
+    zic now supports links to links.
     Simplify four Ontario zones into one.
     Fix a Y2438 bug when reading TZif data.
 
@@ -28,6 +29,19 @@ Unreleased, experimental changes
 
   Changes to code
 
+    zic now supports links to links regardless of input line order.
+    For example, if Australia/Sydney is a Zone, the lines
+      Link Australia/Canberra Australia/ACT
+      Link Australia/Sydney Australia/Canberra
+    now work correctly, even though the shell commands
+      ln Australia/Canberra Australia/ACT
+      ln Australia/Sydney Australia/Canberra
+    would fail because the first command attempts to use a link
+    Australia/Canberra that does not exist until after the second
+    command is executed.  Previously, zic had unspecified behavior if
+    a Link line's target was another link, and zic often misbehaved if
+    a Link line's target was a later Link line.
+
     Fix line number in zic's diagnostic for a link to a link.
 
     Fix a bug that caused localtime to mishandle timestamps starting
diff --git a/backward b/backward
index 101cbc0..4c1c5d5 100644
--- a/backward
+++ b/backward
@@ -17,9 +17,9 @@
 # backward compatibility link.  Each section is sorted by link name.
 
 # A "#= TARGET1" comment labels each link inserted only because some
-# .zi parsers mishandle links to links.  The comment says what the
-# target would be if these parsers were fixed so that data could
-# contain links to links.  For example, the line
+# .zi parsers (including tzcode through 2022e) mishandle links to links.
+# The comment says what the target would be if these parsers were fixed
+# so that data could contain links to links.  For example, the line
 # "Link Australia/Sydney Australia/ACT #= Australia/Canberra" would be
 # "Link Australia/Canberra Australia/ACT" were it not that data lines
 # refrain from linking to links like Australia/Canberra, which means
diff --git a/zic.8 b/zic.8
index e46d0ab..f79148f 100644
--- a/zic.8
+++ b/zic.8
@@ -190,7 +190,10 @@ the named file rather than in the standard location.
 Be more verbose, and complain about the following situations:
 .RS
 .PP
-The input specifies a link to a link.
+The input specifies a link to a link,
+something not supported by some older parsers, including
+.B zic
+itself through release 2022e.
 .PP
 A year that appears in a data file is outside the range
 of representable years.
@@ -691,19 +694,37 @@ The
 .B TARGET
 field should appear as the
 .B NAME
-field in some zone line.
+field in some zone line or as the
+.B LINK-NAME
+field in some link line.
 The
 .B LINK-NAME
 field is used as an alternative name for that zone;
 it has the same syntax as a zone line's
 .B NAME
 field.
+Links can chain together, although the behavior is unspecified if a
+chain of one or more links does not terminate in a Zone name.
+A link line can appear before the line that defines the link target.
+For example:
+.sp
+.ne 3
+.nf
+.in +2m
+.ta \w'Zone\0\0'u +\w'Greenwich\0\0'u
+Link	Greenwich	G_M_T
+Link	Etc/GMT	Greenwich
+Zone	Etc/GMT\0\00\0\0\*-\0\0GMT
+.sp
+.in
+.fi
+The two links are chained together, and G_M_T, Greenwich, and Etc/GMT
+all name the same zone.
 .PP
 Except for continuation lines,
 lines may appear in any order in the input.
 However, the behavior is unspecified if multiple zone or link lines
-define the same name, or if the source of one link line is the target
-of another.
+define the same name.
 .PP
 The file that describes leap seconds can have leap lines and an
 expiration line.
diff --git a/zic.c b/zic.c
index 82e1a4d..78afa67 100644
--- a/zic.c
+++ b/zic.c
@@ -678,6 +678,89 @@ bsearch_linkcmp(void const *key, void const *b)
   return strcmp(key, m->l_linkname);
 }
 
+/* Make the links specified by the Link lines.  */
+static void
+make_links(void)
+{
+  ptrdiff_t i, j, nalinks;
+  qsort(links, nlinks, sizeof *links, qsort_linkcmp);
+
+  /* Ignore each link superseded by a later link with the same name.  */
+  j = 0;
+  for (i = 0; i < nlinks; i++) {
+    while (i + 1 < nlinks
+	   && strcmp(links[i].l_linkname, links[i + 1].l_linkname) == 0)
+      i++;
+    links[j++] = links[i];
+  }
+  nlinks = j;
+
+  /* Walk through the link array making links.  However,
+     if a link's target has not been made yet, append a copy to the
+     end of the array.  The end of the array will gradually fill
+     up with a small sorted subsequence of not-yet-made links.
+     nalinks counts all the links in the array, including copies.
+     When we reach the copied subsequence, it may still contain
+     a link to a not-yet-made link, so the process repeats.
+     At any given point in time, the link array consists of the
+     following subregions, where 0 <= i <= j <= nalinks and
+     0 <= nlinks <= nalinks:
+
+       0 .. (i - 1):
+	 links that either have been made, or have been copied to a
+	 later point point in the array (this later point can be in
+	 any of the three subregions)
+       i .. (j - 1):
+	 not-yet-made links for this pass
+       j .. (nalinks - 1):
+	 not-yet-made links that this pass has skipped because
+	 they were links to not-yet-made links
+
+     The first subregion might not be sorted if nlinks < i;
+     the other two subregions are sorted.  This algorithm does
+     not alter entries 0 .. (nlinks - 1), which remain sorted.
+
+     If there are L links, this algorithm is O(C*L*log(L)) where
+     C is the length of the longest link chain.  Usually C is
+     short (e.g., 3) though its worst-case value is L.  */
+
+  j = nalinks = nlinks;
+
+  for (i = 0; i < nalinks; i++) {
+    struct link *l;
+
+    eat(links[i].l_filenum, links[i].l_linenum);
+
+    /* If this pass examined all its links, start the next pass.  */
+    if (i == j)
+      j = nalinks;
+
+    /* Make this link unless its target has not been made yet.  */
+    l = bsearch(links[i].l_target, &links[i + 1], j - (i + 1),
+		sizeof *links, bsearch_linkcmp);
+    if (!l)
+      l = bsearch(links[i].l_target, &links[j], nalinks - j,
+		  sizeof *links, bsearch_linkcmp);
+    if (!l)
+      dolink(links[i].l_target, links[i].l_linkname, false);
+    else {
+      /* The link target has not been made yet; copy the link to the end.  */
+      links = growalloc (links, sizeof *links, nalinks, &nlinks_alloc);
+      links[nalinks++] = links[i];
+    }
+
+    if (noise && i < nlinks) {
+      if (l)
+	warning(_("link %s targeting link %s mishandled by pre-2023 zic"),
+		links[i].l_linkname, links[i].l_target);
+      else if (bsearch(links[i].l_target, links, nlinks, sizeof *links,
+		       bsearch_linkcmp))
+	warning(_("link %s targeting link %s"),
+		links[i].l_linkname, links[i].l_target);
+    }
+  }
+}
+
 /* Simple signal handling: just set a flag that is checked
    periodically outside critical sections.  To set up the handler,
    prefer sigaction if available to close a signal race.  */
@@ -992,19 +1075,7 @@ _("%s: invalid time range: %s\n"),
 			continue;
 		outzone(&zones[i], j - i);
 	}
-	/*
-	** Make links.
-	*/
-	if (noise)
-	  qsort(links, nlinks, sizeof *links, qsort_linkcmp);
-	for (i = 0; i < nlinks; ++i) {
-		eat(links[i].l_filenum, links[i].l_linenum);
-		dolink(links[i].l_target, links[i].l_linkname, false);
-		if (noise
-		    && bsearch(links[i].l_target, links, nlinks, sizeof *links,
-			       bsearch_linkcmp))
-		  warning(_("link to link"));
-	}
+	make_links();
 	if (lcltime != NULL) {
 		eat(COMMAND_LINE_FILENUM, 1);
 		dolink(lcltime, tzdefault, true);
-- 
2.37.3



More information about the tz mailing list