[tz] Simplification and unification of scheme:// anchors

Wed Jan 30 11:28:36 UTC 2013

Ian Abbott <abbotti at mev.co.uk> wrote:
 |On 28/01/13 11:44, Steffen Daode Nurpmeso wrote:
 |> Ian Abbott <abbotti at mev.co.uk> wrote:
 |>|On 2013-01-27 00:45, Steffen Daode Nurpmeso wrote:
 |>|> Here is the promised patch.
 |>|> I would be happy if it would be included, so that i finally would
 |>|> have given something back to this project.
 [.]
 |While on the subject, the backslash escapes at the ends of the lines 
 |with a <URL> with a parenthesised comment on the following line is kind 
 |of ugly.  I'm sure it must be possible to re-work your script to avoid 
 |the need for that.  (I.e. if a line ends with a <URL> plus optional 
 |whitespace, check if the following line starts with optional whitespace 
 |plus parenthesised link text.)

Hmm.
So I've reworked the (Pod-less) script to support multiple follow
lines anywhere in a comment, and changed the two links for
which I remembered that it mattered.

This updated version also fixes the "trailing empty line after
rules are included in data boxes" issue.
And it uses normal text paragraphs for the comment text, forcing
newline breaks via <br />, instead of using preformatted text for
that, which makes it even nicer, since some of the dramatically
long links will now be wrapped by browsers.

 [.]
 |Then again, perhaps parentheses aren't the ideal delimiters for link 
 |text anyway, as I'm sure there must be cases in the data files where 
 |this was not the intended interpretation of the parenthesised text 
 |following a URL.

On Sunday night I noticed that some .sy domains were
reachable, but their English versions were not (from within Germany).
So maybe using the *check* mode doesn't really make sense, since
results may vary from time to time.
But I don't know.

 |-- 
 |-=( Ian Abbott @ MEV Ltd.    E-mail: <abbotti at mev.co.uk>        )=-
 |-=( Tel: +44 (0)161 477 1898   FAX: +44 (0)161 718 3587         )=-

Ciao,

--steffen

diff --git a/northamerica b/northamerica
index 6aaf665..46cfaa9 100644
--- a/northamerica
+++ b/northamerica
@@ -59,7 +59,8 @@
 #
 # For more about the first ten years of DST in the United States, see
 # Robert Garland's <http://www.clpgh.org/exhibit/dst.html> \
-# (``Ten years of daylight saving from the Pittsburgh standpoint'', Carnegie Library of Pittsburgh, 1927).
+# (``Ten years of daylight saving from the Pittsburgh standpoint'', \
+# Carnegie Library of Pittsburgh, 1927).
 #
 # Shanks says that DST was called "War Time" in the US in 1918 and 1919.
 # However, DST was imposed by the Standard Time Act of 1918, which
@@ -79,9 +80,9 @@
 # Last night I heard part of a rebroadcast of a 1945 Arch Oboler radio drama.
 # In the introduction, Oboler spoke of "Eastern Peace Time."
 # An AltaVista search turned up
-# <http://rowayton.org/rhs/hstaug45.html>:
-# ``When the time is announced over the radio now, it is 'Eastern Peace
-# Time' instead of the old familiar 'Eastern War Time.'  Peace is wonderful.''
+# <http://rowayton.org/rhs/hstaug45.html> \
+# (``When the time is announced over the radio now, it is 'Eastern Peace \
+# Time' instead of the old familiar 'Eastern War Time.'  Peace is wonderful.'')
 # (August 1945) by way of confirmation.
 
 # From Joseph Gallant citing
diff --git a/workht.pl b/workht.pl
index 56ee83c..7c742c6 100644
--- a/workht.pl
+++ b/workht.pl
@@ -3,7 +3,7 @@ require 5.008_001;
 #@ workht.pl - URL checker / output dumper for tz data files.
 #@ Public domain, 2013, Steffen Nurpmeso.
 #@ Synopsis:
-#@    workht.pl html   < DATA_FILE | elinks -force-html -dump 1
+#@    workht.pl html   < DATA_FILE | elinks -force-html -dump 0
 #@    workht.pl check  < DATA_FILE > NEW_DATA_FILE
 #@ The *check* mode requires an installed curl(1) (<http://curl.haxx.se>);
 #@ Input data notes:
@@ -17,11 +17,9 @@ require 5.008_001;
 #@   work.)
 #@ - A link may be followed by WS and a link text in parenthesis ('\([^)]*?\)');
 #@   If no link text exists, the URL is used as the link content, too.
-#@   Note this only works in *html* mode, otherwise it'll always be the URL,
-#@   and the text in parenthesis will be left as is.
 #@ - A link may also be followed by WS, a backslash and a LF ('\s*\\$'),
-#@   in which case the link text in parenthesis may be placed on the very next
-#@   line.
+#@   in which case the link text in parenthesis may be placed on follow lines;
+#@   multiple follow lines with "backslash escaped newlines" may be used.
 #@ Note: slurps the entire data into memory.
 
 my $SCHEME_CHECKER = 'curl -q --silent --fail --head --location';
@@ -76,7 +74,7 @@ sub main_fun {
 sub usage {
    print STDERR <<__EOT__;
 Synopsis:
-   workht.pl html   < DATA_FILE | elinks -force-html -dump 1
+   workht.pl html   < DATA_FILE | elinks -force-html -dump 0
    workht.pl check  < DATA_FILE > NEW_DATA_FILE
 
 The *html* mode generates a very simple HTML page with hyperlinks.
@@ -94,39 +92,46 @@ sub mode_html {
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
 <style>
-body {margin:0; margin-left:5%; padding:0; width:88%}
-pre {line-height:1.4em; font-family:serif; font-size:100%}
-.indat {padding:1em; border:1px solid black; background-color:#F0F0F0;
-   font-family:monospace; font-size:90%}
+body {margin:0; margin-left:5%; padding:1em; width:88%}
+p, pre {line-height:1.5em; font-family:serif; font-size:100%}
+pre {padding:1em; border:1px solid black; background-color:#F0F0F0;
+   line-height:1.3em; font-family:monospace; font-size:90%}
 </style>
 <body>
 __EOT__
 
-   my ($indat, $intxt) = (0, 0);
+   my ($indat, $intxt, $prefix) = (0, 0);
    while (defined(my $lo = shift @$INPUT)) {
+      $prefix = '';
       if (! $lo->{ISCOMM}) {
          if ($intxt) {
             $intxt = 0;
-            die unless print "</pre>\n";
+            $prefix = "</p>";
          }
-         if ($lo->{DATA} !~ /^\s*$/ && ! $indat) {
-            $indat = 1;
-            die unless print "<pre class=indat>\n";
+         if ($lo->{DATA} !~ /^\s*$/) {
+            if (! $indat) {
+               $indat = 1;
+               $prefix .= "<pre>";
+            }
+         } elsif ($indat) {
+            $indat = 0;
+            $prefix .= "</pre>";
          }
-         die unless print $lo->{DATA}, "\n";
+         die unless print $prefix, $lo->{DATA}, "\n";
          next;
       }
+
       if ($indat) {
          $indat = 0;
-         die unless print "</pre>\n";
+         $prefix = "</pre>\n";
       }
       if (! $intxt) {
          $intxt = 1;
-         die unless print "<pre>\n";
+         $prefix .= "<p>\n";
       }
 
       my ($l, $rest) = ('', substr $lo->{DATA}, $lo->{ISCOMM});
-      Line::join_follow(\$lo, \$rest, $INPUT) if $lo->{FOLLOW};
+      Line::join_follow(\$rest, $lo, $INPUT) if $lo->{FOLLOW};
 
       while ($rest =~ $SCHEME_URL) {
          $l .= $1 ? $1 : '';
@@ -142,7 +147,7 @@ __EOT__
          $l .= '<a href="' . $url . '">' . $text . '</a>';
       }
       $l .= $rest if $rest;
-      die unless print $l, "\n";
+      die unless print $prefix, $l, "<br />\n";
    }
 
    print <<__EOT__;
@@ -209,35 +214,30 @@ sub mode_check {
       while (@ld) {
          $il = shift @ld;
          chomp $il;
-   jloop:
          $ol = new Line;
          push @xd, $ol;
          $ol->{DATA} = $il;
-         next unless (($ol->{ISCOMM} = ($il =~ /^(\s*#\s*)/) ? length $1 : 0));
-         next unless (($ol->{FOLLOW} = ($il =~ /(\s*\\\s*)$/) ? length $1 : 0));
-         if (@ld) {
-            $il = shift @ld;
-            chomp $il;
-            goto jloop if $il =~ $SCHEME_TEXT;
-         } else {
-            $il = undef;
-         }
+         $ol->{ISCOMM} = ($il =~ /^(\s*#\s*)/) ? length $1 : 0;
+         $ol->{FOLLOW} = ($il =~ /\s*(\\\s*)$/) ? length $1 : 0;
+      }
+      if (defined $ol && $ol->{FOLLOW}) {
          $ol->{FOLLOW} = 0;
-         print STDERR "! False line continuation after: $ol->{DATA}\n";
+         print STDERR "! File ends with backslash escaped follow line\n";
          $ESTAT = 1;
-         goto jloop if defined $il;
       }
       $INPUT = \@xd;
    }
 
    sub join_follow {
-      my ($sr, $lr, $lar) = @_;
-      if (${$sr}->{FOLLOW}) {
-         $$lr = substr $$lr, 0, -${$sr}->{FOLLOW};
-         $$lr .= ' ';
-         $$sr = shift @$lar;
-         $$lr .= substr ${$sr}->{DATA}, ${$sr}->{ISCOMM};
-      }
+      my ($lr, $lor, $lar) = @_;
+      $$lr = substr $$lr, 0, -$lor->{FOLLOW};
+      do {
+         $lor = shift @$lar;
+         my $l = $lor->{DATA};
+         $l = $lor->{FOLLOW} ? substr $l, $lor->{ISCOMM}, -$lor->{FOLLOW}
+               : substr $l, $lor->{ISCOMM};
+         $$lr .= $l;
+      } while $lor->{FOLLOW};
    }
 }