[tz] User time zones

Thu Dec 8 16:19:28 UTC 2011

Attached is some Perl code to extract abbreviation usage from the
timezone database.  This is essentially what's needed to produce the
index that would be required to decode abbreviations.  On the current
database it finds 316 distinct abbreviations, 173 referring to only one
offset and 143 referring to more than one.  A perusal of the output is
enlightening regarding how abbreviations are shared, both clashingly
and non-clashingly, between zones.

-zefram
-------------- next part --------------
#!/usr/bin/perl

{ use 5.006; }
use warnings;
use strict;

use Time::OlsonTZ::Data qw(olson_canonical_names olson_tzfile);
use DateTime::TimeZone::Tzfile ();

# Index of abbreviation usage, keyed by abbreviation.  Each value is a
# record { offsets => {...}, zones => {...} }; the two inner hashes act
# as sets, so only their keys carry information (every value is undef).
my %abbr;
for my $zonename (keys %{olson_canonical_names()}) {
	my $zone = DateTime::TimeZone::Tzfile->new(olson_tzfile($zonename));
	# Caution: this reads the obs_types member straight out of the
	# DT:TZ:Tzfile object, i.e. it depends on that module's internals
	# and is liable to break on future versions of DT:TZ:Tzfile.
	for my $type (@{$zone->{obs_types}}) {
		# Only array-ref entries are of interest; anything else
		# in the list is skipped.
		if (ref($type) ne "ARRAY") {
			next;
		}
		# Element 0 is taken as the offset and element 2 as the
		# abbreviation; element 1 is not needed here.
		my $offset = $type->[0];
		my $name = $type->[2];
		$abbr{$name} ||= { offsets=>{}, zones=>{} };
		my $rec = $abbr{$name};
		$rec->{offsets}{$offset} = undef;
		$rec->{zones}{$zonename} = undef;
	}
}

# Render an offset, treated as a count of seconds, as a signed
# "+hh:mm:ss" string, then drop any trailing run of ":00" components
# (so a whole-hour offset comes out as just "+hh").
sub format_offset {
	my($secs) = @_;
	my $sign = $secs < 0 ? "-" : "+";
	my $mag = abs($secs);
	my($hours, $mins, $rest);
	{
		# Integer arithmetic is wanted only for splitting the
		# magnitude into fields, so the pragma is confined here.
		use integer;
		$hours = $mag/3600;
		$mins = $mag/60%60;
		$rest = $mag%60;
	}
	my $txt = sprintf("%s%02d:%02d:%02d", $sign, $hours, $mins, $rest);
	$txt =~ s/(?::00)+\z//;
	return $txt;
}

# One output line per abbreviation, in sorted order:
#   ABBR: <offsets in ascending numeric order> (<zone names, sorted>)
for my $name (sort keys %abbr) {
	my $rec = $abbr{$name};
	my @offsets = map { format_offset($_) }
		sort { $a <=> $b } keys %{$rec->{offsets}};
	my @zones = sort keys %{$rec->{zones}};
	print $name, ": ", join(" ", @offsets),
		" (", join(" ", @zones), ")\n";
}

exit 0;