[tz] disclaimer removals
Paul Eggert
eggert at cs.ucla.edu
Thu Jun 26 21:54:40 UTC 2014
Christos Zoulas wrote:
> Please fix it so that it does not pass char into the isfoo() functions
The code doesn't pass char into the isfoo() functions. But your message
and its followups suggest that the code is overly confusing (especially
the bit about isascii, which nowadays never tests whether its argument
is ASCII!). And there is one real bug in this area: the code uses
predicates like isalpha whose behavior can depend on the locale if
HAVE_GETTEXT is true, and that dependency was not intended.
I installed the attached patch to try to fix these problems. The basic
idea was to discard <ctype.h> as being more trouble than it's worth. A
nice little byproduct of this is that we no longer need to worry about
ctype.h defining a macro 'isleap' that's incompatible with ours,
something POSIX says it's entitled to do.
-------------- next part --------------
From 59a38b5ea7d304ef7fb59f72dcc50f80a3268e06 Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert at cs.ucla.edu>
Date: Thu, 26 Jun 2014 14:41:07 -0700
Subject: [PATCH] Avoid <ctype.h> locale problems.
* zdump.c, zic.c: Don't include <ctype.h>, because the behavior of
its macros is locale-dependent if HAVE_GETTEXT, and we want the C
locale's semantics. Instead, use portable replacements for
ctype.h operations, with the desired semantics. All uses of
isalpha etc. replaced by calls to new functions is_alpha etc. or
by inline code.
(isascii): Remove.
(is_alpha): New function.
* zic.c (doabbr): Simplify by using is_alpha.
(is_space): New function.
---
zdump.c | 32 ++++++++++++++------
zic.c | 105 ++++++++++++++++++++++++++++++++++++++++++----------------------
2 files changed, 92 insertions(+), 45 deletions(-)
diff --git a/zdump.c b/zdump.c
index f3e2a24..bde827a 100644
--- a/zdump.c
+++ b/zdump.c
@@ -24,10 +24,6 @@
#include "time.h" /* for struct tm */
#include "stdlib.h" /* for exit, malloc, atoi */
#include "limits.h" /* for CHAR_BIT, LLONG_MAX */
-#include "ctype.h" /* for isalpha et al. */
-#ifndef isascii
-#define isascii(x) 1
-#endif /* !defined isascii */
/*
** Substitutes for pre-C99 compilers.
@@ -220,6 +216,25 @@ static void show(char * zone, time_t t, int v);
static const char * tformat(void);
static time_t yeartot(intmax_t y) ATTRIBUTE_PURE;
+/* Is A an alphabetic character in the C locale? */
+static int
+is_alpha(char a)
+{
+ switch (a) {
+ default:
+ return 0;
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
+ case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
+ case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
+ case 'V': case 'W': case 'X': case 'Y': case 'Z':
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
+ case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+ case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
+ case 'v': case 'w': case 'x': case 'y': case 'z':
+ return 1;
+ }
+}
+
#ifndef TYPECHECK
#define my_localtime localtime
#else /* !defined TYPECHECK */
@@ -266,7 +281,7 @@ abbrok(const char *const abbrp, const char *const zone)
return;
cp = abbrp;
wp = NULL;
- while (isascii((unsigned char) *cp) && isalpha((unsigned char) *cp))
+ while (is_alpha(*cp))
++cp;
if (cp - abbrp == 0)
wp = _("lacks alphabetic at start");
@@ -276,10 +291,9 @@ abbrok(const char *const abbrp, const char *const zone)
wp = _("has more than 6 alphabetics");
if (wp == NULL && (*cp == '+' || *cp == '-')) {
++cp;
- if (isascii((unsigned char) *cp) &&
- isdigit((unsigned char) *cp))
- if (*cp++ == '1' && *cp >= '0' && *cp <= '4')
- ++cp;
+ if ('0' <= *cp && *cp <= '9')
+ if (*cp++ == '1' && '0' <= *cp && *cp <= '4')
+ cp++;
if (*cp != '\0')
wp = _("differs from POSIX standard");
}
diff --git a/zic.c b/zic.c
index fa25cd5..9c22e7a 100644
--- a/zic.c
+++ b/zic.c
@@ -31,19 +31,6 @@ typedef int_fast64_t zic_t;
#define MKDIR_UMASK 0755
#endif
-/*
-** On some ancient hosts, predicates like 'isspace(C)' are defined
-** only if isascii(C) || C == EOF. Modern hosts obey the C Standard,
-** which says they are defined only if C == ((unsigned char) C) || C == EOF.
-** Neither the C Standard nor Posix require that 'isascii' exist.
-** For portability, we check both ancient and modern requirements.
-** If isascii is not defined, the isascii check succeeds trivially.
-*/
-#include "ctype.h"
-#ifndef isascii
-#define isascii(x) 1
-#endif
-
#define end(cp) (strchr((cp), '\0'))
struct rule {
@@ -132,7 +119,8 @@ static int inzcont(char ** fields, int nfields);
static int inzone(char ** fields, int nfields);
static int inzsub(char ** fields, int nfields, int iscont);
static int itsdir(const char * name);
-static int lowerit(int c);
+static int is_alpha(char a);
+static char lowerit(char);
static int mkdirs(char * filename);
static void newabbr(const char * abbr);
static zic_t oadd(zic_t t1, zic_t t2);
@@ -640,16 +628,25 @@ static void
namecheck(const char *name)
{
register char const *cp;
- static char const benign[] = ("-/_"
- "abcdefghijklmnopqrstuvwxyz"
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
+
+ /* Benign characters in a portable file name. */
+ static char const benign[] =
+ "-/_"
+ "abcdefghijklmnopqrstuvwxyz"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+
+ /* Non-control chars in the POSIX portable character set,
+ excluding the benign characters. */
+ static char const printable_and_not_benign[] =
+ " !\"#$%&'()*+,.0123456789:;<=>?@[\\]^`{|}~";
+
register char const *component = name;
if (!noise)
return;
for (cp = name; *cp; cp++) {
unsigned char c = *cp;
if (!strchr(benign, c)) {
- warning((isascii(c) && isprint(c)
+ warning((strchr(printable_and_not_benign, c)
? _("file name '%s' contains byte '%c'")
: _("file name '%s' contains byte '\\%o'")),
name, c);
@@ -1862,10 +1859,8 @@ doabbr(char *const abbr, const char *const format, const char *const letters,
}
if (!doquotes)
return;
- for (cp = abbr; *cp != '\0'; ++cp)
- if (strchr("ABCDEFGHIJKLMNOPQRSTUVWXYZ", *cp) == NULL &&
- strchr("abcdefghijklmnopqrstuvwxyz", *cp) == NULL)
- break;
+ for (cp = abbr; is_alpha(*cp); cp++)
+ continue;
len = strlen(abbr);
if (len > 0 && *cp == '\0')
return;
@@ -2582,11 +2577,54 @@ yearistype(const int year, const char *const type)
exit(EXIT_FAILURE);
}
+/* Is A a space character in the C locale? */
+static int
+is_space(char a)
+{
+ switch (a) {
+ default:
+ return 0;
+ case ' ': case '\f': case '\n': case '\r': case '\t': case '\v':
+ return 1;
+ }
+}
+
+/* Is A an alphabetic character in the C locale? */
static int
-lowerit(int a)
+is_alpha(char a)
+{
+ switch (a) {
+ default:
+ return 0;
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
+ case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
+ case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
+ case 'V': case 'W': case 'X': case 'Y': case 'Z':
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
+ case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+ case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
+ case 'v': case 'w': case 'x': case 'y': case 'z':
+ return 1;
+ }
+}
+
+/* If A is an uppercase character in the C locale, return its lowercase
+ counterpart. Otherwise, return A. */
+static char
+lowerit(char a)
{
- a = (unsigned char) a;
- return (isascii(a) && isupper(a)) ? tolower(a) : a;
+ switch (a) {
+ default: return a;
+ case 'A': return 'a'; case 'B': return 'b'; case 'C': return 'c';
+ case 'D': return 'd'; case 'E': return 'e'; case 'F': return 'f';
+ case 'G': return 'g'; case 'H': return 'h'; case 'I': return 'i';
+ case 'J': return 'j'; case 'K': return 'k'; case 'L': return 'l';
+ case 'M': return 'm'; case 'N': return 'n'; case 'O': return 'o';
+ case 'P': return 'p'; case 'Q': return 'q'; case 'R': return 'r';
+ case 'S': return 's'; case 'T': return 't'; case 'U': return 'u';
+ case 'V': return 'v'; case 'W': return 'w'; case 'X': return 'x';
+ case 'Y': return 'y'; case 'Z': return 'z';
+ }
}
/* case-insensitive equality */
@@ -2653,8 +2691,7 @@ getfields(register char *cp)
array = emalloc(size_product(strlen(cp) + 1, sizeof *array));
nsubs = 0;
for ( ; ; ) {
- while (isascii((unsigned char) *cp) &&
- isspace((unsigned char) *cp))
+ while (is_space(*cp))
++cp;
if (*cp == '\0' || *cp == '#')
break;
@@ -2671,9 +2708,8 @@ getfields(register char *cp)
));
exit(1);
}
- } while (*cp != '\0' && *cp != '#' &&
- (!isascii(*cp) || !isspace((unsigned char) *cp)));
- if (isascii(*cp) && isspace((unsigned char) *cp))
+ } while (*cp && *cp != '#' && !is_space(*cp));
+ if (is_space(*cp))
++cp;
*dp = '\0';
}
@@ -2806,8 +2842,7 @@ newabbr(const char *const string)
*/
cp = string;
mp = NULL;
- while (isascii((unsigned char) *cp) &&
- isalpha((unsigned char) *cp))
+ while (is_alpha(*cp))
++cp;
if (cp - string == 0)
mp = _("time zone abbreviation lacks alphabetic at start");
@@ -2817,8 +2852,7 @@ mp = _("time zone abbreviation has fewer than 3 alphabetics");
mp = _("time zone abbreviation has too many alphabetics");
if (mp == NULL && (*cp == '+' || *cp == '-')) {
++cp;
- if (isascii((unsigned char) *cp) &&
- isdigit((unsigned char) *cp))
+ if (is_digit(*cp))
if (*cp++ == '1' &&
*cp >= '0' && *cp <= '4')
++cp;
@@ -2852,8 +2886,7 @@ mkdirs(char *argname)
/*
** DOS drive specifier?
*/
- if (isalpha((unsigned char) name[0]) &&
- name[1] == ':' && name[2] == '\0') {
+ if (is_alpha(name[0]) && name[1] == ':' && name[2] == '\0') {
*cp = '/';
continue;
}
--
1.9.1
More information about the tz mailing list