From 571e9d4c6010f2953e9bf7faa905780019d1057c Mon Sep 17 00:00:00 2001
From: Paul Eggert
Date: Fri, 1 Apr 2022 13:01:50 -0700
Subject: [PROPOSED 1/2] zic now checks input bytes more carefully

Also, it allows input lines up to 2048 bytes, which is the current
POSIX minimum limit (the old was 512).
* zic.c (_POSIX2_LINE_MAX): New macro, if not already defined.
(inputline): New function.
(infile): Prefer it to fgets. Buffer size is now _POSIX2_LINE_MAX.
---
Review notes (free-form text in this position is not part of the commit):

* inputline() reads one byte at a time with getc() and exits with an
  error on a read error, on a NUL byte, on a final line that lacks a
  newline, and on a line that fills the buffer. It returns a negative
  value only when EOF is hit before any byte of a new line was read.
* The length check runs after each byte is stored, so at most
  bufsize - 1 bytes are kept and the terminating NUL always fits.
  infile() passes a buffer of _POSIX2_LINE_MAX bytes; with the
  fallback value of 2048 the longest accepted line is 2047 bytes plus
  its newline, which agrees with the "2048 bytes counting the newline"
  wording added to zic.8.
* infile() now declares buf inside the read loop and drops the cp
  variable that only the old fgets/strchr code used.
* NOTE(review): line breaks and leading indentation in this copy were
  restored by hand from a whitespace-collapsed copy. Verify the
  whitespace-sensitive context lines, and the short `\*` context line
  in the zic.8 hunk, against the upstream commit before applying.

 NEWS  |  8 ++++++++
 zic.8 |  3 ++-
 zic.c | 54 ++++++++++++++++++++++++++++++++++++++++++++----------
 3 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/NEWS b/NEWS
index 34c1cf2..2a5ba99 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,13 @@
 News for the tz database
 
+Unreleased, experimental changes
+
+  Changes to code
+
+    zic now checks its input for NUL bytes and unterminated lines, and
+    now supports input line lengths up to 2048 (not 512) bytes.
+
+
 Release 2022a - 2022-03-15 23:02:01 -0700
 
   Briefly:
diff --git a/zic.8 b/zic.8
index a0a9743..5e6997e 100644
--- a/zic.8
+++ b/zic.8
@@ -253,7 +253,8 @@ format.
 .PP
 Input files should be text files, that is, they should be a series of
 zero or more lines, each ending in a newline byte and containing at
-most 511 bytes, and without any NUL bytes. The input text's encoding
+most 2048 bytes counting the newline, and without any NUL bytes.
+The input text's encoding
 is typically UTF-8 or ASCII; it should have a unibyte representation
 for the POSIX Portable Character Set (PPCS)
 \*
diff --git a/zic.c b/zic.c
index 2d1a187..50c4675 100644
--- a/zic.c
+++ b/zic.c
@@ -58,6 +58,11 @@ static ptrdiff_t const PTRDIFF_MAX = MAXVAL(ptrdiff_t, TYPE_BIT(ptrdiff_t));
 # define _Alignof(type) offsetof(struct { char a; type b; }, b)
 #endif
 
+/* The maximum length of a text line, including the trailing newline. */
+#ifndef _POSIX2_LINE_MAX
+# define _POSIX2_LINE_MAX 2048
+#endif
+
 /* The type for line numbers. Use PRIdMAX to format them; formerly
    there was also "#define PRIdLINENO PRIdMAX" and formats used
    PRIdLINENO, but xgettext cannot grok that. */
@@ -1304,17 +1309,49 @@ associate(void)
 		exit(EXIT_FAILURE);
 }
 
+/* Read a text line from FP into BUF, which is of size BUFSIZE.
+   Terminate it with a NUL byte instead of a newline.
+   Return the line's length, not counting the NUL byte.
+   On EOF, return a negative number.
+   On error, report the error and exit. */
+static ptrdiff_t
+inputline(FILE *fp, char *buf, ptrdiff_t bufsize)
+{
+  ptrdiff_t linelen = 0, ch;
+  while ((ch = getc(fp)) != '\n') {
+    if (ch < 0) {
+      if (ferror(fp)) {
+	error(_("input error"));
+	exit(EXIT_FAILURE);
+      }
+      if (linelen == 0)
+	return -1;
+      error(_("unterminated line"));
+      exit(EXIT_FAILURE);
+    }
+    if (!ch) {
+      error(_("NUL input byte"));
+      exit(EXIT_FAILURE);
+    }
+    buf[linelen++] = ch;
+    if (linelen == bufsize) {
+      error(_("line too long"));
+      exit(EXIT_FAILURE);
+    }
+  }
+  buf[linelen] = '\0';
+  return linelen;
+}
+
 static void
 infile(const char *name)
 {
 	register FILE * fp;
 	register char ** fields;
-	register char * cp;
 	register const struct lookup * lp;
 	register int nfields;
 	register bool wantcont;
 	register lineno num;
-	char buf[BUFSIZ];
 
 	if (strcmp(name, "-") == 0) {
 		name = _("standard input");
@@ -1328,15 +1365,12 @@ infile(const char *name)
 	}
 	wantcont = false;
 	for (num = 1; ; ++num) {
+		ptrdiff_t linelen;
+		char buf[_POSIX2_LINE_MAX];
 		eat(name, num);
-		if (fgets(buf, sizeof buf, fp) != buf)
-			break;
-		cp = strchr(buf, '\n');
-		if (cp == NULL) {
-			error(_("line too long"));
-			exit(EXIT_FAILURE);
-		}
-		*cp = '\0';
+		linelen = inputline(fp, buf, sizeof buf);
+		if (linelen < 0)
+			break;
 		fields = getfields(buf);
 		nfields = 0;
 		while (fields[nfields] != NULL) {
-- 
2.35.1