Back-of-the-envelope cost of extra data :-)
Arthur David Olson
olsona at lecserver.nci.nih.gov
Mon May 9 15:19:45 UTC 2005
One way to reduce data file growth is to store each time_t value in just 5
bytes rather than 8. Since 32 bits buy about 70 years of time on each side of
1970, 40 bits buy about 256 times 70, or about 17920, years of time on each
side of 1970. The file format will still need to change again in the distant
future (the Y20K problem).
Recall that the eight-byte files resulted in a "du" report value of 1709.
With the changes attached below in place, we get...
Script started on Mon May 09 11:02:55 2005
lecserver$ du -s -k tz/tmp/etc/zoneinfo tzexp/tmp/etc/zoneinfo
489 tz/tmp/etc/zoneinfo
1281 tzexp/tmp/etc/zoneinfo
lecserver$ exit
script done on Mon May 09 11:03:07 2005
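...meaning that about 35% of the increase has been eliminated (the increase over the 489 baseline drops from 1709 - 489 = 1220 to 1281 - 489 = 792, a saving of 428).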
--ado
------- tzfile.5 -------
*** /tmp/geta29273 Mon May 9 11:01:02 2005
--- /tmp/getb29273 Mon May 9 11:01:02 2005
***************
*** 137,145 ****
For version-2-format time zone files,
the above header and data is followed by a second header and data,
identical in format except that
! eight bytes are used for each transition time or leap second time.
.SH SEE ALSO
newctime(3)
! .\" @(#)tzfile.5 7.13
.\" This file is in the public domain, so clarified as of
.\" 1996-06-05 by Arthur David Olson (arthur_david_olson at nih.gov).
--- 137,145 ----
For version-2-format time zone files,
the above header and data is followed by a second header and data,
identical in format except that
! five bytes are used for each transition time or leap second time.
.SH SEE ALSO
newctime(3)
! .\" @(#)tzfile.5 7.14
.\" This file is in the public domain, so clarified as of
.\" 1996-06-05 by Arthur David Olson (arthur_david_olson at nih.gov).
------- tzfile.h -------
*** /tmp/geta29292 Mon May 9 11:01:15 2005
--- /tmp/getb29292 Mon May 9 11:01:15 2005
***************
*** 21,27 ****
#ifndef lint
#ifndef NOID
! static char tzfilehid[] = "@(#)tzfile.h 7.19";
#endif /* !defined NOID */
#endif /* !defined lint */
--- 21,27 ----
#ifndef lint
#ifndef NOID
! static char tzfilehid[] = "@(#)tzfile.h 7.20";
#endif /* !defined NOID */
#endif /* !defined lint */
***************
*** 87,93 ****
/*
** If tzh_version is '2' or greater, the above is followed by a second instance
** of tzhead and a second instance of the data in which each coded transition
! ** time uses 8 rather than 4 chars.
*/
/*
--- 87,93 ----
/*
** If tzh_version is '2' or greater, the above is followed by a second instance
** of tzhead and a second instance of the data in which each coded transition
! ** time uses 5 rather than 4 chars.
*/
/*
------- zic.c -------
*** /tmp/geta29311 Mon May 9 11:01:25 2005
--- /tmp/getb29311 Mon May 9 11:01:25 2005
***************
*** 1,4 ****
! static char elsieid[] = "@(#)zic.c 7.127";
#include "private.h"
#include "locale.h"
--- 1,4 ----
! static char elsieid[] = "@(#)zic.c 7.128";
#include "private.h"
#include "locale.h"
***************
*** 99,105 ****
static void associate P((void));
static int ciequal P((const char * ap, const char * bp));
static void convert P((long val, char * buf));
! static void convert64 P((zic_t val, char * buf));
static void dolink P((const char * fromfile, const char * tofile));
static void doabbr P((char * abbr, const char * format,
const char * letters, int isdst));
--- 99,105 ----
static void associate P((void));
static int ciequal P((const char * ap, const char * bp));
static void convert P((long val, char * buf));
! static void convert40 P((zic_t val, char * buf));
static void dolink P((const char * fromfile, const char * tofile));
static void doabbr P((char * abbr, const char * format,
const char * letters, int isdst));
***************
*** 128,134 ****
static long oadd P((long t1, long t2));
static void outzone P((const struct zone * zp, int ntzones));
static void puttzcode P((long code, FILE * fp));
! static void puttzcode64 P((zic_t code, FILE * fp));
static int rcomp P((const void * leftp, const void * rightp));
static zic_t rpytime P((const struct rule * rp, int wantedy));
static void rulesub P((struct rule * rp,
--- 128,134 ----
static long oadd P((long t1, long t2));
static void outzone P((const struct zone * zp, int ntzones));
static void puttzcode P((long code, FILE * fp));
! static void puttzcode40 P((zic_t code, FILE * fp));
static int rcomp P((const void * leftp, const void * rightp));
static zic_t rpytime P((const struct rule * rp, int wantedy));
static void rulesub P((struct rule * rp,
***************
*** 1397,1403 ****
}
static void
! convert64(val, buf)
const zic_t val;
char * const buf;
{
--- 1397,1403 ----
}
static void
! convert40(val, buf)
const zic_t val;
char * const buf;
{
***************
*** 1404,1410 ****
register int i;
register int shift;
! for (i = 0, shift = 56; i < 8; ++i, shift -= 8)
buf[i] = val >> shift;
}
--- 1404,1410 ----
register int i;
register int shift;
! for (i = 0, shift = 32; i < 5; ++i, shift -= 8)
buf[i] = val >> shift;
}
***************
*** 1420,1432 ****
}
static void
! puttzcode64(val, fp)
const zic_t val;
FILE * const fp;
{
! char buf[8];
! convert64(val, buf);
(void) fwrite((void *) buf, (size_t) sizeof buf, (size_t) 1, fp);
}
--- 1420,1432 ----
}
static void
! puttzcode40(val, fp)
const zic_t val;
FILE * const fp;
{
! char buf[5];
! convert40(val, buf);
(void) fwrite((void *) buf, (size_t) sizeof buf, (size_t) 1, fp);
}
***************
*** 1603,1609 ****
for (i = thistimei; i < thistimelim; ++i)
if (pass == 1)
puttzcode((long) ats[i], fp);
! else puttzcode64(ats[i], fp);
if (thistimecnt > 0)
(void) fwrite((void *) &types[thistimei],
(size_t) sizeof types[0],
--- 1603,1609 ----
for (i = thistimei; i < thistimelim; ++i)
if (pass == 1)
puttzcode((long) ats[i], fp);
! else puttzcode40(ats[i], fp);
if (thistimecnt > 0)
(void) fwrite((void *) &types[thistimei],
(size_t) sizeof types[0],
***************
*** 1638,1645 ****
todo = tadd(trans[i], -gmtoffs[j]);
} else todo = trans[i];
if (pass == 1)
! puttzcode(todo, fp);
! else puttzcode64(todo, fp);
puttzcode(corr[i], fp);
}
for (i = 0; i < typecnt; ++i)
--- 1638,1645 ----
todo = tadd(trans[i], -gmtoffs[j]);
} else todo = trans[i];
if (pass == 1)
! puttzcode((long) todo, fp);
! else puttzcode40(todo, fp);
puttzcode(corr[i], fp);
}
for (i = 0; i < typecnt; ++i)
------- localtime.c -------
*** /tmp/geta29330 Mon May 9 11:01:36 2005
--- /tmp/getb29330 Mon May 9 11:01:36 2005
***************
*** 5,11 ****
#ifndef lint
#ifndef NOID
! static char elsieid[] = "@(#)localtime.c 7.92";
#endif /* !defined NOID */
#endif /* !defined lint */
--- 5,11 ----
#ifndef lint
#ifndef NOID
! static char elsieid[] = "@(#)localtime.c 7.93";
#endif /* !defined NOID */
#endif /* !defined lint */
***************
*** 125,131 ****
*/
static long detzcode P((const char * codep));
! static time_t detzcode64 P((const char * codep));
static const char * getzname P((const char * strp));
static const char * getnum P((const char * strp, int * nump, int min,
int max));
--- 125,131 ----
*/
static long detzcode P((const char * codep));
! static time_t detzcode40 P((const char * codep));
static const char * getzname P((const char * strp));
static const char * getnum P((const char * strp, int * nump, int min,
int max));
***************
*** 225,231 ****
}
static time_t
! detzcode64(codep)
const char * const codep;
{
register signed64_t result;
--- 225,231 ----
}
static time_t
! detzcode40(codep)
const char * const codep;
{
register signed64_t result;
***************
*** 232,238 ****
register int i;
result = (codep[0] & 0x80) ? ~0L : 0L;
! for (i = 0; i < 8; ++i)
result = (result << 8) | (codep[i] & 0xff);
return (time_t) result;
}
--- 232,238 ----
register int i;
result = (codep[0] & 0x80) ? ~0L : 0L;
! for (i = 0; i < 5; ++i)
result = (result << 8) | (codep[i] & 0xff);
return (time_t) result;
}
***************
*** 343,349 ****
nread = read(fid, u.buf, sizeof u.buf);
if (close(fid) < 0 || nread <= 0)
return -1;
! for (stored = 4; stored <= 8; stored *= 2) {
int ttisstdcnt;
int ttisgmtcnt;
--- 343,349 ----
nread = read(fid, u.buf, sizeof u.buf);
if (close(fid) < 0 || nread <= 0)
return -1;
! for (stored = 4; stored <= 5; ++stored) {
int ttisstdcnt;
int ttisgmtcnt;
***************
*** 372,378 ****
return -1;
for (i = 0; i < sp->timecnt; ++i) {
sp->ats[i] = (stored == 4) ?
! detzcode(p) : detzcode64(p);
p += stored;
}
for (i = 0; i < sp->timecnt; ++i) {
--- 372,378 ----
return -1;
for (i = 0; i < sp->timecnt; ++i) {
sp->ats[i] = (stored == 4) ?
! detzcode(p) : detzcode40(p);
p += stored;
}
for (i = 0; i < sp->timecnt; ++i) {
***************
*** 402,408 ****
lsisp = &sp->lsis[i];
lsisp->ls_trans = (stored == 4) ?
! detzcode(p) : detzcode64(p);
p += stored;
lsisp->ls_corr = detzcode(p);
p += 4;
--- 402,408 ----
lsisp = &sp->lsis[i];
lsisp->ls_trans = (stored == 4) ?
! detzcode(p) : detzcode40(p);
p += stored;
lsisp->ls_corr = detzcode(p);
p += 4;
More information about the tz
mailing list