Back-of-the-envelope cost of extra data :-)

Arthur David Olson olsona at lecserver.nci.nih.gov
Mon May 9 15:19:45 UTC 2005


One way to reduce data file growth is to store just 5 bytes of time_t values
rather than 8. Since 32 bits buys about 70 years of time on each side of 1970,
40 bits buys about 256 times 70 or about 17920 years of time on each side of
1970. There will be a need to change the file format in the future
(the Y20K problem).

Recall that the eight-byte files resulted in a "du" report value of 1709.
With the changes attached below in place, we get...

	Script started on Mon May 09 11:02:55 2005
	du lecserver$ du -s -k tz/tmp/etc/zoneinfo tzexp/tmp/etc/zoneinfo
	489	tz/tmp/etc/zoneinfo
	1281	tzexp/tmp/etc/zoneinfo
	lecserver$ exit

	script done on Mon May 09 11:03:07 2005

...meaning that about 35% of the increase has been eliminated.


				--ado

------- tzfile.5 -------
*** /tmp/geta29273	Mon May  9 11:01:02 2005
--- /tmp/getb29273	Mon May  9 11:01:02 2005
***************
*** 137,145 ****
  For version-2-format time zone files,
  the above header and data is followed by a second header and data,
  identical in format except that
! eight bytes are used for each transition time or leap second time.
  .SH SEE ALSO
  newctime(3)
! .\" @(#)tzfile.5	7.13
  .\" This file is in the public domain, so clarified as of
  .\" 1996-06-05 by Arthur David Olson (arthur_david_olson at nih.gov).
--- 137,145 ----
  For version-2-format time zone files,
  the above header and data is followed by a second header and data,
  identical in format except that
! five bytes are used for each transition time or leap second time.
  .SH SEE ALSO
  newctime(3)
! .\" @(#)tzfile.5	7.14
  .\" This file is in the public domain, so clarified as of
  .\" 1996-06-05 by Arthur David Olson (arthur_david_olson at nih.gov).

------- tzfile.h -------
*** /tmp/geta29292	Mon May  9 11:01:15 2005
--- /tmp/getb29292	Mon May  9 11:01:15 2005
***************
*** 21,27 ****
  
  #ifndef lint
  #ifndef NOID
! static char	tzfilehid[] = "@(#)tzfile.h	7.19";
  #endif /* !defined NOID */
  #endif /* !defined lint */
  
--- 21,27 ----
  
  #ifndef lint
  #ifndef NOID
! static char	tzfilehid[] = "@(#)tzfile.h	7.20";
  #endif /* !defined NOID */
  #endif /* !defined lint */
  
***************
*** 87,93 ****
  /*
  ** If tzh_version is '2' or greater, the above is followed by a second instance
  ** of tzhead and a second instance of the data in which each coded transition
! ** time uses 8 rather than 4 chars.
  */
  
  /*
--- 87,93 ----
  /*
  ** If tzh_version is '2' or greater, the above is followed by a second instance
  ** of tzhead and a second instance of the data in which each coded transition
! ** time uses 5 rather than 4 chars.
  */
  
  /*

------- zic.c -------
*** /tmp/geta29311	Mon May  9 11:01:25 2005
--- /tmp/getb29311	Mon May  9 11:01:25 2005
***************
*** 1,4 ****
! static char	elsieid[] = "@(#)zic.c	7.127";
  
  #include "private.h"
  #include "locale.h"
--- 1,4 ----
! static char	elsieid[] = "@(#)zic.c	7.128";
  
  #include "private.h"
  #include "locale.h"
***************
*** 99,105 ****
  static void	associate P((void));
  static int	ciequal P((const char * ap, const char * bp));
  static void	convert P((long val, char * buf));
! static void	convert64 P((zic_t val, char * buf));
  static void	dolink P((const char * fromfile, const char * tofile));
  static void	doabbr P((char * abbr, const char * format,
  			const char * letters, int isdst));
--- 99,105 ----
  static void	associate P((void));
  static int	ciequal P((const char * ap, const char * bp));
  static void	convert P((long val, char * buf));
! static void	convert40 P((zic_t val, char * buf));
  static void	dolink P((const char * fromfile, const char * tofile));
  static void	doabbr P((char * abbr, const char * format,
  			const char * letters, int isdst));
***************
*** 128,134 ****
  static long	oadd P((long t1, long t2));
  static void	outzone P((const struct zone * zp, int ntzones));
  static void	puttzcode P((long code, FILE * fp));
! static void	puttzcode64 P((zic_t code, FILE * fp));
  static int	rcomp P((const void * leftp, const void * rightp));
  static zic_t	rpytime P((const struct rule * rp, int wantedy));
  static void	rulesub P((struct rule * rp,
--- 128,134 ----
  static long	oadd P((long t1, long t2));
  static void	outzone P((const struct zone * zp, int ntzones));
  static void	puttzcode P((long code, FILE * fp));
! static void	puttzcode40 P((zic_t code, FILE * fp));
  static int	rcomp P((const void * leftp, const void * rightp));
  static zic_t	rpytime P((const struct rule * rp, int wantedy));
  static void	rulesub P((struct rule * rp,
***************
*** 1397,1403 ****
  }
  
  static void
! convert64(val, buf)
  const zic_t	val;
  char * const	buf;
  {
--- 1397,1403 ----
  }
  
  static void
! convert40(val, buf)
  const zic_t	val;
  char * const	buf;
  {
***************
*** 1404,1410 ****
  	register int	i;
  	register int	shift;
  
! 	for (i = 0, shift = 56; i < 8; ++i, shift -= 8)
  		buf[i] = val >> shift;
  }
  
--- 1404,1410 ----
  	register int	i;
  	register int	shift;
  
! 	for (i = 0, shift = 32; i < 5; ++i, shift -= 8)
  		buf[i] = val >> shift;
  }
  
***************
*** 1420,1432 ****
  }
  
  static void
! puttzcode64(val, fp)
  const zic_t	val;
  FILE * const	fp;
  {
! 	char	buf[8];
  
! 	convert64(val, buf);
  	(void) fwrite((void *) buf, (size_t) sizeof buf, (size_t) 1, fp);
  }
  
--- 1420,1432 ----
  }
  
  static void
! puttzcode40(val, fp)
  const zic_t	val;
  FILE * const	fp;
  {
! 	char	buf[5];
  
! 	convert40(val, buf);
  	(void) fwrite((void *) buf, (size_t) sizeof buf, (size_t) 1, fp);
  }
  
***************
*** 1603,1609 ****
  		for (i = thistimei; i < thistimelim; ++i)
  			if (pass == 1)
  				puttzcode((long) ats[i], fp);
! 			else	puttzcode64(ats[i], fp);
  		if (thistimecnt > 0)
  			(void) fwrite((void *) &types[thistimei],
  				(size_t) sizeof types[0],
--- 1603,1609 ----
  		for (i = thistimei; i < thistimelim; ++i)
  			if (pass == 1)
  				puttzcode((long) ats[i], fp);
! 			else	puttzcode40(ats[i], fp);
  		if (thistimecnt > 0)
  			(void) fwrite((void *) &types[thistimei],
  				(size_t) sizeof types[0],
***************
*** 1638,1645 ****
  				todo = tadd(trans[i], -gmtoffs[j]);
  			} else	todo = trans[i];
  			if (pass == 1)
! 				puttzcode(todo, fp);
! 			else	puttzcode64(todo, fp);
  			puttzcode(corr[i], fp);
  		}
  		for (i = 0; i < typecnt; ++i)
--- 1638,1645 ----
  				todo = tadd(trans[i], -gmtoffs[j]);
  			} else	todo = trans[i];
  			if (pass == 1)
! 				puttzcode((long) todo, fp);
! 			else	puttzcode40(todo, fp);
  			puttzcode(corr[i], fp);
  		}
  		for (i = 0; i < typecnt; ++i)

------- localtime.c -------
*** /tmp/geta29330	Mon May  9 11:01:36 2005
--- /tmp/getb29330	Mon May  9 11:01:36 2005
***************
*** 5,11 ****
  
  #ifndef lint
  #ifndef NOID
! static char	elsieid[] = "@(#)localtime.c	7.92";
  #endif /* !defined NOID */
  #endif /* !defined lint */
  
--- 5,11 ----
  
  #ifndef lint
  #ifndef NOID
! static char	elsieid[] = "@(#)localtime.c	7.93";
  #endif /* !defined NOID */
  #endif /* !defined lint */
  
***************
*** 125,131 ****
  */
  
  static long		detzcode P((const char * codep));
! static time_t		detzcode64 P((const char * codep));
  static const char *	getzname P((const char * strp));
  static const char *	getnum P((const char * strp, int * nump, int min,
  				int max));
--- 125,131 ----
  */
  
  static long		detzcode P((const char * codep));
! static time_t		detzcode40 P((const char * codep));
  static const char *	getzname P((const char * strp));
  static const char *	getnum P((const char * strp, int * nump, int min,
  				int max));
***************
*** 225,231 ****
  }
  
  static time_t
! detzcode64(codep)
  const char * const	codep;
  {
  	register signed64_t	result;
--- 225,231 ----
  }
  
  static time_t
! detzcode40(codep)
  const char * const	codep;
  {
  	register signed64_t	result;
***************
*** 232,238 ****
  	register int		i;
  
  	result = (codep[0] & 0x80) ? ~0L : 0L;
! 	for (i = 0; i < 8; ++i)
  		result = (result << 8) | (codep[i] & 0xff);
  	return (time_t) result;
  }
--- 232,238 ----
  	register int		i;
  
  	result = (codep[0] & 0x80) ? ~0L : 0L;
! 	for (i = 0; i < 5; ++i)
  		result = (result << 8) | (codep[i] & 0xff);
  	return (time_t) result;
  }
***************
*** 343,349 ****
  	nread = read(fid, u.buf, sizeof u.buf);
  	if (close(fid) < 0 || nread <= 0)
  		return -1;
! 	for (stored = 4; stored <= 8; stored *= 2) {
  		int		ttisstdcnt;
  		int		ttisgmtcnt;
  
--- 343,349 ----
  	nread = read(fid, u.buf, sizeof u.buf);
  	if (close(fid) < 0 || nread <= 0)
  		return -1;
! 	for (stored = 4; stored <= 5; ++stored) {
  		int		ttisstdcnt;
  		int		ttisgmtcnt;
  
***************
*** 372,378 ****
  				return -1;
  		for (i = 0; i < sp->timecnt; ++i) {
  			sp->ats[i] = (stored == 4) ?
! 				detzcode(p) : detzcode64(p);
  			p += stored;
  		}
  		for (i = 0; i < sp->timecnt; ++i) {
--- 372,378 ----
  				return -1;
  		for (i = 0; i < sp->timecnt; ++i) {
  			sp->ats[i] = (stored == 4) ?
! 				detzcode(p) : detzcode40(p);
  			p += stored;
  		}
  		for (i = 0; i < sp->timecnt; ++i) {
***************
*** 402,408 ****
  
  			lsisp = &sp->lsis[i];
  			lsisp->ls_trans = (stored == 4) ?
! 				detzcode(p) : detzcode64(p);
  			p += stored;
  			lsisp->ls_corr = detzcode(p);
  			p += 4;
--- 402,408 ----
  
  			lsisp = &sp->lsis[i];
  			lsisp->ls_trans = (stored == 4) ?
! 				detzcode(p) : detzcode40(p);
  			p += stored;
  			lsisp->ls_corr = detzcode(p);
  			p += 4;



More information about the tz mailing list