/* frcode -- front-compress a sorted list
Copyright (C) 1994, 2005, 2006, 2007, 2010, 2011 Free Software
Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
*/
/* Usage: frcode < sorted-list > compressed-list
Uses front compression (also known as incremental encoding);
see ";login:", March 1983, p. 8.
The input is a sorted list of NUL-terminated strings (or
newline-terminated if the -0 option is not given).
The output entries are in the same order as the input; each entry
consists of a signed offset-differential count byte (the additional
number of characters of prefix of the preceding entry to use beyond
the number that the preceding entry is using of its predecessor),
followed by a null-terminated ASCII remainder.
If the offset-differential count is larger than can be stored
in a byte (+/-127), the byte has the value LOCATEDB_ESCAPE
and the count follows in a 2-byte word, with the high byte first
(network byte order).
Example:
Input, with NULs changed to newlines:
/usr/src
/usr/src/cmd/aardvark.c
/usr/src/cmd/armadillo.c
/usr/tmp/zoo
Length of the longest prefix of the preceding entry to share:
0 /usr/src
8 /cmd/aardvark.c
14 rmadillo.c
5 tmp/zoo
Output, with NULs changed to newlines and count bytes made printable:
0 LOCATE02
0 /usr/src
8 /cmd/aardvark.c
6 rmadillo.c
-9 tmp/zoo
(6 = 14 - 8, and -9 = 5 - 14)
Written by James A. Woods .
Modified by David MacKenzie .
Modified by James Youngman .
*/
/* config.h must be included first. */
#include
/* system headers. */
#include
#include
#include
#include
#include
#include
#include
#include
#include
/* gnulib headers. */
#include "closeout.h"
#include "error.h"
#include "gettext.h"
#include "progname.h"
#include "xalloc.h"
/* find headers. */
#include "findutils-version.h"
#include "locatedb.h"
#if ENABLE_NLS
# include
# define _(Text) gettext (Text)
#else
# define _(Text) Text
#define textdomain(Domain)
#define bindtextdomain(Package, Directory)
#endif
#ifdef gettext_noop
# define N_(String) gettext_noop (String)
#else
/* We used to use (String) instead of just String, but apparently ISO C
* doesn't allow this (at least, that's what HP said when someone reported
* this as a compiler bug). This is HP case number 1205608192. See
* also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11250 (which references
* ANSI 3.5.7p14-15). The Intel icc compiler also rejects constructs
* like: static const char buf[] = ("string");
*/
# define N_(String) String
#endif
/* Write out a 16-bit int, high byte first (network byte order).
* Return true iff all went well.
*/
static int
put_short (int c, FILE *fp)
{
/* XXX: The value of c may be negative. ANSI C 1989 (section 6.3.7)
* indicates that the result of shifting a negative value right is
* implementation defined.
*/
assert (c <= SHRT_MAX);
assert (c >= SHRT_MIN);
return (putc (c >> 8, fp) != EOF) && (putc (c, fp) != EOF);
}
/* Return the length of the longest common prefix of strings S1 and S2. */
static int
prefix_length (char *s1, char *s2)
{
register char *start;
int limit = INT_MAX;
for (start = s1; *s1 == *s2 && *s1 != '\0'; s1++, s2++)
{
/* Don't emit a prefix length that will not fit into
* our return type.
*/
if (0 == --limit)
break;
}
return s1 - start;
}
static struct option const longopts[] =
{
{"help", no_argument, NULL, 'h'},
{"version", no_argument, NULL, 'v'},
{"null", no_argument, NULL, '0'},
{NULL, no_argument, NULL, 0}
};
extern char *version_string;
static void
usage (FILE *stream)
{
fprintf (stream,
_("Usage: %s [-0 | --null] [--version] [--help]\n"),
program_name);
fputs (_("\nReport bugs to .\n"), stream);
}
static long
get_seclevel (char *s)
{
long result;
char *p;
/* Reset errno in oreder to be able to distinguish LONG_MAX/LONG_MIN
* from values whichare actually out of range
*/
errno = 0;
result = strtol (s, &p, 10);
if ((0==result) && (p == optarg))
{
error (EXIT_FAILURE, 0,
_("You need to specify a security level as a decimal integer."));
/*NOTREACHED*/
return -1;
}
else if ((LONG_MIN==result || LONG_MAX==result) && errno)
{
error (EXIT_FAILURE, 0,
_("Security level %s is outside the convertible range."), s);
/*NOTREACHED*/
return -1;
}
else if (*p)
{
/* Some suffix exists */
error (EXIT_FAILURE, 0,
_("Security level %s has unexpected suffix %s."), s, p);
/*NOTREACHED*/
return -1;
}
else
{
return result;
}
}
static void
outerr (void)
{
/* Issue the same error message as closeout () would. */
error (EXIT_FAILURE, errno, _("write error"));
}
int
main (int argc, char **argv)
{
char *path; /* The current input entry. */
char *oldpath; /* The previous input entry. */
size_t pathsize, oldpathsize; /* Amounts allocated for them. */
int count, oldcount, diffcount; /* Their prefix lengths & the difference. */
int line_len; /* Length of input line. */
int delimiter = '\n';
int optc;
int slocate_compat = 0;
long slocate_seclevel = 0L;
if (argv[0])
set_program_name (argv[0]);
else
set_program_name ("frcode");
if (atexit (close_stdout))
{
error (EXIT_FAILURE, errno, _("The atexit library function failed"));
}
pathsize = oldpathsize = 1026; /* Increased as necessary by getline. */
path = xmalloc (pathsize);
oldpath = xmalloc (oldpathsize);
oldpath[0] = 0;
oldcount = 0;
while ((optc = getopt_long (argc, argv, "hv0S:", longopts, (int *) 0)) != -1)
switch (optc)
{
case '0':
delimiter = 0;
break;
case 'S':
slocate_compat = 1;
slocate_seclevel = get_seclevel (optarg);
if (slocate_seclevel < 0 || slocate_seclevel > 1)
{
error (EXIT_FAILURE, 0,
_("slocate security level %ld is unsupported."),
slocate_seclevel);
}
break;
case 'h':
usage (stdout);
return 0;
case 'v':
display_findutils_version ("frcode");
return 0;
default:
usage (stderr);
return 1;
}
/* We expect to have no arguments. */
if (optind != argc)
{
usage (stderr);
return 1;
}
if (slocate_compat)
{
fputc (slocate_seclevel ? '1' : '0', stdout);
fputc (0, stdout);
}
else
{
/* GNU LOCATE02 format */
if (fwrite (LOCATEDB_MAGIC, 1, sizeof (LOCATEDB_MAGIC), stdout)
!= sizeof (LOCATEDB_MAGIC))
{
error (EXIT_FAILURE, errno, _("Failed to write to standard output"));
}
}
while ((line_len = getdelim (&path, &pathsize, delimiter, stdin)) > 0)
{
path[line_len - 1] = '\0'; /* FIXME temporary: nuke the newline. */
count = prefix_length (oldpath, path);
diffcount = count - oldcount;
if ( (diffcount > SHRT_MAX) || (diffcount < SHRT_MIN) )
{
/* We do this to prevent overflow of the value we
* write with put_short ()
*/
count = 0;
diffcount = (-oldcount);
}
oldcount = count;
if (slocate_compat)
{
/* Emit no count for the first pathname. */
slocate_compat = 0;
}
else
{
/* If the difference is small, it fits in one byte;
otherwise, two bytes plus a marker noting that fact. */
if (diffcount < LOCATEDB_ONEBYTE_MIN
|| diffcount > LOCATEDB_ONEBYTE_MAX)
{
if (EOF == putc (LOCATEDB_ESCAPE, stdout))
outerr ();
if (!put_short (diffcount, stdout))
outerr ();
}
else
{
if (EOF == putc (diffcount, stdout))
outerr ();
}
}
if ( (EOF == fputs (path + count, stdout))
|| (EOF == putc ('\0', stdout)))
{
outerr ();
}
if (1)
{
/* Swap path and oldpath and their sizes. */
char *tmppath = oldpath;
size_t tmppathsize = oldpathsize;
oldpath = path;
oldpathsize = pathsize;
path = tmppath;
pathsize = tmppathsize;
}
}
free (path);
free (oldpath);
return 0;
}