/*
* csplit - context split
*
* Gunnar Ritter, Freiburg i. Br., Germany, May 2003.
*/
/*
* Copyright (c) 2003 Gunnar Ritter
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute
* it freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
*
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
*
* 3. This notice may not be removed or altered from any source distribution.
*/
#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4
#define USED __attribute__ ((used))
#elif defined __GNUC__
#define USED __attribute__ ((unused))
#else
#define USED
#endif
#if defined (SU3)
static const char sccsid[] USED = "@(#)csplit_su3.sl 1.10 (gritter) 5/29/05";
#elif defined (SUS)
static const char sccsid[] USED = "@(#)csplit_sus.sl 1.10 (gritter) 5/29/05";
#else
static const char sccsid[] USED = "@(#)csplit.sl 1.10 (gritter) 5/29/05";
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <libgen.h>
#include <unistd.h>
#include <stdarg.h>
#include <math.h>
#include <locale.h>
#include <signal.h>
#include "sigset.h"
#include "atoll.h"
#if defined (__GLIBC__)
#if defined (_IO_getc_unlocked)
#undef getc
#define getc(f) _IO_getc_unlocked(f)
#endif
#if defined (_IO_putc_unlocked)
#undef putc
#define putc(c, f) _IO_putc_unlocked(c, f)
#endif
#endif
#if defined (SUS) || defined (SU3)
#include <regex.h>
#else
#include <regexpr.h>
#endif
#include <iblok.h>
static struct arg {
long long a_no;
long long a_nx;
long long a_ro;
long long a_once;
const char *a_op;
const char *a_rp;
#if defined (SUS) || defined (SU3)
regex_t *a_re;
#else
char *a_re;
#endif
} **args;
static const char *progname;
static const char *prefix = "xx";
static const char *curarg;
static int filec;
static int suffixlength = 2;
static int status;
static int kflag;
static int sflag;
static void usage(void);
static void msg(int, const char *, ...);
static void scan(char *);
static void csplit(const char *);
static int match(const char *, long long, long long *, int *);
static FILE *nextfile(void);
static const char *makename(int, int);
static void delfiles(void);
static void onint(int);
static struct iblok *copytemp(struct iblok *);
static void *smalloc(size_t);
static void *srealloc(void *, size_t);
static void *scalloc(size_t, size_t);
int
main(int argc, char **argv)
{
int i, illegal = 0;
char *fn;
progname = basename(argv[0]);
#if defined (SUS) || defined (SU3)
setlocale(LC_COLLATE, "");
#endif
setlocale(LC_CTYPE, "");
while ((i = getopt(argc, argv, "ksf:n:")) != EOF) {
switch (i) {
case 'k':
kflag = 1;
break;
case 's':
sflag = 1;
break;
case 'f':
prefix = optarg;
break;
case 'n':
suffixlength = atoi(optarg);
break;
default:
illegal++;
}
}
if (illegal || optind + 2 > argc)
usage();
fn = argv[optind++];
args = scalloc(argc - optind + 1, sizeof *args);
for (i = 0; i + optind < argc; i++)
scan(argv[i+optind]);
if (sigset(SIGINT, SIG_IGN) != SIG_IGN)
sigset(SIGINT, onint);
csplit(fn);
return status;
}
static void
usage(void)
{
fprintf(stderr, "%s: Usage: %s [-s] [-k] [-f prefix] file args ...\n",
progname, progname);
exit(2);
}
static void
msg(int term, const char *fmt, ...)
{
va_list ap;
fprintf(stderr, "%s: ", progname);
va_start(ap, fmt);
vfprintf(stderr, fmt, ap);
va_end(ap);
putc('\n', stderr);
delfiles();
if (term)
exit(1);
else
status |= 1;
}
static void
scan(char *s)
{
static int ncur = -1;
long long c;
char *sp, *x;
#if defined (SUS) || defined (SU3)
int reflags;
#endif
if (*s != '{') {
args[++ncur] = scalloc(1, sizeof *args[ncur]);
args[ncur]->a_op = s;
}
switch (*s) {
case '/':
case '%':
for (sp = &s[1]; *sp; sp++) {
if (sp[0] == '\\' && sp[1])
sp++;
else if (*sp == *s)
break;
}
if (*sp == '\0')
msg(1, "%s: missing delimiter", s);
*sp = '\0';
args[ncur]->a_nx = strtoll(&sp[1], &x, 10);
if (*x != '\0')
msg(1, "%s: illegal offset", s);
#if defined (SUS) || defined (SU3)
args[ncur]->a_re = smalloc(sizeof *args[ncur]->a_re);
reflags = REG_ANGLES|REG_NOSUB;
#if defined (SU3)
reflags |= REG_AVOIDNULL;
#endif /* SU3 */
if (regcomp(args[ncur]->a_re, &s[1], reflags) != 0)
#else /* !SUS, !SU3 */
if ((args[ncur]->a_re = compile(&s[1], 0, 0)) == 0)
#endif /* !SUS, !SU3 */
msg(1, "%s: Illegal Regular Expression", s);
break;
default:
args[ncur]->a_nx = args[ncur]->a_no = strtoll(s, &x, 10);
if (*x != '\0')
msg(1, "%s: bad line number", s);
break;
case '{':
c = strtoll(&s[1], &x, 10);
if (x > &s[1] && x[0] != '}')
msg(1, "%s: missing '}'", s);
if (s[1] == '-' || x[0] != '}')
msg(1, "Illegal repeat count: %s", s);
if (ncur < 0 || args[ncur] == 0 || args[ncur]->a_rp)
msg(1, "No operation for %s", s);
args[ncur]->a_ro = c;
args[ncur]->a_rp = s;
}
}
static void
csplit(const char *fn)
{
struct iblok *ip;
int skip = 0, osk = 0, gotcha;
FILE *op = NULL;
char *line = 0, c = 0;
size_t linesize = 0, linelen;
long long lineno = 0, oln = 1, xln = 1,
bytes = 0, noffs = 0, on, brks = 0;
if ((ip = fn[0] == '-' && fn[1] == '\0' ? ib_alloc(0, 0) :
ib_open(fn, 0)) == NULL)
msg(1, "Cannot open %s", fn);
if (ib_seek(ip, 0, SEEK_CUR) != 0)
ip = copytemp(ip);
do {
if ((linelen=ib_getlin(ip, &line, &linesize, srealloc)) != 0) {
lineno++;
if ((c = line[linelen-1]) == '\n')
line[linelen-1] = '\0';
} else
lineno = -1;
while (on = noffs, osk = skip,
(gotcha=match(line, lineno, &noffs, &skip)) != 0) {
if (on) {
if (osk == 0)
op = nextfile();
ib_seek(ip, brks, SEEK_SET);
on = xln - on + (lineno <= 0);
lineno = oln - 1;
if (lineno > on)
msg(1, "%s - out of range", curarg);
while ((linelen = ib_getlin(ip, &line,
&linesize,
srealloc)) != 0 &&
lineno++ < on) {
if (osk == 0) {
fwrite(line, sizeof *line,
linelen, op);
bytes += linelen;
}
}
if (linelen != 0 && (c=line[linelen-1]) == '\n')
line[linelen-1] = '\0';
}
if (lineno==1 && op==NULL && *args[0]->a_op != '%' &&
(args[0]->a_re==0 || args[0]->a_nx>=0))
op = nextfile();
if (op) {
if (!sflag)
printf("%lld\n", bytes);
bytes = 0;
fclose(op);
}
if (lineno <= 0)
break;
if (!skip) {
if (noffs) {
op = NULL;
brks = ib_seek(ip, 0, SEEK_CUR) -
linelen;
oln = lineno;
} else
op = nextfile();
} else
op = NULL;
if (gotcha < 2)
break;
}
if (!skip) {
if (lineno == 1 && op == NULL && noffs == 0)
op = nextfile();
if (op && linelen != 0) {
if (c == '\n')
line[linelen-1] = '\n';
fwrite(line, sizeof *line, linelen, op);
bytes += linelen;
}
}
xln = lineno;
} while (linelen != 0);
}
static int
match(const char *line, long long lineno, long long *noffp, int *skip)
{
static int ncur = -1;
if (ncur == -1) {
ncur = 0;
curarg = args[ncur]->a_op;
if (args[ncur]->a_re)
*skip = *args[ncur]->a_op == '%';
if (args[ncur]->a_re && args[ncur]->a_nx < 0)
*noffp = -args[ncur]->a_nx;
else
*noffp = 0;
}
if (args[ncur] == NULL)
return lineno <= 0;
if (args[ncur]->a_once == lineno)
return 0;
args[ncur]->a_once = 0;
*skip = *args[ncur]->a_op == '%';
if (args[ncur]->a_re != NULL && lineno > 0) {
if (args[ncur]->a_no) {
if (--args[ncur]->a_no > 0)
return 0;
} else {
#if defined (SUS) || defined (SU3)
if (regexec(args[ncur]->a_re, line, 0, NULL, 0) != 0)
#else
if (step(line, args[ncur]->a_re) == 0)
#endif
return 0;
if (args[ncur]->a_nx > 0) {
args[ncur]->a_no = args[ncur]->a_nx;
return 0;
}
if (args[ncur]->a_nx < 0 &&
args[ncur]->a_nx+lineno < 0)
msg(1, "%s - out of range", curarg);
}
} else {
if (lineno > 0 && lineno < args[ncur]->a_no)
return 0;
else if (lineno < 0 && args[ncur]->a_nx < 0)
/*EMPTY*/;
else if (lineno < 0 || lineno > args[ncur]->a_no)
msg(kflag==0||lineno>0, "%s - out of range", curarg);
}
if (args[ncur]->a_re && args[ncur]->a_nx >= 0)
args[ncur]->a_once = lineno;
if (args[ncur]->a_ro) {
args[ncur]->a_ro--;
curarg = args[ncur]->a_rp;
if (args[ncur]->a_re == 0)
args[ncur]->a_no += args[ncur]->a_nx;
} else {
if (args[++ncur]) {
curarg = args[ncur]->a_op;
if (args[ncur]->a_re)
*skip = *args[ncur]->a_op == '%';
else
*skip = 0;
if (args[ncur]->a_re && args[ncur]->a_nx < 0)
*noffp = -args[ncur]->a_nx;
else
*noffp = 0;
} else {
*noffp = 0;
*skip = 0;
}
}
return 1 + ((lineno==1&&ncur==0) || args[ncur]==0 ||
args[ncur]->a_re==0 || args[ncur]->a_nx < 0);
}
static FILE *
nextfile(void)
{
const char *name;
FILE *fp;
name = makename(filec++, 1);
if ((fp = fopen(name, "w")) == NULL)
msg(1, "Cannot create %s", name);
return fp;
}
static const char *
makename(int n, int prnt)
{
static char *name, *sufp;
int c;
const char *cp;
char *sp;
if (name == 0) {
name = smalloc(strlen(prefix)+suffixlength+1);
sufp = name;
for (cp = prefix; *cp; cp++)
*sufp++ = *cp;
}
c = n;
sp = &sufp[suffixlength];
*sp = '\0';
while (--sp >= sufp) {
*sp = (c % 10) + '0';
c /= 10;
}
if (c) {
if (prnt)
msg(1, "%0.0f file limit reached at arg %s",
pow(10, suffixlength), curarg);
return 0;
}
return name;
}
static void
delfiles(void)
{
const char *np;
int i;
if (!kflag)
for (i = 0; i < filec && (np = makename(i, 0)); i++)
unlink(np);
}
static struct iblok *
copytemp(struct iblok *ip)
{
char tfname[] = "/tmp/csplitXXXXXX";
char buf[4096];
struct iblok *op = NULL;
int fd;
ssize_t rd;
if ((fd = mkstemp(tfname)) < 0 || unlink(tfname) < 0)
goto err;
while ((rd = read(ip->ib_fd, buf, sizeof buf)) > 0)
if (write(fd, buf, rd) != rd)
goto err;
if (ip->ib_fd)
ib_close(ip);
else
ib_free(ip);
if (lseek(fd, 0, SEEK_SET) != 0 || (op = ib_alloc(fd, 0)) == NULL)
err: msg(1, "Bad write to temporary file");
return op;
}
/*ARGSUSED*/
static void
onint(int signo)
{
msg(1, "Interrupt - program aborted at arg '%s'", curarg);
}
static void *
smalloc(size_t size)
{
return srealloc(0, size);
}
static void *
srealloc(void *op, size_t size)
{
void *np;
if ((np = realloc(op, size)) == NULL) {
write(2, "no memory\n", 10);
_exit(077);
}
return np;
}
static void *
scalloc(size_t nmemb, size_t size)
{
void *vp;
if ((vp = calloc(nmemb, size)) == NULL) {
write(2, "no memory\n", 10);
_exit(077);
}
return vp;
}
syntax highlighted by Code2HTML, v. 0.9.1