/*
 * Changes by Gunnar Ritter, Freiburg i. Br., Germany, March 2003.
 */
/*	from Unix 32V /usr/src/cmd/join.c	*/
/*
 * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   Redistributions of source code and documentation must retain the
 *    above copyright notice, this list of conditions and the following
 *    disclaimer.
 *   Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *   All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed or owned by Caldera
 *      International, Inc.
 *   Neither the name of Caldera International, Inc. nor the names of
 *    other contributors may be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE
 * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * USED is the attribute attached to the identification string below.
 * It expands to __attribute__((used)) when the compiler reports
 * __GNUC__ >= 3 together with __GNUC_MINOR__ >= 4, or __GNUC__ >= 4;
 * to __attribute__((unused)) for any other compiler defining __GNUC__;
 * and to nothing otherwise.
 * NOTE(review): presumably this keeps the otherwise unreferenced string
 * in the object file and/or avoids an unused-variable warning.
 */
#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4
#define	USED	__attribute__ ((used))
#elif defined __GNUC__
#define	USED	__attribute__ ((unused))
#else
#define	USED
#endif
/* Version identification string; not referenced anywhere else in this file. */
static const char sccsid[] USED = "@(#)join.sl	1.15 (gritter) 5/29/05";

/*	join F1 F2 on stuff */

#include	<stdio.h>
#include	<stdlib.h>
#include	<string.h>
#include	<libgen.h>
#include	<locale.h>
#include	<wchar.h>
#include	<limits.h>
#include	<unistd.h>

#include	"iblok.h"
#include	"mbtowi.h"

/* Indices into the per-file arrays below, plus a marker for -o lists. */
enum {
	F1 = 0,		/* first input file */
	F2 = 1,		/* second input file */
	JF = -1		/* "0" entry of an -o list: the join field */
};

/*
 * Field j of the current line of file f.  Evaluates to the replacement
 * string "null" when j is negative, when j is not below ppisize[f], or
 * when the slot holds a null pointer.  input() stores a null pointer
 * behind the last field of every line, so without the last test a join
 * field one past the end of a line would hand a null pointer to
 * strcoll() or printf("%s").
 * The range test comes first on purpose: callers pass f == JF together
 * with a negative j and rely on ppisize[f] not being evaluated then.
 * Both arguments are evaluated more than once.
 */
#define	ppi(f, j)	((j) >= 0 && (j) < ppisize[f] && ppibuf[f][j] != NULL ? \
		ppibuf[f][j] : null)

/* Collate the join fields of the current lines of both files. */
#define comp() strcoll(ppi(F1, j1),ppi(F2, j2))

/*
 * Decode the character at s into wc and store the number of bytes it
 * occupies in n.  Bytes without the high bit are taken as they are;
 * anything else goes through mbtowi().  A negative mbtowi() result
 * yields wc = WEOF and n = 1, a zero result yields n = 1.
 */
#define	next(wc, s, n)	(*(s) & 0200 ? ((n) = mbtowi(&(wc), (s), mb_cur_max), \
		(n) = ((n) > 0 ? (n) : (n) < 0 ? ((wc) = WEOF, 1) : 1)) : \
	((wc) = *(s) & 0377, (n) = 1))

static struct iblok	 *f[2];	/* the two input handles */
static char *buf[2];		/*input lines */
static size_t bufsize[2];	/* allocated sizes of buf[], kept by ib_getlin() */
static const char **ppibuf[2];	/* pointers to fields in lines */
static long ppisize[2];		/* number of slots in each ppibuf[] */
static const char *s1,*s2;	/* set once in main(), not read elsewhere here */
static long	j1	= 1;	/* join of this field of file 1 */
static long	j2	= 1;	/* join of this field of file 2 */
static long	*olist;		/* output these fields */
static int	*olistf;	/* from these files */
static long	no;		/* number of entries in olist */
static wint_t	sep1	= ' ';	/* default field separator */
static wint_t	sep2	= '\t';	/* second input separator; -t sets it to sep1 */
static const char*	null	= "";	/* printed for missing fields (-e) */
static int	aflg;		/* -a: bit 1 = file 1, bit 2 = file 2 */
static int	vflg;		/* -v: bit 1 = file 1, bit 2 = file 2 */
static char	*progname;	/* basename of argv[0], for messages */
static int	mb_cur_max;	/* MB_CUR_MAX, sampled once in main() */

/* Forward declarations of the helpers defined after main(). */
static int	input(int n);			/* read and split a line */
static void	output(int on1, int on2);	/* print one result line */
static void	error(const char *s1, const char *s2);	/* complain, exit */
static void	setppi(int f, long j, const char *p);	/* store a field */
static void	*srealloc(void *vp, size_t nbytes);	/* realloc or die */

/*
 * Print the synopsis on standard error and terminate with status 2.
 * Does not return.
 */
static void
usage(void)
{
	const char *const me = progname;

	fprintf(stderr,
	"%s: usage: %s [-an] [-e s] [-jn m] [-tc] [-o list] file1 file2\n",
		me, me);
	exit(2);
}

/*
 * join [options] file1 file2
 *
 * Parses the options, opens both inputs ("-" selects ib_alloc(0, 0)
 * instead of ib_open(), presumably standard input) and merges the two
 * inputs on their join fields, comparing with strcoll().
 *
 * Options handled here:
 *   -a n     also print unpairable lines of file n (anything but 1 or 2
 *            selects both files)
 *   -e s     string printed in place of missing or empty -o fields
 *   -t c     use character c as the only field separator
 *   -o list  output fields, each "1.n", "2.n" or "0" (the join field),
 *            separated by blanks or commas, possibly over several
 *            arguments
 *   -j n, -j1 n, -j2 n, -1 n, -2 n   select the join field(s)
 *   -v n     print only unpairable lines of file n
 *
 * Returns 0 after the merge.  Exits with status 2 on a usage error and
 * with status 1 if an input cannot be opened.
 */
int
main(int argc, char **argv)
{
	int i;
	int n1, n2;
	off_t top2 = 0, bot2;
	char	*arg = 0, *x;
	int	inl;	/* -o: the current list word is the option word */

	progname = basename(argv[0]);
	setlocale(LC_COLLATE, "");
	setlocale(LC_CTYPE, "");
	mb_cur_max = MB_CUR_MAX;
	while (argc > 1 && argv[1][0] == '-') {
		if (argv[1][1] == '\0')
			break;	/* a lone "-" is a file operand */
		switch (argv[1][1]) {
		case 'a':
			if (argv[1][2])
				arg = &argv[1][2];
			else if (argv[2]) {
				arg = argv[2];
				argv++, argc--;
			} else
				arg = "3";
			switch(*arg) {
			case '1':
				aflg |= 1;
				break;
			case '2':
				aflg |= 2;
				break;
			default:
				aflg |= 3;
			}
			break;
		case 'e':
			if (argv[1][2])
				null = &argv[1][2];
			else if (argv[2]) {
				null = argv[2];
				argv++;
				argc--;
			} else
				usage();
			break;
		case 't':
			if (argv[1][2]) {
				int n;
				next(sep1, &argv[1][2], n);
				sep2 = sep1;
			} else if (argv[2]) {
				int n;
				next(sep1, argv[2], n);
				sep2 = sep1;
				argv++, argc--;
			} else
				usage();
			break;
		case 'o':
			if (argv[2] == NULL)
				usage();
			/*
			 * The list starts either right behind "-o" in the
			 * same word or in the next argument, and may go on
			 * over further arguments.  While arg points into
			 * argv[2], argv[1] is the last word used up.
			 */
			inl = argv[1][2] != '\0';
			arg = inl ? &argv[1][2] : argv[2];
			for (no = 0;
				olist = srealloc(olist, (no+1)*sizeof *olist),
				olistf = srealloc(olistf,(no+1)*sizeof *olistf),
				olist[no] = 0, olistf[no] = 0,
					arg; no++) {
				if (arg[0] == '1' && arg[1] == '.') {
					olistf[no] = F1;
					olist[no] = strtol(&arg[2], &x, 10);
				} else if (arg[0] == '2' && arg[1] == '.') {
					olist[no] = strtol(&arg[2], &x, 10);
					olistf[no] = F2;
				} else if (arg[0] == '0') {
					olistf[no] = JF;
					x = &arg[1];
				} else
					break;	/* not a list entry: list ends */
				while (*x == ' ' || *x == ',')
					x++;
				if (*x)
					arg = x;
				else {
					/*
					 * Word used up.  If it was the
					 * option word itself, the next
					 * candidate is argv[2] and nothing
					 * may be shifted yet; shifting here
					 * would skip one argument.
					 */
					if (inl)
						inl = 0;
					else {
						argc--;
						argv++;
					}
					arg = argv[2];
				}
			}
			if (no == 0) {
				fprintf(stderr, "%s: invalid file number (%s) "
						"for -o\n", progname, arg);
				exit(2);
			}
			break;
		case 'j':
			if (argv[2] == NULL)
				usage();
			if (argv[1][2] == '1')
				j1 = atoi(argv[2]);
			else if (argv[1][2] == '2')
				j2 = atoi(argv[2]);
			else
				j1 = j2 = atoi(argv[2]);
			argc--;
			argv++;
			break;
		case '1':
			if (argv[1][2])
				arg = &argv[1][2];
			else if (argv[2]) {
				arg = argv[2];
				argv++, argc--;
			} else
				usage();
			j1 = atoi(arg);
			break;
		case '2':
			if (argv[1][2])
				arg = &argv[1][2];
			else if (argv[2]) {
				arg = argv[2];
				argv++, argc--;
			} else
				usage();
			j2 = atoi(arg);
			break;
		case 'v':
			if (argv[1][2])
				arg = &argv[1][2];
			else if (argv[2]) {
				arg = argv[2];
				argv++, argc--;
			} else
				usage();
			if (*arg == '1')
				vflg |= 1;
			else if (*arg == '2')
				vflg |= 2;
			break;
		}
		argc--;
		argv++;
	}
	for (i = 0; i < no; i++)
		olist[i]--;	/* 0 origin */
	if (argc != 3)
		usage();
	j1--;
	j2--;	/* everyone else believes in 0 origin */
	s1 = ppi(F1, j1);
	s2 = ppi(F2, j2);
	if (argv[1][0] == '-' && argv[1][1] == '\0')
		f[F1] = ib_alloc(0, 0);
	else if ((f[F1] = ib_open(argv[1], 0)) == NULL)
		error("can't open ", argv[1]);
	if (argv[2][0] == '-' && argv[2][1] == '\0')
		f[F2] = ib_alloc(0, 0);
	else if ((f[F2] = ib_open(argv[2], 0)) == NULL)
		error("can't open ", argv[2]);

	/*
	 * Merge.  n1 and n2 are the results of input() for the current
	 * line of each file, negative once a file has no more lines.
	 * bot2 is the file 2 offset fetched before the current file 2
	 * line was read; top2 is the offset fetched before the line that
	 * ended the last run of matching file 2 lines was read.
	 * NOTE(review): this relies on ib_seek() behaving like lseek();
	 * confirm against iblok.h.
	 */
#define get1() n1=input(F1)
#define get2() n2=input(F2)
	get1();
	bot2 = ib_seek(f[F2], 0, SEEK_CUR);
	get2();
	while(n1>=0 && n2>=0 || (aflg|vflg)!=0 && (n1>=0||n2>=0)) {
		if(n1>=0 && n2>=0 && comp()>0 || n1<0) {
			/* file 2 line has no partner */
			if(aflg&2||vflg&2) output(0, n2);
			bot2 = ib_seek(f[F2], 0, SEEK_CUR);
			get2();
		} else if(n1>=0 && n2>=0 && comp()<0 || n2<0) {
			/* file 1 line has no partner */
			if(aflg&1||vflg&1) output(n1, 0);
			get1();
		} else /*(n1>=0 && n2>=0 && comp()==0)*/ {
			/* pair this file 1 line with the whole file 2 run */
			while(n2>=0 && comp()==0) {
				if(vflg==0) output(n1, n2);
				top2 = ib_seek(f[F2], 0, SEEK_CUR);
				get2();
			}
			/* back to the start of the run, next file 1 line */
			ib_seek(f[F2], bot2, SEEK_SET);
			get2();
			get1();
			for(;;) {
				if(n1>=0 && n2>=0 && comp()==0) {
					if(vflg==0) output(n1, n2);
					get2();
				} else if(n1>=0 && n2>=0 && comp()<0 || n2<0) {
					/* run used up: rewind it again */
					ib_seek(f[F2], bot2, SEEK_SET);
					get2();
					get1();
				} else /*(n1>=0 && n2>=0 && comp()>0 || n1<0)*/{
					/* file 1 is past the run: skip it */
					ib_seek(f[F2], top2, SEEK_SET);
					bot2 = top2;
					get2();
					break;
				}
			}
		}
	}
	return(0);
}

/*
 * Read the next line of file n into buf[n] and split it into fields in
 * place: the start of every field is recorded with setppi() and the
 * separator (or newline) behind it is overwritten with a NUL.
 *
 * With the default separator (sep1 == ' ') runs of sep1/sep2 in front of
 * a field are skipped; otherwise every separator ends one field.
 * A NUL ends the line, so a last line without a trailing newline is
 * handled too and the scan never moves past the string terminator.
 * A line without any field gets a single empty field 0.  The slot behind
 * the last field is set to a null pointer.
 * NOTE(review): slots above that one keep whatever an earlier, longer
 * line left there, since setppi() never shrinks ppisize[].
 *
 * Returns the number of fields found (0 for a line without fields), or
 * -1 when ib_getlin() returns 0, presumably at end of input.
 */
static int
input(int n)		/* get input line and split into fields */
{
	register int i;
	wint_t	wc;
	int	m;
	char	*bp;
	long pc;

	if (ib_getlin(f[n], &buf[n], &bufsize[n], srealloc) == 0)
		return(-1);
	bp = buf[n];
	pc = 0;
	for (i = 0; ; i++) {
		if (sep1 == ' ')	/* strip multiples */
			while (next(wc, bp, m), wc == sep1 || wc == sep2)
				bp += m;	/* skip blanks */
		else
			next(wc, bp, m);
		if (wc == '\n' || wc == '\0')
			break;
		setppi(n, pc++, bp);	/* record beginning */
		while (next(wc, bp, m), wc != sep1 && wc != '\n' &&
				wc != sep2 && wc != '\0')
			bp += m;
		if (wc == '\0') {
			/*
			 * The field ran into the end of the string (no
			 * newline).  It is terminated already; count it
			 * and stop instead of stepping past the NUL.
			 */
			i++;
			break;
		}
		*bp++ = '\0';	/* mark end by overwriting blank */
	}
	if (pc == 0)
		setppi(n, pc++, "");
	setppi(n, pc, 0);
	return(i);
}

/*
 * Print one output line.  on1 and on2 are the field counts of the
 * current lines of file 1 and file 2; 0 means that file contributes
 * nothing to this line.
 *
 * Without an -o list the join field comes first, followed by the
 * remaining fields of file 1 and then of file 2.  With an -o list
 * exactly the listed fields are printed, and fields that are missing or
 * empty are replaced by the "null" string.  Fields are separated by sep1.
 */
static void
output(int on1, int on2)	/* print items from olist */
{
	const char *text;
	int k;

	if (no > 0) {
		for (k = 0; k < no; k++) {
			const int src = olistf[k];
			const long fld = olist[k];

			text = ppi(src, fld);
			switch (src) {
			case F1:
				if (on1 <= fld)
					text = null;
				break;
			case F2:
				if (on2 <= fld)
					text = null;
				break;
			case JF:
				/* join field, from whichever line exists */
				if (on1)
					text = ppi(F1, j1);
				else if (on2)
					text = ppi(F2, j2);
				else
					text = null;
				break;
			default:
				break;
			}
			if (text == 0 || *text == 0)
				text = null;
			printf("%s", text ? text : null);
			if (k + 1 == no)
				printf("\n");
			else if (mb_cur_max > 1)
				printf("%lc", (wint_t)sep1);
			else
				printf("%c", (int)sep1);
		}
		return;
	}

	/* default case: join field, rest of file 1, rest of file 2 */
	if (on1)
		text = ppi(F1, j1);
	else if (on2)
		text = ppi(F2, j2);
	else
		text = null;
	printf("%s", text);
	for (k = 0; k < on1; k++) {
		if (k == j1)
			continue;
		if (mb_cur_max > 1)
			printf("%lc%s",(wint_t)sep1,ppi(F1, k));
		else
			printf("%c%s", (int)sep1, ppi(F1, k));
	}
	for (k = 0; k < on2; k++) {
		if (k == j2)
			continue;
		if (mb_cur_max > 1)
			printf("%lc%s",(wint_t)sep1,ppi(F2, k));
		else
			printf("%c%s", (int)sep1, ppi(F2, k));
	}
	printf("\n");
}

/*
 * Write "progname: " followed by s1 and s2 and a newline to standard
 * error, then terminate with status 1.  Does not return.
 */
static void
error(const char *s1, const char *s2)
{
	enum { STATUS = 1 };

	fprintf(stderr, "%s: %s%s\n", progname, s1, s2);
	exit(STATUS);
}

/*
 * Store p as field pointer j of file f.  The pointer table of that file
 * is enlarged to exactly j + 1 slots when j does not fit yet; it is
 * never made smaller.
 */
static void
setppi(int f, long j, const char *p)
{
	const char **slots = ppibuf[f];

	if (ppisize[f] <= j) {
		const long want = j + 1;

		ppisize[f] = want;
		slots = srealloc(slots, want * sizeof *slots);
		ppibuf[f] = slots;
	}
	slots[j] = p;
}

/*
 * realloc() that does not return on failure: if realloc() yields a null
 * pointer, "no memory" is written to file descriptor 2 and the process
 * ends through _exit(077).  Otherwise the new block is returned.
 */
static void *
srealloc(void *vp, size_t nbytes)
{
	void *np = realloc(vp, nbytes);

	if (np == NULL) {
		write(2, "no memory\n", 10);
		_exit(077);
	}
	return np;
}


/* syntax highlighted by Code2HTML, v. 0.9.1 */