/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved  	*/


/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	from OpenSolaris "bdiff.c	1.15	05/06/08 SMI"	*/

/*
 * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany
 */
/*
 * USED is attached to the sccsid string below, which nothing in this file
 * references.  GCC 3.4 and later get the "used" attribute, older GCC
 * versions get "unused", and any other compiler gets no attribute at all.
 */
#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4
#define	USED	__attribute__ ((used))
#elif defined __GNUC__
#define	USED	__attribute__ ((unused))
#else
#define	USED
#endif
/* Version identification string embedded in the object file. */
static const char sccsid[] USED = "@(#)bdiff.c	1.8 (gritter) 7/2/05";

#include "fatal.h"
#include <signal.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <libgen.h>
#include <errno.h>
#include <limits.h>
#include "sigset.h"

/* setsig() installs the clean-up handler for signals 1 through ONSIG - 1. */
#define	ONSIG	16

/*
 *	This program segments two files into pieces of <= seglim lines
 *	(which is passed as a third argument or defaulted to some number)
 *	and then executes diff upon the pieces. The output of
 *	'diff' is then processed to make it look as if 'diff' had
 *	processed the files whole. The reason for all this is that seglim
 *	is a reasonable upper limit on the size of files that diff can
 *	process.
 *	NOTE -- by segmenting the files in this manner, it cannot be
 *	guaranteed that the 'diffing' of the segments will generate
 *	a minimal set of differences.
 *	This process is most definitely not equivalent to 'diffing'
 *	the files whole, assuming 'diff' could handle such large files.
 *
 *	'diff' is executed by a child process, generated by forking,
 *	and communicates with this program through pipes.
 */

/* Scratch buffer in which error messages are formatted before fatal(). */
static char Error[128];

static int seglim;	/* limit of size of file segment to be generated */

/*
 * Name of the 'diff' program to execute.  DIFF is not defined in this
 * file; presumably it is supplied by the build -- confirm in the makefile.
 */
static const char diff[]  =  DIFF;
/* Template handed to mkstemp() by maket() for every temporary file. */
static const char tempskel[] = "/tmp/bdXXXXXX";
/* Name of the temporary file written by saverest() and read by putsave(). */
static char tempfile[32];
/* Names of the temporary files holding the current old/new segment. */
static char otmp[32], ntmp[32];
/* FTL* flag bits that drive the behaviour of fatal(). */
static int	fflags;
static int	fatal_num = 1;		/* exit number for fatal exit */
/* Line offset added by fixnum() to every line number that 'diff' reports. */
static off_t	linenum;
/* Allocated sizes of the old-file, new-file and diff-output line buffers. */
static size_t obufsiz, nbufsiz, dbufsiz;
/* Lengths of the lines currently held in those three buffers. */
static size_t obuflen, nbuflen, dbuflen;
static char *readline(char **, size_t *, size_t *, FILE *);
static void addgen(char **, size_t *, size_t *, FILE *);
static void delgen(char **, size_t *, size_t *, FILE *);
static void fixnum(const char *);
static void fatal(const char *);
static void setsig(void);
static void setsig1(int);
static char *satoi(const char *, off_t *);
static FILE *maket(char *);
/* Allocate n bytes through srealloc(), which never returns NULL. */
#define	smalloc(n)	srealloc(NULL, n)
static void *srealloc(void *, size_t);

/* basename of argv[0]; used as the prefix of messages printed by fatal(). */
static char *prognam;

/*
 * Write one segment to a freshly created temporary file.
 *
 * The file is created by maket(), which stores its name in 'tmpname'.
 * If 'lp' is non-NULL the line currently held in '*linep' is written
 * first, followed by further lines read from 'in', until 'seglim' lines
 * have been written or 'in' is exhausted.
 *
 * Returns the result of the last readline() call (or 'lp' unchanged if
 * nothing was written), so that the caller knows whether a line is still
 * pending for the next segment.  Any write error is fatal.
 */
static char *
writeseg(char *tmpname, char *lp, char **linep, size_t *sizep,
    size_t *lenp, FILE *in)
{
	FILE *tmp;
	int cnt;

	tmp = maket(tmpname);
	for (cnt = 0; lp != NULL && cnt < seglim; cnt++) {
		if (fwrite(*linep, sizeof **linep, *lenp, tmp) != *lenp) {
			fflags |= FTLMSG;
			fatal("Can not write to temporary file");
		}
		lp = readline(linep, sizep, lenp, in);
	}
	/* Buffered data reaches the file here at the latest; check it. */
	if (fclose(tmp) != 0) {
		fflags |= FTLMSG;
		fatal("Can not write to temporary file");
	}
	return (lp);
}

/*
 * Usage: bdiff file1 file2 [limit] [-s]
 *
 * Either file (but not both) may be "-" for standard input.  'limit' is
 * the maximum number of lines per segment (default 3500) and must be a
 * positive number.  "-s" suppresses the messages printed by fatal().
 *
 * Returns 0 when both files have been processed; every error terminates
 * the program through fatal().
 */
int
main(int argc, char *argv[])
{
	FILE *poldfile, *pnewfile;
	char *oline, *nline, *diffline;
	char *olp, *nlp, *dp;
	char *end;
	long lim;
	pid_t i, w;
	int pfd[2];
	FILE *pipeinp;
	int status;

	prognam = basename(argv[0]);
	/*
	 * Set flags for 'fatal' so that it will clean up,
	 * produce a message, and terminate.
	 */
	fflags = FTLMSG | FTLCLN | FTLEXIT;

	setsig();

	if (argc < 3 || argc > 5)
		fatal("arg count");

	if (strcmp(argv[1], "-") == 0 && strcmp(argv[2], "-") == 0)
		fatal("both files standard input");
	if (strcmp(argv[1], "-") == 0)
		poldfile = stdin;
	else
		if ((poldfile = fopen(argv[1], "r")) == NULL) {
			snprintf(Error, sizeof (Error),
				"Can not open '%s'", argv[1]);
			fatal(Error);
		}
	if (strcmp(argv[2], "-") == 0)
		pnewfile = stdin;
	else
		if ((pnewfile = fopen(argv[2], "r")) == NULL) {
			snprintf(Error, sizeof (Error),
				"Can not open '%s'", argv[2]);
			fatal(Error);
		}

	seglim = 3500;

	if (argc > 3) {
		if (argv[3][0] == '-' && argv[3][1] == 's')
			fflags &= ~FTLMSG;
		else {
			/*
			 * The limit must be positive: with a limit <= 0 the
			 * segment loops below would never consume a line and
			 * the program would never terminate.
			 */
			errno = 0;
			lim = strtol(argv[3], &end, 10);
			if (end == argv[3] || errno == ERANGE ||
			    lim <= 0 || lim > INT_MAX)
				fatal("non-numeric limit");
			seglim = (int)lim;
			if (argc == 5 && argv[4][0] == '-' &&
					argv[4][1] == 's')
				fflags &= ~FTLMSG;
		}
	}

	linenum = 0;

	/* Allocate the buffers and initialize their lengths */

	obufsiz = BUFSIZ;
	nbufsiz = BUFSIZ;
	dbufsiz = BUFSIZ;

	oline = smalloc(obufsiz);
	nline = smalloc(nbufsiz);
	diffline = smalloc(dbufsiz);

	/*
	 * The following loop will prevent any lines
	 * common to the beginning of both files from being
	 * sent to 'diff'. Since the running time of 'diff' is
	 * non-linear, this will help improve performance.
	 * If, during this process, both files reach EOF, then
	 * the files are equal and the program will terminate.
	 * If either file reaches EOF before the other, the
	 * program will generate the appropriate 'diff' output
	 * itself, since this can be easily determined and will
	 * avoid executing 'diff' completely.
	 */
	for (;;) {
		olp = readline(&oline, &obufsiz, &obuflen, poldfile);
		nlp = readline(&nline, &nbufsiz, &nbuflen, pnewfile);

		if (!olp && !nlp)	/* EOF found on both:  files equal */
			return (0);

		if (!olp) {
			/*
			 * The entire old file is a prefix of the
			 * new file. Generate the appropriate "append"
			 * 'diff'-like output, which is of the form:
			 * 		nan, n
			 * where 'n' represents a line-number.
			 * addgen() does not return.
			 */
			addgen(&nline, &nbufsiz, &nbuflen, pnewfile);
		}

		if (!nlp) {
			/*
			 * The entire new file is a prefix of the
			 * old file. Generate the appropriate "delete"
			 * 'diff'-like output, which is of the form:
			 * 		n, ndn
			 * where 'n' represents a line-number.
			 * delgen() does not return.
			 */
			delgen(&oline, &obufsiz, &obuflen, poldfile);
		}

		if (obuflen == nbuflen && memcmp(olp, nlp, obuflen) == 0)
			linenum++;
		else
			break;
	}

	/*
	 * Here, first 'linenum' lines are equal.
	 * The following loop segments both files into
	 * seglim segments, forks and executes 'diff' on the
	 * segments, and processes the resulting output of
	 * 'diff', which is read from a pipe.
	 */
	for (;;) {
		/* If both files are at EOF, everything is done. */
		if (!olp && !nlp)	/* finished */
			return (0);

		if (!olp) {
			/*
			 * Generate appropriate "append"
			 * output without executing 'diff'.
			 */
			addgen(&nline, &nbufsiz, &nbuflen, pnewfile);
		}

		if (!nlp) {
			/*
			 * Generate appropriate "delete"
			 * output without executing 'diff'.
			 */
			delgen(&oline, &obufsiz, &obuflen, poldfile);
		}

		/*
		 * Create temporary files holding the next segment of
		 * the old and of the new file.
		 */
		olp = writeseg(otmp, olp, &oline, &obufsiz, &obuflen,
		    poldfile);
		nlp = writeseg(ntmp, nlp, &nline, &nbufsiz, &nbuflen,
		    pnewfile);

		/* Create pipes and fork.  */
		if ((pipe(pfd)) == -1)
			fatal("Can not create pipe");
		if ((i = fork()) < 0) {
			close(pfd[0]);
			close(pfd[1]);
			fatal("Can not fork, try again");
		} else if (i == 0) {	/* child process */
			close(pfd[0]);
			/* Make the write end of the pipe standard output. */
			if (pfd[1] != 1) {
				if (dup2(pfd[1], 1) == -1) {
					/*
					 * Close stdout so that exit() cannot
					 * flush output inherited from the
					 * parent a second time.
					 */
					close(1);
					fatal_num = 2;
					fatal("Can not redirect output");
				}
				close(pfd[1]);
			}

			putenv("LC_ALL=C");

			/* Execute 'diff' on the segment files. */
			execlp(diff, diff, "-a", otmp, ntmp, NULL);

			/*
			 * Exit code here must be > 1.
			 * Parent process treats exit code of 1 from the child
			 * as non-error because the child process "diff" exits
			 * with a status of 1 when a difference is encountered.
			 * The error here is a true error--the parent process
			 * needs to detect it and exit with a non-zero status.
			 */
			close(1);
			snprintf(Error, sizeof (Error),
			    "Can not execute '%s'", diff);
			fatal_num = 2;
			fatal(Error);
		} else {			/* parent process */
			close(pfd[1]);
			if ((pipeinp = fdopen(pfd[0], "r")) == NULL) {
				close(pfd[0]);
				fatal("Can not read from pipe");
			}

			/*
			 * Process 'diff' output: lines starting with a digit
			 * are change commands whose line numbers need to be
			 * adjusted; everything else is copied unchanged.
			 */
			while ((dp = readline(&diffline, &dbufsiz, &dbuflen,
							pipeinp))) {
				if (isdigit((unsigned char)*dp))
					fixnum(diffline);
				else
					fwrite(diffline, sizeof *diffline,
							dbuflen, stdout);
			}

			fclose(pipeinp);

			/*
			 * EOF on pipe.  Collect the child, retrying only if
			 * the call was interrupted; any other failure is
			 * treated like a failed 'diff'.
			 */
			do {
				w = waitpid(i, &status, 0);
			} while (w == -1 && errno == EINTR);
			if (w == -1 || (status && (!WIFEXITED(status) ||
						WEXITSTATUS(status) != 1))) {
				snprintf(Error, sizeof (Error),
				    "'%s' failed", diff);
				fatal(Error);
			}
		}
		linenum += seglim;

		/* Remove temporary files. */
		unlink(otmp);
		unlink(ntmp);
	}
}

/* Routine to save remainder of a file. */
static void
saverest(char **linep, size_t *bufsizp, size_t *buflenp, FILE *iptr)
{
	char *lp;
	FILE *temptr;

	temptr = maket(tempfile);

	lp = *linep;

	while (lp) {
		fwrite(*linep, sizeof **linep, *buflenp, temptr);
		linenum++;
		lp = readline(linep, bufsizp, buflenp, iptr);
	}
	fclose(temptr);
}

/* Routine to write out data saved by 'saverest' and to remove the file. */
static void
putsave(char **linep, size_t *bufsizp, size_t *buflenp, char type)
{
	FILE *temptr;

	if ((temptr = fopen(tempfile, "r")) == NULL) {
		snprintf(Error, sizeof (Error),
		    "Can not open tempfile ('%s')", tempfile); fatal(Error);
	}

	while (readline(linep, bufsizp, buflenp, temptr)) {
		printf("%c ", type);
		fwrite(*linep, sizeof **linep, *buflenp, stdout);
	}

	fclose(temptr);

	unlink(tempfile);
}

/*
 * Print a 'diff' change command (e.g. "3,5c4,6\n") with 'linenum' added
 * to every line number it contains.
 *
 * Runs of digits are converted with satoi(), offset and printed; every
 * other character is copied unchanged.  Copying unknown characters
 * (instead of handing them to satoi(), which consumes nothing when it
 * does not see a digit) guarantees that the loop always advances.
 */
static void
fixnum(const char *lp)
{
	off_t num;

	while (*lp != '\0') {
		if (isdigit((unsigned char)*lp)) {
			lp = satoi(lp, &num);
			num += linenum;
			printf("%lld", (long long)num);
		} else {
			printf("%c", *lp);
			lp++;
		}
	}
}

/*
 * Emit an "append" command for the rest of the new file without running
 * 'diff': "<linenum>a<first>[,<last>]" followed by the remaining lines,
 * each prefixed with "> ".  Does not return; exits with status 0.
 */
static void
addgen(char **lpp, size_t *bufsizp, size_t *buflenp, FILE *fp)
{
	off_t first;

	printf("%llda%lld", (long long)linenum, (long long)linenum+1);
	first = linenum + 1;

	/* saverest() advances 'linenum' by the number of lines it stores. */
	saverest(lpp, bufsizp, buflenp, fp);

	/* A range end is printed only if more than one line was added. */
	if (linenum > first)
		printf(",%lld\n", (long long)linenum);
	else
		printf("\n");

	putsave(lpp, bufsizp, buflenp, '>');
	exit(0);
}

/*
 * Emit a "delete" command for the rest of the old file without running
 * 'diff': "<first>[,<last>]d<linenum>" followed by the remaining lines,
 * each prefixed with "< ".  Does not return; exits with status 0.
 */
static void
delgen(char **lpp, size_t *bufsizp, size_t *buflenp, FILE *fp)
{
	const off_t before = linenum;

	printf("%lld", (long long)linenum+1);

	/* saverest() advances 'linenum' by the number of lines it stores. */
	saverest(lpp, bufsizp, buflenp, fp);

	/* A range end is printed unless exactly one line was deleted. */
	if (linenum == before + 1)
		printf("d%lld\n", (long long)before);
	else
		printf(",%lldd%lld\n", (long long)linenum, (long long)before);

	putsave(lpp, bufsizp, buflenp, '<');
	exit(0);
}

/*
 * Remove every temporary file this program may have created:
 * 'tempfile', 'otmp' and 'ntmp', in that order.
 */
static void
clean_up(void)
{
	char *const names[] = { tempfile, otmp, ntmp };
	size_t k;

	for (k = 0; k < sizeof names / sizeof names[0]; k++)
		unlink(names[k]);
}

/*
 * Create a temporary file from the 'tempskel' template.
 *
 * The name actually chosen by mkstemp() is left in 'file', which must
 * be large enough to hold the template.  Returns a stream opened on the
 * new file in "w+" mode; failure to create or open it is reported
 * through fatal().
 */
static FILE *
maket(char *file)
{
	FILE *stream = NULL;
	int fd;

	strcpy(file, tempskel);
	fd = mkstemp(file);
	if (fd != -1)
		stream = fdopen(fd, "w+");
	if (stream == NULL) {
		snprintf(Error, sizeof (Error),
		    "Can not open/create temp file ('%s')", file);
		fatal(Error);
	}
	return (stream);
}

/*
 *	General purpose error handler.
 *
 *	'msg' points to the error message string.  What happens is
 *	decided entirely by the bits set in the global "fflags":
 *
 *	FTLMSG	write "<prognam>: <msg>" and a newline to stderr.
 *	FTLCLN	call clean_up().
 *	FTLEXIT	terminate with exit status 'fatal_num'.
 *
 *	The bits are examined in the order listed.  If FTLEXIT is not
 *	set the function returns to its caller.
 */
static void
fatal(const char *msg)
{
	if ((fflags & FTLMSG) != 0) {
		fprintf(stderr, "%s: %s\n", prognam, msg);
	}
	if ((fflags & FTLCLN) != 0) {
		clean_up();
	}
	if ((fflags & FTLEXIT) != 0) {
		exit(fatal_num);
	}
}

/*
 *	General-purpose signal setting routine.
 *
 *	For every signal number from 1 up to ONSIG - 1, setsig1() is
 *	installed as the handler.  If the disposition that was in
 *	effect before was anything other than the default, it is put
 *	back, so only signals that were at their default end up being
 *	caught.  Signals for which sigset() reports an error are
 *	skipped.
 */
static void
setsig(void)
{
	void (*prev)(int);
	int sig;

	for (sig = 1; sig < ONSIG; sig++) {
		prev = sigset(sig, setsig1);
		if (prev != SIG_ERR && prev != SIG_DFL)
			sigset(sig, prev);
	}
}

/*
 * Signal handler installed by setsig(): remove the temporary files and
 * terminate with status 1.
 *
 * _exit() is used rather than exit() because exit() flushes stdio
 * buffers and is not async-signal-safe; calling it from a handler that
 * interrupted a stdio or malloc call can deadlock or corrupt state.
 * unlink(), which is all clean_up() calls, is safe in this context.
 */
/*ARGSUSED*/
static void
setsig1(int sig)
{
	(void)sig;

	clean_up();
	_exit(1);
}

/*
 * Convert the run of decimal digits at the start of 'p'.
 *
 * The value is stored in '*ip' (0 if 'p' does not start with a digit).
 * Returns a pointer to the first character after the digits, which is
 * 'p' itself when no digit was consumed.  No overflow check is made.
 */
static char *
satoi(const char *p, off_t *ip)
{
	off_t sum = 0;

	/* <ctype.h> functions require a value representable as unsigned char. */
	while (isdigit((unsigned char)*p))
		sum = sum * 10 + (*p++ - '0');
	*ip = sum;
	return (char *)p;
}

/*
 * Read a line of data from a file.  If the current buffer is not large enough
 * to contain the line, grow the buffer by LSIZE bytes and continue reading.
 * Loop until either the entire line is read or end of file is reached.  If no
 * more space can be allocated, srealloc() terminates the program.
 */

/*
 * On glibc, when _IO_getc_unlocked is available as a macro, route getc()
 * to it for the per-character read loop in readline() -- presumably to
 * avoid the cost of locking the stream on every call.
 */
#if defined (__GLIBC__) && defined (_IO_getc_unlocked)
#undef	getc
#define	getc(f)	_IO_getc_unlocked(f)
#endif

/* Initial size (plus one) and growth increment, in bytes, of line buffers. */
#define	LSIZE	128
/*
 * Read the next line from 'fp' into the buffer '*line'.
 *
 * '*line' and '*linesize' describe a buffer obtained from srealloc();
 * both are updated whenever the buffer has to be (re)allocated.  On
 * success the number of bytes read, including the newline if there was
 * one, is stored in '*length', the data is NUL-terminated, and '*line'
 * is returned.  A final line without a newline is returned as well.
 * Returns NULL, leaving '*length' untouched, if EOF is hit before any
 * byte could be read.
 */
static char *
readline(char **line, size_t *linesize, size_t *length, FILE *fp)
{
	size_t used = 0;
	int ch;

	/* Make sure there is a buffer of at least the minimum size. */
	if (*line == NULL || *linesize < LSIZE + 1) {
		*linesize = LSIZE + 1;
		*line = srealloc(*line, *linesize);
	}

	do {
		/* Keep room for the next byte and the terminating NUL. */
		if (used >= *linesize - LSIZE / 2) {
			*linesize += LSIZE;
			*line = srealloc(*line, *linesize);
		}
		ch = getc(fp);
		if (ch == EOF) {
			if (used == 0)
				return NULL;
			break;
		}
		(*line)[used++] = ch;
		(*line)[used] = '\0';
	} while (ch != '\n');

	*length = used;
	return *line;
}

/*
 * realloc() wrapper that never returns NULL: if the allocation fails,
 * a message is written to file descriptor 2 and the process terminates
 * immediately with status 077.
 */
static void *
srealloc(void *p, size_t n)
{
	static const char oom[] = "Out of memory\n";
	void *np = realloc(p, n);

	if (np == NULL) {
		write(2, oom, sizeof oom - 1);
		_exit(077);
	}
	return np;
}


/* syntax highlighted by Code2HTML, v. 0.9.1 */