/* Time-stamp: <2007-02-05 22:04:42 poser>
 *
 * This program is a filter that reverses its input character by character.
 * It works on both ASCII and UTF-8 Unicode.
 * 
 * Copyright (C) 2007 William J. Poser (billposer@alum.mit.edu)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 * or go to the web page:  http://www.gnu.org/licenses/gpl.txt.
 */

#include "config.h"
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>
#ifdef HAVE_LOCALE_H
#include <locale.h>
#endif
#ifdef HAVE_LIBINTL_H
#include <libintl.h>
#else
#define gettext(x) (x)
#endif

/* Build date/time string assembled by the preprocessor; printed by ShowVersion(). */
char compdate[]="Compiled " __DATE__ " " __TIME__ ;
/* Program name as printed in the version banner by ShowVersion(). */
char pgname[]="unirev";

/*
 * Write a one-line description of the program and the list of
 * command-line options to standard error, followed by a blank line.
 */
void
ShowUsage(void){
  static const char * const UsageText[] = {
    "Read UTF-8 input line-by-line and emit reversed character-by-character.\n",
    "       -h Print help information.\n",
    "       -v Print version information.\n",
    "\n"
  };
  size_t idx;

  for (idx = 0; idx < sizeof UsageText / sizeof UsageText[0]; idx++) {
    fputs(UsageText[idx], stderr);
  }
}

/*
 * Write the version banner to standard error: program name and package
 * version, the compilation timestamp, then the fixed copyright/licence
 * notice and bug-report address.
 */
void
ShowVersion(void){
  /* The invariant part of the banner, emitted verbatim. */
  static const char Notice[] =
    "Copyright (C) 2007 William J. Poser\n"
    "This program is free software; you can redistribute it and/or modify\n"
    "it under the terms of version 2 of the GNU General Public License\n"
    "as published by the Free Software Foundation.\n"
    "Report bugs to: billposer@alum.mit.edu\n";

  fprintf(stderr, "\n%s  %s\n%s\n", pgname, PACKAGE_VERSION, compdate);
  fputs(Notice, stderr);
}

/*
 * Lookup table indexed by a byte value (0-255). main() adds 1 to the entry
 * for a lead byte to obtain the total number of bytes in the character, so
 * each entry is the number of continuation bytes expected after that byte.
 *
 *   0x00-0xBF : 0
 *   0xC0-0xDF : 1
 *   0xE0-0xEF : 2
 *   0xF0-0xF7 : 3
 *   0xF8-0xFB : 4
 *   0xFC-0xFF : 5
 */
static const char TrailingBytes[256] = {
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};

/*
 * Entry point: read lines from standard input and write each one to
 * standard output with its characters in reverse order. The bytes of a
 * multi-byte UTF-8 sequence are kept in their original order so that the
 * output is still valid UTF-8.
 *
 * Command line: "-v" prints version information and "-h" prints usage,
 * both exiting with status 1. Any other option exits with status 2 and
 * any non-option argument prints usage and exits with status 1.
 *
 * Exit status: 0 at end of input, 2 on allocation failure, 3 when a lead
 * byte is followed by fewer continuation bytes than it requires.
 */
int main(int ac, char **av) {
  int len = 0;
  int i;
  int j;
  int PreviousLeadByte;   /* index of the nearest character start to the right of i */
  char *obuf;
  char *optr;
  int BytesInChar;
  char *ibuf;
  unsigned char Byte;
  unsigned long LineCnt = 0L;

  char * GetLine(FILE *, int *);

  if (ac > 1) {
    if (av[1][0] == '-') {
      if(av[1][1] == 'v') {ShowVersion(); exit(1);}
      else if(av[1][1] == 'h') {ShowUsage(); exit(1);}
      else {fprintf(stderr,"Option %c not recognized.\n",av[1][1]); exit(2);}
    }
    ShowUsage();exit(1);
  }

  /*
   * The strategy here is to work from the end of the line looking for
   * lead bytes. On finding one, we emit it plus the requisite number of
   * continuation bytes. Continuation bytes themselves are skipped during
   * the backward scan; they are copied only together with their lead byte.
   */
  while(1) {
    ibuf = GetLine(stdin,&len);
    /* GetLine returns null when it cannot allocate; check before trusting len. */
    if(!ibuf) {
      fprintf(stderr,"unirev: failed to allocate storage.\n");
      exit(2);
    }
    if(len < 0) {
      free(ibuf);   /* a buffer is returned even at EOF */
      break;
    }
    LineCnt++;

    /* The reversed line never needs more than len bytes plus the terminator. */
    obuf = malloc((len + 1) * sizeof(char));
    if(!obuf) {
      fprintf(stderr,"unirev: failed to allocate storage.\n");
      exit(2);
    }
    optr = obuf;
    PreviousLeadByte = len;
    for (i = len-1; i >= 0; i--) {
      /* Classify via unsigned char so the result does not depend on whether
         plain char is signed on this platform. */
      Byte = (unsigned char) ibuf[i];
      if (Byte < 0x80) {
        *optr++ = ibuf[i]; /* ASCII character */
        PreviousLeadByte = i;
      }
      else if (Byte & 0x40)  { /* 11xxxxxx: lead byte of a multi-byte sequence */
        int Required;
        int Present;

        BytesInChar = 1 + (int) TrailingBytes[Byte];
        if (i + BytesInChar > PreviousLeadByte) {
          Required = BytesInChar - 1;
          Present = PreviousLeadByte - i - 1;
          fprintf(stderr,"Truncated UTF-8 sequence at byte %d of line %lu\n",i+1,LineCnt);
          fprintf(stderr,"%d continuation bytes %s required but only %d %s present.\n",
                  Required,
                  Required > 1 ? "are" : "is",
                  Present,
                  Present > 1 ? "are" : "is");
          exit(3);
        }
        PreviousLeadByte = i;
        for(j=0; j < BytesInChar; j++) *optr++ = ibuf[i+j];
      }
    }
    *optr = '\0';
    /* puts supplies the newline, including for an empty input line. */
    puts(obuf);
    free(obuf);
    free(ibuf);
  }
  exit(0);
}


/*
 * Read a line of arbitrary length from a file.
 *
 * Return a pointer to the null-terminated string allocated (the newline is
 * not stored), or null on failure to allocate sufficient storage.
 * It is the responsibility of the caller to free the space allocated;
 * a buffer is returned, and must be freed, even when EOF is signalled.
 *
 * The length of the line is placed in the variable pointed to by
 * the second argument. (-1) is placed in this variable on EOF, i.e. when
 * end of file is reached before any byte of a new line has been read.
 * (-1) is also stored when null is returned, so the variable is never
 * left unset.
 */

#define INITLENGTH 32

char * GetLine(FILE *fp, int *LineLength)
{
  int c;            /* int, not char: EOF must differ from every byte value */
  int Available;    /* current size of Line in bytes */
  int CharsRead;    /* bytes stored in Line so far */
  char *Line;
  char *Grown;

  Available = INITLENGTH;
  CharsRead = 0;
  Line = malloc((size_t) Available);
  if(Line == NULL) {
    *LineLength = (-1);
    return (NULL);
  }

  while(1){
    c = getc(fp);
    if(c == '\n'){
      Line[CharsRead] = '\0';
      *LineLength = CharsRead;
      return(Line);
    }
    if(c == EOF){
      Line[CharsRead] = '\0';
      if(CharsRead == 0) *LineLength = (-1); /* Signal EOF */
      else *LineLength = CharsRead;          /* final line without newline */
      return(Line);
    }
    if(CharsRead == (Available-1)){ /* -1 because of null */
      Available += INITLENGTH/2;
      /* Keep the old pointer until realloc succeeds so it can be released. */
      Grown = realloc(Line, (size_t) Available);
      if(Grown == NULL) {
        free(Line);
        *LineLength = (-1);
        return(NULL);
      }
      Line = Grown;
    }
    Line[CharsRead++] = (char) c;
  }
}


/* syntax highlighted by Code2HTML, v. 0.9.1 */