/*
 * Modifications Copyright 1993, 1994, 1995, 1996, 1999,
 *  2000, 2001, 2004 by Paul Mattes.
 * Original X11 Port Copyright 1990 by Jeff Sparkes.
 *  Permission to use, copy, modify, and distribute this software and its
 *  documentation for any purpose and without fee is hereby granted,
 *  provided that the above copyright notice appear in all copies and that
 *  both that copyright notice and this permission notice appear in
 *  supporting documentation.
 *
 * Copyright 1989 by Georgia Tech Research Corporation, Atlanta, GA 30332.
 *  All Rights Reserved.  GTRC hereby grants public use of this software.
 *  Derivative works based on this software must incorporate this copyright
 *  notice.
 *
 * x3270, c3270, s3270 and tcl3270 are distributed in the hope that they will
 * be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the file LICENSE
 * for more details.
 */

/*
 *	charset.c
 *		This module handles character sets.
 */

#include "globals.h"

#include "resources.h"
#include "appres.h"
#include "cg.h"

#include "charsetc.h"
#include "kybdc.h"
#include "popupsc.h"
#if defined(X3270_DISPLAY) /*[*/
#include "screenc.h"
#endif /*]*/
#include "tablesc.h"
#include "utilc.h"
#include "widec.h"

#include <errno.h>

/*
 * Suffix that charset_init() strips from a character set name when no
 * definition exists under the full name.  ES_SIZE is its length without the
 * terminating NUL.
 */
#define EURO_SUFFIX	"-euro"
#define ES_SIZE		(sizeof(EURO_SUFFIX) - 1)

/* Globals. */
/* Set by set_charset_name(): True when a non-default name was stored. */
Boolean charset_changed = False;
/*
 * Default CGCSGID pieces.  set_cgcsgid() ORs DEFAULT_CGEN into any value
 * that fits in 16 bits; set_cgcsgids() falls back to the combination of both.
 */
#define DEFAULT_CGEN	0x02b90000
#define DEFAULT_CSET	0x00000025
unsigned long cgcsgid = DEFAULT_CGEN | DEFAULT_CSET;
/* Second '+'-separated CGCSGID; only assigned when X3270_DBCS is defined. */
unsigned long cgcsgid_dbcs = 0L;
/* Display character set list used when no display charset resource exists. */
char *default_display_charset = "3270cg-1a,3270cg-1,iso8859-1";
/* Both are stored by wide_resource_init() (X3270_DBCS builds only). */
char *converter_names;
char *encoding;

/* Statics. */
static enum cs_result resource_charset(char *csname, char *cs, char *ftcs);
/* Which translation tables a remapping applies to. */
typedef enum { CS_ONLY, FT_ONLY, BOTH } remap_scope;
static enum cs_result remap_chars(char *csname, char *spec, remap_scope scope,
    int *ne);
static void remap_one(unsigned char ebc, KeySym iso, remap_scope scope,
    Boolean one_way);
#if defined(DEBUG_CHARSET) /*[*/
static enum cs_result check_charset(void);
static char *char_if_ascii7(unsigned long l);
#endif /*]*/
static void set_cgcsgids(char *spec);
static int set_cgcsgid(char *spec, unsigned long *idp);
static void set_charset_name(char *csname);

/* Current character set name; CN until set_charset_name() stores one. */
static char *charset_name = CN;

/*
 * Reset every translation table to its built-in "0" default, then call
 * clear_xks() (presumably emptying the extended keysym table -- confirm
 * against its definition).
 */
static void
charset_defaults(void)
{
	enum { XLATE_SIZE = 256 };	/* bytes copied per table */

	memcpy(ebc2cg, ebc2cg0, XLATE_SIZE);
	memcpy(cg2ebc, cg2ebc0, XLATE_SIZE);
	memcpy(ebc2asc, ebc2asc0, XLATE_SIZE);
	memcpy(asc2ebc, asc2ebc0, XLATE_SIZE);
#if defined(X3270_FT) /*[*/
	/* File transfer tables exist only in X3270_FT builds. */
	memcpy(ft2asc, ft2asc0, XLATE_SIZE);
	memcpy(asc2ft, asc2ft0, XLATE_SIZE);
#endif /*]*/

	clear_xks();
}

/* Snapshot storage used by save_charset() and restore_charset(). */
static unsigned char save_ebc2cg[256];
static unsigned char save_cg2ebc[256];
static unsigned char save_ebc2asc[256];
static unsigned char save_asc2ebc[256];
#if defined(X3270_FT) /*[*/
static unsigned char save_ft2asc[256];
static unsigned char save_asc2ft[256];
#endif /*]*/

/*
 * Copy the live translation tables into the snapshot arrays so that a failed
 * character set change can be undone with restore_charset().
 */
static void
save_charset(void)
{
	enum { XLATE_SIZE = 256 };	/* bytes copied per table */

	memcpy(save_ebc2cg, ebc2cg, XLATE_SIZE);
	memcpy(save_cg2ebc, cg2ebc, XLATE_SIZE);
	memcpy(save_ebc2asc, ebc2asc, XLATE_SIZE);
	memcpy(save_asc2ebc, asc2ebc, XLATE_SIZE);
#if defined(X3270_FT) /*[*/
	memcpy(save_ft2asc, ft2asc, XLATE_SIZE);
	memcpy(save_asc2ft, asc2ft, XLATE_SIZE);
#endif /*]*/
}

/*
 * Copy the snapshot taken by save_charset() back into the live translation
 * tables.
 */
static void
restore_charset(void)
{
	enum { XLATE_SIZE = 256 };	/* bytes copied per table */

	memcpy(ebc2cg, save_ebc2cg, XLATE_SIZE);
	memcpy(cg2ebc, save_cg2ebc, XLATE_SIZE);
	memcpy(ebc2asc, save_ebc2asc, XLATE_SIZE);
	memcpy(asc2ebc, save_asc2ebc, XLATE_SIZE);
#if defined(X3270_FT) /*[*/
	memcpy(ft2asc, save_ft2asc, XLATE_SIZE);
	memcpy(asc2ft, save_asc2ft, XLATE_SIZE);
#endif /*]*/
}

/*
 * Look up the definition of a character set.
 * Returns whatever get_fresource() yields for "<ResCharset>.<csname>";
 * callers in this file treat CN as "no such definition".
 */
static char *
get_charset_def(const char *csname)
{
	char *definition;

	definition = get_fresource("%s.%s", ResCharset, csname);
	return definition;
}

#if defined(X3270_DBCS) /*[*/
/*
 * Initialize the DBCS conversion functions, based on resource values.
 */
static int
wide_resource_init(char *csname)
{
	char *cn, *en;

	cn = get_fresource("%s.%s", ResDbcsConverters, csname);
	if (cn == CN)
		return 0;

	en = get_fresource("%s.%s", ResLocalEncoding, csname);
	if (en == CN)
		en = appres.local_encoding;
	Replace(converter_names, cn);
	Replace(encoding, en);

	return wide_init(cn, en);

}
#endif /*]*/

/*
 * Change character sets.
 *
 * csname is the name of the character set to switch to.  CN or "us" selects
 * the built-in defaults.
 *
 * Returns CS_OKAY on success.  Returns CS_NOTFOUND when no definition exists
 * for csname (or, in X3270_DBCS builds, when wide_resource_init() fails);
 * otherwise returns the failure code from resource_charset() (and, in
 * DEBUG_CHARSET builds, check_charset()).  On any failure the translation
 * tables are put back the way they were on entry.
 */
enum cs_result
charset_init(char *csname)
{
	char *cs, *ftcs;
	enum cs_result rc;
	char *ccs, *cftcs;

	/* No name, or "us": revert everything to the built-in defaults. */
	if (csname == CN || !strcasecmp(csname, "us")) {
		charset_defaults();
		set_cgcsgids(CN);
		set_charset_name(CN);
#if defined(X3270_DISPLAY) /*[*/
		(void) screen_new_display_charsets(default_display_charset,
		    "us");
#endif /*]*/
		return CS_OKAY;
	}

	/* Look for a definition under the full name first. */
	cs = get_charset_def(csname);
	if (cs == CN) {
		size_t len = strlen(csname);

		if (len > ES_SIZE &&
		    !strcasecmp(csname + len - ES_SIZE, EURO_SUFFIX)) {
			char *basename;

			/*
			 * Grab the non-Euro definition.  The precision for
			 * "%.*s" must be an int, not a size_t.
			 */
			basename = xs_buffer("%.*s", (int)(len - ES_SIZE),
			    csname);
			cs = get_charset_def(basename);
			Free(basename);
		}
	}
	if (cs == CN)
		return CS_NOTFOUND;

	/* Grab the File Transfer character set. */
	ftcs = get_fresource("%s.%s", ResFtCharset, csname);

	/* Work on private copies of the strings. */
	ccs = NewString(cs);
	cftcs = (ftcs == NULL)? NULL: NewString(ftcs);

	/* Save the current definitions, and start over with the defaults. */
	save_charset();
	charset_defaults();

	/* Interpret them. */
	rc = resource_charset(csname, ccs, cftcs);

	/* Free them. */
	Free(ccs);
	Free(cftcs);

#if defined(DEBUG_CHARSET) /*[*/
	if (rc == CS_OKAY)
		rc = check_charset();
#endif /*]*/

	/* Undo the table changes if anything went wrong. */
	if (rc != CS_OKAY)
		restore_charset();
#if defined(X3270_DBCS) /*[*/
	else if (wide_resource_init(csname) < 0) {
		restore_charset();
		return CS_NOTFOUND;
	}
#endif /*]*/

	return rc;
}

/*
 * Parse one CGCSGID specification and store it in *r.
 *
 * spec must be a complete, non-zero number in any base strtoul() accepts
 * with base 0.  A value that fits in 16 bits has DEFAULT_CGEN ORed into it;
 * a larger value is stored unchanged.
 *
 * Returns 0 for success, -1 for failure (*r is then left untouched).
 */
static int
set_cgcsgid(char *spec, unsigned long *r)
{
	unsigned long value;
	char *end;

	if (spec == CN)
		return -1;

	value = strtoul(spec, &end, 0);
	if (value == 0 || end == spec || *end != '\0')
		return -1;

	*r = (value & ~0xffffL)? value: (DEFAULT_CGEN | value);
	return 0;
}

/*
 * Set the CGCSGIDs from a '+'-separated specification.
 *
 * The first value goes to cgcsgid and, in X3270_DBCS builds, the second to
 * cgcsgid_dbcs.  Additional values are reported and ignored.  If spec is CN,
 * yields no values, or contains an invalid value, both ids are reset to their
 * defaults.
 */
static void
set_cgcsgids(char *spec)
{
	if (spec != CN) {
		char *work = NewString(spec);	/* strtok() modifies it */
		char *next = work;
		char *tok;
		int count = 0;

		for (;;) {
			unsigned long *target;

			tok = strtok(next, "+");
			if (tok == CN)
				break;
			next = CN;

			if (count == 0)
				target = &cgcsgid;
#if defined(X3270_DBCS) /*[*/
			else if (count == 1)
				target = &cgcsgid_dbcs;
#endif /*]*/
			else {
				popup_an_error("Extra CGCSGID(s), ignoring");
				break;
			}

			if (set_cgcsgid(tok, target) < 0) {
				popup_an_error("Invalid CGCSGID '%s', ignoring",
				    tok);
				/* Force the fall-back to the defaults. */
				count = -1;
				break;
			}
			count++;
		}
		Free(work);

		/* At least one valid id was stored. */
		if (count > 0)
			return;
	}

	cgcsgid = DEFAULT_CGEN | DEFAULT_CSET;
#if defined(X3270_DBCS) /*[*/
	cgcsgid_dbcs = 0L;
#endif /*]*/
}

/* Set the global charset name. */
static void
set_charset_name(char *csname)
{
	if (csname == CN) {
		Replace(charset_name, NewString("us"));
		charset_changed = False;
		return;
	}
	if ((charset_name != CN && strcmp(charset_name, csname)) ||
	    (appres.charset != CN && strcmp(appres.charset, csname))) {
		Replace(charset_name, NewString(csname));
		charset_changed = True;
	}
}

/*
 * Define a charset from resources.
 *
 * csname is the character set name, used for resource lookups and messages.
 * cs is the character set definition, and ftcs the optional file transfer
 * definition (NULL if there is none, in which case cs applies to both).
 *
 * Returns CS_OKAY on success, the failure code from remap_chars(),
 * CS_ILLEGAL when the change would switch DBCS modes while connected, or
 * CS_PREREQ when screen_new_display_charsets() rejects the display
 * character set.
 */
static enum cs_result
resource_charset(char *csname, char *cs, char *ftcs)
{
	enum cs_result rc;
	int ne = 0;
	char *rcs = CN;
	int n_rcs = 0;

	/* Interpret the spec. */
	rc = remap_chars(csname, cs, (ftcs == NULL)? BOTH: CS_ONLY, &ne);
	if (rc != CS_OKAY)
		return rc;
	if (ftcs != NULL) {
		rc = remap_chars(csname, ftcs, FT_ONLY, &ne);
		if (rc != CS_OKAY)
			return rc;
	}

	rcs = get_fresource("%s.%s", ResDisplayCharset, csname);

	/* Count the '+'-separated pieces of the display character set. */
	if (rcs != CN) {
		char *rcs_copy, *buf, *token;

		buf = rcs_copy = NewString(rcs);
		while ((token = strtok(buf, "+")) != CN) {
			buf = CN;
			switch (n_rcs) {
			case 0:
#if defined(X3270_DBCS) /*[*/
			case 1:
#endif /*]*/
			    break;
			default:
			    popup_an_error("Extra %s value(s), ignoring",
				ResDisplayCharset);
			    break;
			}
			n_rcs++;
		}
		/* The copy existed only so strtok() could chop it up. */
		Free(rcs_copy);
	}

#if defined(X3270_DBCS) /*[*/
	/* Can't swap DBCS modes while connected. */
	if (IN_3270 && (n_rcs == 2) != dbcs) {
		popup_an_error("Can't change DBCS modes while connected");
		return CS_ILLEGAL;
	}
#endif /*]*/

#if defined(X3270_DISPLAY) /*[*/
	if (!screen_new_display_charsets(
		    rcs? rcs: default_display_charset,
		    csname)) {
		return CS_PREREQ;
	}
#else /*][*/
#if defined(X3270_DBCS) /*[*/
	/* Without a display, the piece count alone decides the DBCS mode. */
	if (n_rcs > 1)
		dbcs = True;
	else
		dbcs = False;
#endif /*]*/
#endif /*]*/

	/* Set up the cgcsgid. */
	set_cgcsgids(get_fresource("%s.%s", ResCodepage, csname));

	/* Set up the character set name. */
	set_charset_name(csname);

	return CS_OKAY;
}

/*
 * Map a keysym name or literal string into a character.
 *
 * s is tried, in order, as a keysym name (via StringToKeysym()), as a "0x"
 * hexadecimal number no greater than 0xff, and as a single literal
 * character.  Hexadecimal values are returned as-is.  Names and literal
 * characters must not be below ' ', and must not exceed 0xff unless
 * extended is set.
 *
 * Returns NoSymbol if there is a problem.
 */
static KeySym
parse_keysym(char *s, Boolean extended)
{
	KeySym sym = StringToKeysym(s);

	if (sym == NoSymbol) {
		if (s[0] == '0' && s[1] == 'x') {
			char *end;
			unsigned long value = strtoul(s, &end, 16);

			if (*end != '\0' || value > 0xff)
				return NoSymbol;
			return (KeySym)value;
		}
		if (strlen(s) != 1)
			return NoSymbol;
		sym = *s & 0xff;
	}

	if (sym >= ' ' && (extended || sym <= 0xff))
		return sym;
	return NoSymbol;
}

/*
 * Process a single character definition.
 *
 * ebc is the EBCDIC code and iso the keysym it maps to.  scope selects which
 * tables are changed: the display/keyboard tables (CS_ONLY), the file
 * transfer tables (FT_ONLY), or both (BOTH).  When one_way is set, only the
 * EBCDIC-to-display direction is changed; the reverse maps are left alone.
 *
 * Keysyms above 0xff are handed to add_xk() instead of the 8-bit tables.
 */
static void
remap_one(unsigned char ebc, KeySym iso, remap_scope scope, Boolean one_way)
{
	/* Ignore mappings of EBCDIC control codes and the space character. */
	if (ebc <= 0x40)
		return;

	/* If they want to map to a NULL or a blank, make it a one-way blank. */
	if (iso == 0x0)
		iso = 0x20;
	if (iso == 0x20)
		one_way = True;

	/* Multibyte keysym: goes in the extended keysym table. */
	if (iso > 0xff) {
		add_xk(iso, (KeySym)ebc2asc[ebc]);
		return;
	}

	if (scope == BOTH || scope == CS_ONLY) {
		unsigned char cg = asc2cg[iso];

		if (cg2asc[cg] == iso) {
			/* well-defined */
			ebc2cg[ebc] = cg;
			if (!one_way)
				cg2ebc[cg] = ebc;
		} else {
			/* into a hole */
			ebc2cg[ebc] = CG_boxsolid;
		}

		ebc2asc[ebc] = iso;
		if (!one_way)
			asc2ebc[iso] = ebc;
	}

#if defined(X3270_FT) /*[*/
	/* Change the file transfer translation table. */
	if (scope == BOTH) {
		/*
		 * We have an alternate mapping of an EBCDIC code to an ASCII
		 * code.  Modify the existing ASCII(ft)-to-ASCII(desired)
		 * maps.
		 *
		 * This is done by figuring out which ASCII code the host
		 * usually translates the given EBCDIC code to
		 * (asc2ft0[ebc2asc0[ebc]]).  Now we want to translate that
		 * code to the given ISO code, and vice-versa.
		 */
		unsigned char aa = asc2ft0[ebc2asc0[ebc]];

		if (aa != ' ') {
			ft2asc[aa] = iso;
			asc2ft[iso] = aa;
		}
	} else if (scope == FT_ONLY) {
		/*
		 * We have a map of how the host translates the given EBCDIC
		 * code to an ASCII code.  Generate the translation between
		 * that code and the ISO code that we would normally use to
		 * display that EBCDIC code.
		 */
		ft2asc[iso] = ebc2asc[ebc];
		asc2ft[ebc2asc[ebc]] = iso;
	}
#endif /*]*/
}

/*
 * Parse an EBCDIC character set map.
 *
 * The map takes one of two forms:
 *
 *  - A series of pairs of numeric EBCDIC codes and keysyms, as split up by
 *    split_dresource().  If the keysym is in the range 1..255, it is a
 *    remapping of the EBCDIC code for a standard Latin-1 graphic, and the
 *    CG-to-EBCDIC map will be modified to match.  Otherwise (keysym > 255),
 *    it is a definition for the EBCDIC code to use for a multibyte keysym.
 *    This is intended for 8-bit fonts with special characters that replace
 *    certain standard Latin-1 graphics.  The keysym will be entered into the
 *    extended keysym translation table.
 *
 *  - The word "#table" followed by exactly 256 whitespace-separated entries,
 *    one per EBCDIC code in ascending order.  Each entry is a number in the
 *    range 0..255 or a single literal character.
 *
 * In either form, a leading '*' on the EBCDIC code (pair form) or on the
 * entry (table form) makes that mapping one-way.
 *
 * csname is used only in error messages.  scope selects the tables to
 * change.  *ne is incremented for each pair processed, so that entry numbers
 * in error messages keep counting across calls.
 *
 * Returns CS_OKAY, or CS_BAD (after popping up an error) if the map cannot
 * be parsed.
 */
static enum cs_result
remap_chars(char *csname, char *spec, remap_scope scope, int *ne)
{
	char *s;
	char *ebcs, *isos;
	KeySym iso = NoSymbol;
	int ns;
	enum cs_result rc = CS_OKAY;
	Boolean is_table = False;
	Boolean one_way = False;

	/* Pick apart a copy of the spec. */
	s = spec = NewString(spec);
	/* <ctype.h> functions need an unsigned char value, not a plain char. */
	while (isspace((unsigned char)*s)) {
		s++;
	}
	if (!strncmp(s, "#table", 6)) {
		is_table = True;
		s += 6;
	}

	if (is_table) {
		int n_entries = 0;
		char *tok;
		char *ptr;

		while ((tok = strtok(s, " \t\n")) != CN) {
			if (n_entries >= 256) {
				popup_an_error("Charset has more than 256 "
				    "entries");
				rc = CS_BAD;
				break;
			}
			if (tok[0] == '*') {
				one_way = True;
				tok++;
			} else
				one_way = False;
			iso = strtoul(tok, &ptr, 0);
			/* Table entries are 8-bit codes: 0..255 only. */
			if (ptr == tok || *ptr != '\0' || iso > 0xffL) {
				if (strlen(tok) == 1)
					iso = tok[0] & 0xff;
				else {
					popup_an_error("Invalid charset "
					    "entry '%s' (#%d)",
					    tok, n_entries);
					rc = CS_BAD;
					break;
				}
			}
			remap_one((unsigned char)n_entries, iso, scope,
			    one_way);

			n_entries++;
			s = CN;
		}

		/* Don't pile a second complaint on top of an earlier one. */
		if (rc == CS_OKAY) {
			if (n_entries != 256) {
				popup_an_error("Charset has %d entries, "
				    "need 256", n_entries);
				rc = CS_BAD;
			} else {
				/*
				 * The entire EBCDIC-to-ASCII mapping has been
				 * defined.  Make sure that any printable
				 * ASCII character that doesn't now map back
				 * onto itself is mapped onto an EBCDIC NUL.
				 */
				int i;

				for (i = 0; i < 256; i++) {
					if ((i & 0x7f) > 0x20 && i != 0x7f &&
					    asc2ebc[i] != 0 &&
					    ebc2asc[asc2ebc[i]] != i) {
						asc2ebc[i] = 0;
					}
				}
			}
		}
	} else {
		while ((ns = split_dresource(&s, &ebcs, &isos))) {
			unsigned long code = 0;
			char *ptr;
			Boolean bad = False;

			(*ne)++;

			/*
			 * Check for a split failure before touching ebcs or
			 * isos; they are not known to be valid on failure.
			 */
			if (ns < 0)
				bad = True;
			else {
				if (ebcs[0] == '*') {
					one_way = True;
					ebcs++;
				} else
					one_way = False;

				/* Reject codes that don't fit in a byte. */
				code = strtoul(ebcs, &ptr, 0);
				if (ptr == ebcs || *ptr != '\0' ||
				    code > 0xffUL)
					bad = True;
				else {
					iso = parse_keysym(isos, True);
					if (iso == NoSymbol)
						bad = True;
				}
			}
			if (bad) {
				popup_an_error("Cannot parse %s \"%s\", entry %d",
				    ResCharset, csname, *ne);
				rc = CS_BAD;
				break;
			}
			remap_one((unsigned char)code, iso, scope, one_way);
		}
	}
	Free(spec);
	return rc;
}

#if defined(DEBUG_CHARSET) /*[*/
/*
 * Format a character code for a diagnostic message.
 *
 * Returns " ('c')" when the low seven bits of l lie strictly between ' ' and
 * 0x7f, or when l is 0xff; otherwise returns an empty string.  The non-empty
 * result lives in a static buffer that the next call overwrites.
 */
static char *
char_if_ascii7(unsigned long l)
{
	/* " ('c')" is six characters plus the terminating NUL. */
	static char buf[8];

	if (((l & 0x7f) > ' ' && (l & 0x7f) < 0x7f) || l == 0xff) {
		(void) sprintf(buf, " ('%c')", (char)l);
		return buf;
	}
	return "";
}
#endif /*]*/


#if defined(DEBUG_CHARSET) /*[*/
/*
 * Verify that a character set is not ambiguous.
 *
 * For every display code from 1 to 255 except the blank, counts the EBCDIC
 * codes from X'41' through X'FE' whose CG translation maps onto it.  A
 * warning naming the first two is issued for each display code reached from
 * more than one EBCDIC code.
 *
 * Returns CS_OKAY if no display code is ambiguous, CS_BAD otherwise.
 */
static enum cs_result
check_charset(void)
{
	unsigned long iso;
	unsigned char ebc;
	enum cs_result rc = CS_OKAY;

	for (iso = 1; iso <= 255; iso++) {
		unsigned char multi[256];
		int n_multi = 0;

		if (iso == ' ')
			continue;

		for (ebc = 0x41; ebc < 0xff; ebc++) {
			if (cg2asc[ebc2cg[ebc]] == iso) {
				multi[n_multi] = ebc;
				n_multi++;
			}
		}
		if (n_multi > 1) {
			/*
			 * "%02x" takes an unsigned int, so iso (an unsigned
			 * long) has to be narrowed explicitly.
			 */
			xs_warning("Display character 0x%02x%s has multiple "
			    "EBCDIC definitions: X'%02X', X'%02X'%s",
			    (unsigned int)iso, char_if_ascii7(iso),
			    multi[0], multi[1], (n_multi > 2)? ", ...": "");
			rc = CS_BAD;
		}
	}
	return rc;
}
#endif /*]*/

/*
 * Return the current character set name: the name stored by
 * set_charset_name() if there is one, else appres.charset if it is set,
 * else "us".
 */
char *
get_charset_name(void)
{
	if (charset_name != CN)
		return charset_name;
	if (appres.charset != CN)
		return appres.charset;
	return "us";
}


/* syntax highlighted by Code2HTML, v. 0.9.1 */