/* @(#)unicode.c 1.4 02/06/10 Copyright 2001 J. Schilling */ #ifndef lint static char sccsid[] = "@(#)unicode.c 1.4 02/06/10 Copyright 2001 J. Schilling"; #endif /* * Routines to convert from/to UNICODE * * This is currently a very simple implementation that only * handles ISO-8859-1 coding. There should be a better solution * in the future. * * Copyright (c) 2001 J. Schilling */ /* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; see the file COPYING. If not, write to * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ #include #include #include "star.h" #include #include #include "starsubs.h" EXPORT int to_utf8 __PR((Uchar *to, Uchar *from)); EXPORT int to_utf8l __PR((Uchar *to, Uchar *from, int len)); EXPORT BOOL from_utf8 __PR((Uchar *to, Uchar *from)); EXPORT BOOL from_utf8l __PR((Uchar *to, Uchar *from, int *len)); EXPORT int to_utf8(to, from) register Uchar *to; register Uchar *from; { register Uchar *oto = to; register Uchar c; while ((c = *from++) != '\0') { if (c <= 0x7F) { *to++ = c; } else if (c <= 0xBF) { *to++ = 0xC2; *to++ = c; } else { /*c <= 0xFF */ *to++ = 0xC3; *to++ = c & 0xBF; } } *to = '\0'; return (to - oto); } EXPORT int to_utf8l(to, from, len) register Uchar *to; register Uchar *from; register int len; { register Uchar *oto = to; register Uchar c; while (--len >= 0) { c = *from++; if (c <= 0x7F) { *to++ = c; } else if (c <= 0xBF) { *to++ = 0xC2; *to++ = c; } else { /*c <= 0xFF */ *to++ = 0xC3; *to++ = c & 0xBF; } } *to = '\0'; return (to - oto); } EXPORT BOOL from_utf8(to, from) register Uchar *to; register Uchar *from; { register Uchar c; register BOOL ret = TRUE; while ((c = *from++) != '\0') { if (c <= 0x7F) { *to++ = c; } else if (c == 0xC0) { *to++ = *from++ & 0x7F; } else if (c == 0xC1) { *to++ = (*from++ | 0x40) & 0x7F; } else if (c == 0xC2) { *to++ = *from++; } else if (c == 0xC3) { *to++ = *from++ | 0x40; } else { ret = FALSE; /* unknown/illegal UTF-8 char*/ *to++ = '_'; /* use default character */ if (c < 0xE0) { from++; /* 2 bytes in total */ } else if (c < 0xF0) { from += 2; /* 3 bytes in total */ } else if (c < 0xF8) { from += 3; /* 4 bytes in total */ } else if (c < 0xFC) { from += 4; /* 5 bytes in total */ } else if (c < 0xFE) { from += 5; /* 6 bytes in total */ } else { while ((c = *from) != '\0') { /* * Test for 7 bit ASCII + non prefix */ if (c <= 0xBF) break; from++; } } } } *to = '\0'; return (ret); } EXPORT BOOL from_utf8l(to, from, lenp) register Uchar *to; register Uchar *from; int *lenp; { register Uchar *oto = to; register Uchar c; register BOOL ret = TRUE; register int len = *lenp; while (--len >= 0) { c = *from++; if (c <= 0x7F) { *to++ = c; } else if (c == 0xC0) { *to++ = *from++ & 0x7F; len--; } else if (c == 0xC1) { *to++ = (*from++ | 0x40) & 0x7F; len--; } else if (c == 0xC2) { *to++ = *from++; len--; } else if (c == 0xC3) { *to++ = *from++ | 0x40; len--; } else { ret = FALSE; /* unknown/illegal UTF-8 char*/ *to++ = '_'; /* use default character */ if (c < 0xE0) { from++; /* 2 bytes in total */ len--; } else if (c < 0xF0) { from += 2; /* 3 bytes in total */ len -= 2; } else if (c < 0xF8) { from += 3; /* 4 bytes in total */ len -= 3; } else if (c < 0xFC) { from += 4; /* 5 bytes in total */ len -= 4; } else if (c < 0xFE) { from += 5; /* 6 bytes in total */ len -= 5; } else { while (len > 0) { c = *from; /* * Test for 7 bit ASCII + non prefix */ if (c <= 0xBF) break; from++; len--; } } } } *to = '\0'; *lenp = (to - oto); return (ret); }