/* ** THIS SOFTWARE IS SUBJECT TO COPYRIGHT PROTECTION AND IS OFFERED ONLY ** PURSUANT TO THE 3DFX GLIDE GENERAL PUBLIC LICENSE. THERE IS NO RIGHT ** TO USE THE GLIDE TRADEMARK WITHOUT PRIOR WRITTEN PERMISSION OF 3DFX ** INTERACTIVE, INC. A COPY OF THIS LICENSE MAY BE OBTAINED FROM THE ** DISTRIBUTOR OR BY CONTACTING 3DFX INTERACTIVE INC(info@3dfx.com). ** THIS PROGRAM IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER ** EXPRESSED OR IMPLIED. SEE THE 3DFX GLIDE GENERAL PUBLIC LICENSE FOR A ** FULL TEXT OF THE NON-WARRANTY PROVISIONS. ** ** USE, DUPLICATION OR DISCLOSURE BY THE GOVERNMENT IS SUBJECT TO ** RESTRICTIONS AS SET FORTH IN SUBDIVISION (C)(1)(II) OF THE RIGHTS IN ** TECHNICAL DATA AND COMPUTER SOFTWARE CLAUSE AT DFARS 252.227-7013, ** AND/OR IN SIMILAR OR SUCCESSOR CLAUSES IN THE FAR, DOD OR NASA FAR ** SUPPLEMENT. UNPUBLISHED RIGHTS RESERVED UNDER THE COPYRIGHT LAWS OF ** THE UNITED STATES. ** ** COPYRIGHT 3DFX INTERACTIVE, INC. 1999, ALL RIGHTS RESERVED ** ** $Revision: 1.2.2.2 $ ** $Date: 2000/11/14 14:11:20 $ */ #include #include #include #include #include "texusint.h" /* * This file implements the neural net quantizer, which takes an image in * ARGB8888 format and produces an optimal YAB table, and an 8 bit image * in YAB format that best represents the original image. A very detailed * explanation of the algorithm is available in * /tdfx/engr/devel/sst1/docs/yab.doc. The summary follows. * * Neural net algorithms first determine a "representative sample" of the * input image. This representative sample is repeatedly run through the net * during the network learning stage, and the neural net "learns" which colors * are important and which ones are not. It's quite possible to feed every * pixel in the original image repeatedly into the neural net * to make it learn; however, this can be extremely time consuming. * * So, we prefer to make a representative sample of colors for the input image. * The original yab.doc suggests we try to derive sample colors by reducing the * ARGB8888 colors to RGB555. I've found that simply color quantizing to 256 * colors (just like for the 8-bit palettized case) works quite well, and * so we first quantize to 8 bit palette, and use the palette as the sample * colors to feed the neural network. This also makes using 256-palette * textures very easy. * * After the "representative colors" are determined, we train the neural net, * and obtain the optimal YAB table. Each sample color in the palette, * which was originally in ARGB8888 format is now replaced with IRGB8888, * where the RGB is the same as before, but the alpha channel is replaced * with an 8 bit number I which is the YAB index corresponding to this * representative color. * * So now it's possible to translate the original image into an 8 bit image * by first looking up the original pixel in the representative colors table, * extracting the alpha channel, and using it as the index into the YAB table * delivered by the algorithm. * * In the process of converting the original image to the YAB format, we could * optionally dither the image. Ordered dithering doesn't quite work, so we * use error-diffusion dithering. * * I've found that there are three speed bottlenecks to overcome. The first * time consuming operation is the computation of representative image colors. * 256 color quantization is used for this part. The second bottleneck is the * training of the neural net algorithm itself, and I've optimized this as * much as possible. The third bottleneck is the translation of the original * image into the 8 bit YAB indexed image; this still needs work, especially * when error diffusion dithering is enabled. * * So, now, onto business. */ /****************************************************************************** * * The hardcore neural net stuff begins here. * */ #define MAX_ITERATIONS 4000 #define MAX_DRYSPELLS 2000 #define MAX_NEURONS 256 typedef struct _weight { int r, g, b; // fixed point, SUBPIXEL precision bits int ir, ig, ib; // pure integers, maybe -256 to 255. } Weight; typedef struct _vector { Weight *py, *pa, *pb; int r, g, b; // pure integers, 0 to 255. } Neuron; static Weight Y[16], A[4], B[4]; static Neuron N[MAX_NEURONS]; static int errR, errG, errB, errMax; static int totR, totG, totB; #define SUBPIXEL 22 #define ABS(x) (((x) < 0) ? (-(x)) : (x)) #define INT(x) ((x) >> SUBPIXEL) #define CLAMP_255(x) if (x < 0) x = 0; else if (x > 255) x = 255 #define CLAMP_PLUS(x) if (x < 0) x = 0; else if (x > ((256 << SUBPIXEL)-1)) \ x = ((256 << SUBPIXEL) -1) #define CLAMP_BOTH(x) if (x < (-256 << SUBPIXEL)) x = (-256 << SUBPIXEL); \ else if (x > ((256 << SUBPIXEL) - 1)) \ x = ((256 << SUBPIXEL) -1) static int _nn_modifyNeurons(int ir, int ig, int ib) { int i; int d0, d1; // closest & next closest distance to input int p0, p1; // index into the 256 color table. int d, dr, dg, db; Weight *py, *pa, *pb; Neuron *n; d0 = d1 = 0x7fffffff; p0 = p1 = 0; /* Find 2 neurons with smallest distances to r, g, b */ for (i=0, n=N; i<256; i++, n++) { py = n->py; pa = n->pa; pb = n->pb; n->r = py->ir + pa->ir + pb->ir; CLAMP_255(n->r); n->g = py->ir + pa->ig + pb->ig; CLAMP_255(n->g); n->b = py->ir + pa->ib + pb->ib; CLAMP_255(n->b); d = DISTANCE(n->r, n->g, n->b, ir, ig, ib); if (d < d0) { d1 = d0; d0 = d; p1 = p0; p0 = i; } else if (d < d1) { d1 = d; p1 = i; } } /* track errors */ dr = ABS(N[p0].r - ir); dg = ABS(N[p0].g - ig); db = ABS(N[p0].b - ib); totR += dr; totG += dg; totB += db; if (errMax < d0) { errMax = d0; errR = dr; errG = dg; errB = db; } // Modify weights for d0 and d1 at positions p0 and p1. // The 16 comes from 8.16 format, 4 comes from alpha, the learing rate, set // at 1/16 for now. // py->r += (ar * 0.25f + ag * 0.5f + ab * 0.25f) * alpha; // pa->r += ar * 0.25f * alpha; // pb->r += ar * 0.25f * alpha; // For the closest neuron, apply maximal learning rate. dr = (ir - N[p0].r) << (SUBPIXEL - 1); dg = (ig - N[p0].g) << (SUBPIXEL - 1); db = (ib - N[p0].b) << (SUBPIXEL - 1); py = N[p0].py; pa = N[p0].pa; pb = N[p0].pb; py->r += (dr >> 2) + (dg >> 1) + (db >> 2); CLAMP_PLUS(py->r); pa->r += (dr >> 2) ; CLAMP_BOTH(pa->r); pa->g += (dg >> 2) ; CLAMP_BOTH(pa->g); pa->b += (db >> 2) ; CLAMP_BOTH(pa->b); pb->r += (dr >> 2) ; CLAMP_BOTH(pb->r); pb->g += (dg >> 2) ; CLAMP_BOTH(pb->g); pb->b += (db >> 2) ; CLAMP_BOTH(pb->b); py->ir = INT(py->r); pa->ir = INT(pa->r); pa->ig = INT(pa->g); pa->ib = INT(pa->b); pb->ir = INT(pb->r); pb->ig = INT(pb->g); pb->ib = INT(pb->b); // For the next closest neuron, apply 1/4 the learning rate. dr = (ir - N[p1].r) << (SUBPIXEL - 2); dg = (ig - N[p1].g) << (SUBPIXEL - 2); db = (ib - N[p1].b) << (SUBPIXEL - 2); py = N[p1].py; pa = N[p1].pa; pb = N[p1].pb; py->r += (dr >> 2) + (dg >> 1) + (db >> 2); CLAMP_PLUS(py->r); pa->r += (dr >> 2) ; CLAMP_BOTH(pa->r); pa->g += (dg >> 2) ; CLAMP_BOTH(pa->g); pa->b += (db >> 2) ; CLAMP_BOTH(pa->b); pb->r += (dr >> 2) ; CLAMP_BOTH(pb->r); pb->g += (dg >> 2) ; CLAMP_BOTH(pb->g); pb->b += (db >> 2) ; CLAMP_BOTH(pb->b); py->ir = INT(py->r); pa->ir = INT(pa->r); pa->ig = INT(pa->g); pa->ib = INT(pa->b); pb->ir = INT(pb->r); pb->ig = INT(pb->g); pb->ib = INT(pb->b); return p0; // best fit neuron is returned. } static void _nn_initTables() { int i; // Intensity ramp for (i=0; i<16; i++) { Y[i].r = ((long) ((255.0f * i)/15.0f)) << SUBPIXEL; Y[i].ir = INT(Y[i].r); } for (i=0; i< 4; i++) { A[i].r = A[i].g = A[i].b = 0; A[i].ir = INT(A[i].r); A[i].ig = INT(A[i].g); A[i].ib = INT(A[i].b); B[i].r = B[i].g = B[i].b = 0; B[i].ir = INT(B[i].r); B[i].ig = INT(B[i].g); B[i].ib = INT(B[i].b); } for (i=0; i> 4) & 0x0f; ia = (i >> 2) & 0x03; ib = (i >> 0) & 0x03; N[i].py = &Y[iy]; N[i].pa = &A[ia]; N[i].pb = &B[ib]; N[i].r = INT(Y[iy].r)+INT(A[ia].r)+INT(B[ib].r);CLAMP_255(N[i].r); N[i].g = INT(Y[iy].r)+INT(A[ia].g)+INT(B[ib].g);CLAMP_255(N[i].g); N[i].b = INT(Y[iy].r)+INT(A[ia].b)+INT(B[ib].b);CLAMP_255(N[i].b); } } static int order[256]; static int _nn_randomOrder(const void *a, const void *b) { a = b; // no compiler warnings return (rand() % 3) - 1; } static void txMapPal256toYAB(FxU32 *YAB, FxU8 *map, int nsamples, FxU32 *samples) { int i; int bstR, bstG, bstB, bstMax; int iterations; // track how many inputs have been fed to NN int drySpells; // how many inputs since last best case. int yab2pal[256]; _nn_initTables(); /* * Select a number which is relatively prime to nsamples. */ for (i=0; i> 16) & 0xFF), ((*pRGB >> 8) & 0xFF), ((*pRGB) & 0xFF)); } iterations += nsamples; if (errMax < bstMax) { /* * A lower total error than before, take a Snapshot * * YAB[] has 16 entries for Y, 12 entries each for A & B. */ for (i=0; i<16; i++) { YAB[i] = Y[i].ir; if ((Y[i].ir < 0) || (Y[i].ir > 255)) { txPanic("Bad Y!\n"); } } for (i=0; i<4; i++) { YAB[16 + 3*i + 0] = A[i].ir; YAB[16 + 3*i + 1] = A[i].ig; YAB[16 + 3*i + 2] = A[i].ib; if ((A[i].ir < -256) || (A[i].ir > 255) || (A[i].ig < -256) || (A[i].ig > 255) || (A[i].ib < -256) || (A[i].ib > 255)) txPanic("Bad A!\n"); } for (i=0; i<4; i++) { YAB[28 + 3*i + 0] = B[i].ir; YAB[28 + 3*i + 1] = B[i].ig; YAB[28 + 3*i + 2] = B[i].ib; if ((B[i].ir < -256) || (B[i].ir > 255) || (B[i].ig < -256) || (B[i].ig > 255) || (B[i].ib < -256) || (B[i].ib > 255)) txPanic("Bad B!\n"); } bstMax = errMax; bstR = errR; bstG = errG; bstB = errB; #if 0 printf("%8d%, dry=%8d, eMax=%8x eMax=%3d%3d%3d eAvg=%3d%3d%3d\n", iterations, drySpells, errMax, errG, errR, errB, totG/nsamples, totR/nsamples, totB/nsamples ); #endif drySpells = 0; } else { drySpells += nsamples; } if (errMax == 0) { // printf("******Exact Solution in %d iterations\n", iterations); // _nn_Dump(); break; } } /* * At this point YAB has the YAB table, samples has input palette, * Replace MSB of samples with index to be used with YAB table. */ txYABtoPal256(yab2pal, YAB); for (i=0; i> 16) & 0xFF; ig = (samples[i] >> 8) & 0xFF; ib = (samples[i] ) & 0xFF; // Find closest color in the yab2pal tables map[i] = (FxU8) txNearestColor(ir, ig, ib, (const FxU32 *)yab2pal, 256); } } void txMipNccNNet(TxMip *pxMip, TxMip *txMip, int format, FxU32 dither, FxU32 comp) { int i, w, h; int ncolors; int pixsize = (pxMip->format == GR_TEXFMT_YIQ_422) ? 1 : 2; int yabTable[16+12+12]; FxU8 map[256]; /* * Get a 256 color palette, to be used as samples * Incidentally, convert src 32 bit image to dst 8 bit indexed image, * with indices referring to the 256 color palette. * Also incidentally, pack the alpha channel if necessary. */ if( txVerbose ) { printf("NCC Neural nets..."); fflush(stdout); } pxMip->format = (format == GR_TEXFMT_YIQ_422) ? GR_TEXFMT_P_8 : GR_TEXFMT_AP_88; ncolors = txMipPal256(pxMip, txMip, pxMip->format, 0, 0); if( txVerbose ) { printf("%d samples...", ncolors); fflush(stdout); } txMapPal256toYAB((FxU32 *)yabTable, (FxU8 *)map, ncolors, (FxU32 *)pxMip->pal); if( txVerbose ) { printf("eMax=(%3ld%3ld%3ld)...eAvg=(%3ld%3ld%3ld)\n", errG, errR, errB, totG/ncolors, totR/ncolors, totB/ncolors ); } if ((dither & TX_DITHER_MASK) != TX_DITHER_NONE) { /* * At this point, we can lose the 256 color palette, and replace it with * the 256 color palette generated from the YAB table. This will be * useful for error diffusion dithering. */ txYABtoPal256(pxMip->pal, yabTable); txDiffuseIndex(pxMip, txMip, pixsize, pxMip->pal, 256); } else { /* Translate image to YAB format */ w = txMip->width; h = txMip->height; for (i=0; i< txMip->depth; i++) { int npixels; FxU8 *src8; FxU16 *src16; npixels = w * h; if (pixsize == 2) { src16 = (FxU16 *) pxMip->data[i]; while (npixels--) { *src16 = (*src16 & 0xFF00) | (map[*src16 & 0x00FF]); src16++; } } else { src8 = (FxU8 *) pxMip->data[i]; while (npixels--) { *src8 = map[*src8]; src8++; } } if (w > 1) w >>= 1; if (h > 1) h >>= 1; } } /* Copy the YAB table to pal. */ pxMip->format = format; for (i=0; i< (16+12+12); i++) pxMip->pal[i] = yabTable[i]; }