From 0093952872cceba33a4c45438cf4162a1f5b6508 Mon Sep 17 00:00:00 2001 From: LaurentGom Date: Tue, 18 Aug 2009 07:31:15 +0000 Subject: [PATCH] FS#127 - Make image loading thread-safe git-svn-id: https://sfml.svn.sourceforge.net/svnroot/sfml/trunk@1205 4e206d99-4929-0410-ac5d-dfc041789085 --- src/SFML/Graphics/SOIL/SOIL.c | 23 +- src/SFML/Graphics/SOIL/SOIL.h | 1 - src/SFML/Graphics/SOIL/stb_image_aug.c | 2085 +++++++++++++---------- src/SFML/Graphics/SOIL/stb_image_aug.h | 138 +- src/SFML/Graphics/SOIL/stbi_DDS_aug.h | 4 +- src/SFML/Graphics/SOIL/stbi_DDS_aug_c.h | 130 +- 6 files changed, 1383 insertions(+), 998 deletions(-) diff --git a/src/SFML/Graphics/SOIL/SOIL.c b/src/SFML/Graphics/SOIL/SOIL.c index b46eccc9..b4b24e5b 100644 --- a/src/SFML/Graphics/SOIL/SOIL.c +++ b/src/SFML/Graphics/SOIL/SOIL.c @@ -13,18 +13,6 @@ * everybody at gamedev.net */ -#define SOIL_CHECK_FOR_GL_ERRORS 0 - -#ifdef WIN32 - #define WIN32_LEAN_AND_MEAN - #include - #include -#elif defined(__APPLE__) || defined(__APPLE_CC__) - /* I can't test this Apple stuff! */ - #define APIENTRY -#else -#endif - #include "SOIL.h" #include "stb_image_aug.h" #include "image_DXT.h" @@ -35,7 +23,6 @@ /* error reporting */ char *result_string_pointer = "SOIL initialized"; - unsigned char* SOIL_load_image ( @@ -44,7 +31,7 @@ unsigned char* int force_channels ) { - unsigned char *result = stbi_load( (char*)filename, + unsigned char *result = stbi_load( filename, width, height, channels, force_channels ); if( result == NULL ) { @@ -66,7 +53,7 @@ unsigned char* ) { unsigned char *result = stbi_load_from_memory( - (stbi_uc *)buffer, buffer_length, + buffer, buffer_length, width, height, channels, force_channels ); if( result == NULL ) @@ -100,17 +87,17 @@ int } if( image_type == SOIL_SAVE_TYPE_BMP ) { - save_result = stbi_write_bmp( (char*)filename, + save_result = stbi_write_bmp( filename, width, height, channels, (void*)data ); } else if( image_type == SOIL_SAVE_TYPE_TGA ) { - save_result = stbi_write_tga( (char*)filename, + save_result = stbi_write_tga( filename, width, height, channels, (void*)data ); } else if( image_type == SOIL_SAVE_TYPE_DDS ) { - save_result = save_image_as_DDS( (const char*)filename, + save_result = save_image_as_DDS( filename, width, height, channels, (const unsigned char *const)data ); } else { diff --git a/src/SFML/Graphics/SOIL/SOIL.h b/src/SFML/Graphics/SOIL/SOIL.h index 22f2bdc1..4e049d35 100644 --- a/src/SFML/Graphics/SOIL/SOIL.h +++ b/src/SFML/Graphics/SOIL/SOIL.h @@ -73,7 +73,6 @@ enum SOIL_SAVE_TYPE_BMP = 1, SOIL_SAVE_TYPE_DDS = 2 }; - /** Loads an image from disk into an array of unsigned chars. Note that *channels return the original channel count of the diff --git a/src/SFML/Graphics/SOIL/stb_image_aug.c b/src/SFML/Graphics/SOIL/stb_image_aug.c index c92f833b..d0f3f0e9 100644 --- a/src/SFML/Graphics/SOIL/stb_image_aug.c +++ b/src/SFML/Graphics/SOIL/stb_image_aug.c @@ -1,4 +1,4 @@ -/* stbi-1.08 - public domain JPEG/PNG reader - http://nothings.org/stb_image.c +/* stbi-1.18 - public domain JPEG/PNG reader - http://nothings.org/stb_image.c when you control the images you're loading QUICK NOTES: @@ -6,18 +6,30 @@ avoid problematic images and only need the trivial interface JPEG baseline (no JPEG progressive, no oddball channel decimations) - PNG non-interlaced + PNG 8-bit only BMP non-1bpp, non-RLE TGA (not sure what subset, if a subset) - PSD (composite view only, no extra channels) + PSD (composited view only, no extra channels) HDR (radiance rgbE format) writes BMP,TGA (define STBI_NO_WRITE to remove code) decoded from memory or through stdio FILE (define STBI_NO_STDIO to remove code) + supports installable dequantizing-IDCT, YCbCr-to-RGB conversion (define STBI_SIMD) TODO: stbi_info_* history: + 1.18 fix a threading bug (local mutable static) + 1.17 support interlaced PNG + 1.16 major bugfix - convert_format converted one too many pixels + 1.15 initialize some fields for thread safety + 1.14 fix threadsafe conversion bug; header-file-only version (#define STBI_HEADER_FILE_ONLY before including) + 1.13 threadsafe + 1.12 const qualifiers in the API + 1.11 Support installable IDCT, colorspace conversion routines + 1.10 Fixes for 64-bit (don't use "unsigned long") + optimized upsampling by Fabian "ryg" Giesen + 1.09 Fix format-conversion for PSD code (bad global variables!) 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz 1.07 attempt to fix C++ warning/errors again 1.06 attempt to fix C++ warning/errors again @@ -55,6 +67,11 @@ #include "stb_image_aug.h" +#ifndef STBI_NO_HDR +#include // ldexp +#include // strcmp +#endif + #ifndef STBI_NO_STDIO #include #endif @@ -64,9 +81,14 @@ #include #ifndef _MSC_VER -#define __forceinline + #ifdef __cplusplus + #define __forceinline inline + #else + #define __forceinline + #endif #endif + // implementation: typedef unsigned char uint8; typedef unsigned short uint16; @@ -87,13 +109,14 @@ typedef unsigned char validate_uint32[sizeof(uint32)==4]; #endif // I (JLD) want full messages for SOIL -//#define STBI_FAILURE_USERMSG 1 +#define STBI_FAILURE_USERMSG 1 ////////////////////////////////////////////////////////////////////////////// // // Generic API that works on all image types // +// this is not threadsafe static char *failure_reason; char *stbi_failure_reason(void) @@ -151,7 +174,7 @@ static stbi_uc *hdr_to_ldr(float *data, int x, int y, int comp); #endif #ifndef STBI_NO_STDIO -unsigned char *stbi_load(char *filename, int *x, int *y, int *comp, int req_comp) +unsigned char *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp) { FILE *f = fopen(filename, "rb"); unsigned char *result; @@ -192,7 +215,7 @@ unsigned char *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_c } #endif -unsigned char *stbi_load_from_memory(stbi_uc *buffer, int len, int *x, int *y, int *comp, int req_comp) +unsigned char *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) { int i; if (stbi_jpeg_test_memory(buffer,len)) @@ -225,7 +248,7 @@ unsigned char *stbi_load_from_memory(stbi_uc *buffer, int len, int *x, int *y, i #ifndef STBI_NO_HDR #ifndef STBI_NO_STDIO -float *stbi_loadf(char *filename, int *x, int *y, int *comp, int req_comp) +float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp) { FILE *f = fopen(filename, "rb"); float *result; @@ -249,7 +272,7 @@ float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) } #endif -float *stbi_loadf_from_memory(stbi_uc *buffer, int len, int *x, int *y, int *comp, int req_comp) +float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) { stbi_uc *data; #ifndef STBI_NO_HDR @@ -267,7 +290,7 @@ float *stbi_loadf_from_memory(stbi_uc *buffer, int len, int *x, int *y, int *com // defined, for API simplicity; if STBI_NO_HDR is defined, it always // reports false! -extern int stbi_is_hdr_from_memory(stbi_uc *buffer, int len) +int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len) { #ifndef STBI_NO_HDR return stbi_hdr_test_memory(buffer, len); @@ -277,7 +300,7 @@ extern int stbi_is_hdr_from_memory(stbi_uc *buffer, int len) } #ifndef STBI_NO_STDIO -extern int stbi_is_hdr (char *filename) +extern int stbi_is_hdr (char const *filename) { FILE *f = fopen(filename, "rb"); int result=0; @@ -301,10 +324,10 @@ extern int stbi_is_hdr_from_file(FILE *f) // @TODO: get image dimensions & components without fully decoding #ifndef STBI_NO_STDIO -extern int stbi_info (char *filename, int *x, int *y, int *comp); +extern int stbi_info (char const *filename, int *x, int *y, int *comp); extern int stbi_info_from_file (FILE *f, int *x, int *y, int *comp); #endif -extern int stbi_info_from_memory(stbi_uc *buffer, int len, int *x, int *y, int *comp); +extern int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); #ifndef STBI_NO_HDR static float h2l_gamma_i=1.0f/2.2f, h2l_scale_i=1.0f; @@ -323,10 +346,6 @@ void stbi_ldr_to_hdr_scale(float scale) { l2h_scale = scale; } // Common code used by all image loaders // -// image width, height, # components -static uint32 img_x, img_y; -static int img_n, img_out_n; - enum { SCAN_load=0, @@ -334,99 +353,104 @@ enum SCAN_header, }; -// An API for reading either from memory or file. -#ifndef STBI_NO_STDIO -static FILE *img_file; -#endif -static uint8 *img_buffer, *img_buffer_end; +typedef struct +{ + uint32 img_x, img_y; + int img_n, img_out_n; + + #ifndef STBI_NO_STDIO + FILE *img_file; + #endif + uint8 *img_buffer, *img_buffer_end; +} stbi; #ifndef STBI_NO_STDIO -static void start_file(FILE *f) +static void start_file(stbi *s, FILE *f) { - img_file = f; + s->img_file = f; } #endif -static void start_mem(uint8 *buffer, int len) +static void start_mem(stbi *s, uint8 const *buffer, int len) { #ifndef STBI_NO_STDIO - img_file = NULL; + s->img_file = NULL; #endif - img_buffer = buffer; - img_buffer_end = buffer+len; + s->img_buffer = (uint8 *) buffer; + s->img_buffer_end = (uint8 *) buffer+len; } -static int get8(void) +__forceinline static int get8(stbi *s) { #ifndef STBI_NO_STDIO - if (img_file) { - int c = fgetc(img_file); + if (s->img_file) { + int c = fgetc(s->img_file); return c == EOF ? 0 : c; } #endif - if (img_buffer < img_buffer_end) - return *img_buffer++; + if (s->img_buffer < s->img_buffer_end) + return *s->img_buffer++; return 0; } -static int at_eof(void) +__forceinline static int at_eof(stbi *s) { #ifndef STBI_NO_STDIO - if (img_file) - return feof(img_file); + if (s->img_file) + return feof(s->img_file); #endif - return img_buffer >= img_buffer_end; + return s->img_buffer >= s->img_buffer_end; } -static uint8 get8u(void) +__forceinline static uint8 get8u(stbi *s) { - return (uint8) get8(); + return (uint8) get8(s); } -static void skip(int n) +static void skip(stbi *s, int n) { #ifndef STBI_NO_STDIO - if (img_file) - fseek(img_file, n, SEEK_CUR); + if (s->img_file) + fseek(s->img_file, n, SEEK_CUR); else #endif - img_buffer += n; + s->img_buffer += n; } -static int get16(void) +static int get16(stbi *s) { - int z = get8(); - return (z << 8) + get8(); + int z = get8(s); + return (z << 8) + get8(s); } -static uint32 get32(void) +static uint32 get32(stbi *s) { - uint32 z = get16(); - return (z << 16) + get16(); + uint32 z = get16(s); + return (z << 16) + get16(s); } -static int get16le(void) +static int get16le(stbi *s) { - int z = get8(); - return z + (get8() << 8); + int z = get8(s); + return z + (get8(s) << 8); } -static uint32 get32le(void) +static uint32 get32le(stbi *s) { - uint32 z = get16le(); - return z + (get16le() << 16); + uint32 z = get16le(s); + return z + (get16le(s) << 16); } -static void getn(stbi_uc *buffer, int n) +static void getn(stbi *s, stbi_uc *buffer, int n) { #ifndef STBI_NO_STDIO - if (img_file) { - fread(buffer, 1, n, img_file); + if (s->img_file) { + fread(buffer, 1, n, s->img_file); return; } #endif - memcpy(buffer, img_buffer, n); - img_buffer += n; + memcpy(buffer, s->img_buffer, n); + s->img_buffer += n; } ////////////////////////////////////////////////////////////////////////////// @@ -445,27 +469,26 @@ static uint8 compute_y(int r, int g, int b) return (uint8) (((r*77) + (g*150) + (29*b)) >> 8); } -static unsigned char *convert_format(unsigned char *data, int img_n, int req_comp) +static unsigned char *convert_format(unsigned char *data, int img_n, int req_comp, uint x, uint y) { - uint i,j; + int i,j; unsigned char *good; if (req_comp == img_n) return data; assert(req_comp >= 1 && req_comp <= 4); - good = (unsigned char *) malloc(req_comp * img_x * img_y); + good = (unsigned char *) malloc(req_comp * x * y); if (good == NULL) { free(data); return epuc("outofmem", "Out of memory"); } - for (j=0; j < img_y; ++j) { - unsigned char *src = data + j * img_x * img_n ; - unsigned char *dest = good + j * img_x * req_comp; + for (j=0; j < (int) y; ++j) { + unsigned char *src = data + j * x * img_n ; + unsigned char *dest = good + j * x * req_comp; #define COMBO(a,b) ((a)*8+(b)) - #define CASE(a,b) case COMBO(a,b): for(i=0; i < img_x; ++i, src += a, dest += b) - + #define CASE(a,b) case COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) // convert source image with img_n components to one with req_comp components; // avoid switch per pixel, so use switch per scanline and massive macros switch(COMBO(img_n, req_comp)) { @@ -487,7 +510,6 @@ static unsigned char *convert_format(unsigned char *data, int img_n, int req_com } free(data); - img_out_n = req_comp; return good; } @@ -563,8 +585,6 @@ static stbi_uc *hdr_to_ldr(float *data, int x, int y, int comp) // IJG 1998: 0.98 seconds (MSVC6, makefile provided by IJG) // IJG 1998: 0.95 seconds (MSVC6, makefile + proc=PPro) -int stbi_jpeg_dc_only; - // huffman decoding acceleration #define FAST_BITS 9 // larger handles more cases; smaller stomps less cache @@ -579,9 +599,44 @@ typedef struct int delta[17]; // old 'firstsymbol' - old 'firstcode' } huffman; -static huffman huff_dc[4]; // baseline is 2 tables, extended is 4 -static huffman huff_ac[4]; -static uint8 dequant[4][64]; +typedef struct +{ + #if STBI_SIMD + unsigned short dequant2[4][64]; + #endif + stbi s; + huffman huff_dc[4]; + huffman huff_ac[4]; + uint8 dequant[4][64]; + +// sizes for components, interleaved MCUs + int img_h_max, img_v_max; + int img_mcu_x, img_mcu_y; + int img_mcu_w, img_mcu_h; + +// definition of jpeg image component + struct + { + int id; + int h,v; + int tq; + int hd,ha; + int dc_pred; + + int x,y,w2,h2; + uint8 *data; + void *raw_data; + uint8 *linebuf; + } img_comp[4]; + + uint32 code_buffer; // jpeg entropy-coded buffer + int code_bits; // number of valid bits + unsigned char marker; // marker seen while filling entropy buffer + int nomore; // flag if we saw a marker so must stop + + int scan_n, order[4]; + int restart_interval, todo; +} jpeg; static int build_huffman(huffman *h, int *count) { @@ -624,65 +679,42 @@ static int build_huffman(huffman *h, int *count) return 1; } -// sizes for components, interleaved MCUs -static int img_h_max, img_v_max; -static int img_mcu_x, img_mcu_y; -static int img_mcu_w, img_mcu_h; - -// definition of jpeg image component -static struct -{ - int id; - int h,v; - int tq; - int hd,ha; - int dc_pred; - - int x,y,w2,h2; - uint8 *data; -} img_comp[4]; - -static uint32 code_buffer; // jpeg entropy-coded buffer -static int code_bits; // number of valid bits -static unsigned char marker; // marker seen while filling entropy buffer -static int nomore; // flag if we saw a marker so must stop - -static void grow_buffer_unsafe(void) +static void grow_buffer_unsafe(jpeg *j) { do { - int b = nomore ? 0 : get8(); + int b = j->nomore ? 0 : get8(&j->s); if (b == 0xff) { - int c = get8(); + int c = get8(&j->s); if (c != 0) { - marker = (unsigned char) c; - nomore = 1; + j->marker = (unsigned char) c; + j->nomore = 1; return; } } - code_buffer = (code_buffer << 8) | b; - code_bits += 8; - } while (code_bits <= 24); + j->code_buffer = (j->code_buffer << 8) | b; + j->code_bits += 8; + } while (j->code_bits <= 24); } // (1 << n) - 1 static uint32 bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535}; // decode a jpeg huffman value from the bitstream -__forceinline static int decode(huffman *h) +__forceinline static int decode(jpeg *j, huffman *h) { unsigned int temp; int c,k; - if (code_bits < 16) grow_buffer_unsafe(); + if (j->code_bits < 16) grow_buffer_unsafe(j); // look at the top FAST_BITS and determine what symbol ID it is, // if the code is <= FAST_BITS - c = (code_buffer >> (code_bits - FAST_BITS)) & ((1 << FAST_BITS)-1); + c = (j->code_buffer >> (j->code_bits - FAST_BITS)) & ((1 << FAST_BITS)-1); k = h->fast[c]; if (k < 255) { - if (h->size[k] > code_bits) + if (h->size[k] > j->code_bits) return -1; - code_bits -= h->size[k]; + j->code_bits -= h->size[k]; return h->values[k]; } @@ -692,40 +724,40 @@ __forceinline static int decode(huffman *h) // end; in other words, regardless of the number of bits, it // wants to be compared against something shifted to have 16; // that way we don't need to shift inside the loop. - if (code_bits < 16) - temp = (code_buffer << (16 - code_bits)) & 0xffff; + if (j->code_bits < 16) + temp = (j->code_buffer << (16 - j->code_bits)) & 0xffff; else - temp = (code_buffer >> (code_bits - 16)) & 0xffff; + temp = (j->code_buffer >> (j->code_bits - 16)) & 0xffff; for (k=FAST_BITS+1 ; ; ++k) if (temp < h->maxcode[k]) break; if (k == 17) { // error! code not found - code_bits -= 16; + j->code_bits -= 16; return -1; } - if (k > code_bits) + if (k > j->code_bits) return -1; // convert the huffman code to the symbol id - c = ((code_buffer >> (code_bits - k)) & bmask[k]) + h->delta[k]; - assert((((code_buffer) >> (code_bits - h->size[c])) & bmask[h->size[c]]) == h->code[c]); + c = ((j->code_buffer >> (j->code_bits - k)) & bmask[k]) + h->delta[k]; + assert((((j->code_buffer) >> (j->code_bits - h->size[c])) & bmask[h->size[c]]) == h->code[c]); // convert the id to a symbol - code_bits -= k; + j->code_bits -= k; return h->values[c]; } // combined JPEG 'receive' and JPEG 'extend', since baseline // always extends everything it receives. -__forceinline static int extend_receive(int n) +__forceinline static int extend_receive(jpeg *j, int n) { unsigned int m = 1 << (n-1); unsigned int k; - if (code_bits < n) grow_buffer_unsafe(); - k = (code_buffer >> (code_bits - n)) & bmask[n]; - code_bits -= n; + if (j->code_bits < n) grow_buffer_unsafe(j); + k = (j->code_buffer >> (j->code_bits - n)) & bmask[n]; + j->code_bits -= n; // the following test is probably a random branch that won't // predict well. I tried to table accelerate it but failed. // maybe it's compiling as a conditional move? @@ -753,25 +785,25 @@ static uint8 dezigzag[64+15] = }; // decode one 64-entry block-- -static int decode_block(short data[64], huffman *hdc, huffman *hac, int b) +static int decode_block(jpeg *j, short data[64], huffman *hdc, huffman *hac, int b) { int diff,dc,k; - int t = decode(hdc); + int t = decode(j, hdc); if (t < 0) return e("bad huffman code","Corrupt JPEG"); // 0 all the ac values now so we can do it 32-bits at a time memset(data,0,64*sizeof(data[0])); - diff = t ? extend_receive(t) : 0; - dc = img_comp[b].dc_pred + diff; - img_comp[b].dc_pred = dc; + diff = t ? extend_receive(j, t) : 0; + dc = j->img_comp[b].dc_pred + diff; + j->img_comp[b].dc_pred = dc; data[0] = (short) dc; // decode AC components, see JPEG spec k = 1; do { int r,s; - int rs = decode(hac); + int rs = decode(j, hac); if (rs < 0) return e("bad huffman code","Corrupt JPEG"); s = rs & 15; r = rs >> 4; @@ -781,7 +813,7 @@ static int decode_block(short data[64], huffman *hdc, huffman *hac, int b) } else { k += r; // decode into unzigzag'd location - data[dezigzag[k++]] = (short) extend_receive(s); + data[dezigzag[k++]] = (short) extend_receive(j,s); } } while (k < 64); return 1; @@ -840,6 +872,7 @@ __forceinline static uint8 clamp(int x) t1 += p2+p4; \ t0 += p1+p3; +#if !STBI_SIMD // .344 seconds on 3*anemones.jpg static void idct_block(uint8 *out, int out_stride, short data[64], uint8 *dequantize) { @@ -847,16 +880,6 @@ static void idct_block(uint8 *out, int out_stride, short data[64], uint8 *dequan uint8 *o,*dq = dequantize; short *d = data; - if (stbi_jpeg_dc_only) { - // ok, I don't really know why this is right, but it seems to be: - int z = 128 + ((d[0] * dq[0]) >> 3); - for (i=0; i < 8; ++i) { - out[0] = out[1] = out[2] = out[3] = out[4] = out[5] = out[6] = out[7] = z; - out += out_stride; - } - return; - } - // columns for (i=0; i < 8; ++i,++d,++dq, ++v) { // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing @@ -902,96 +925,165 @@ static void idct_block(uint8 *out, int out_stride, short data[64], uint8 *dequan o[4] = clamp((x3-t0) >> 17); } } +#else +static void idct_block(uint8 *out, int out_stride, short data[64], unsigned short *dequantize) +{ + int i,val[64],*v=val; + uint8 *o; + unsigned short *dq = dequantize; + short *d = data; + + // columns + for (i=0; i < 8; ++i,++d,++dq, ++v) { + // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing + if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 + && d[40]==0 && d[48]==0 && d[56]==0) { + // no shortcut 0 seconds + // (1|2|3|4|5|6|7)==0 0 seconds + // all separate -0.047 seconds + // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds + int dcterm = d[0] * dq[0] << 2; + v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; + } else { + IDCT_1D(d[ 0]*dq[ 0],d[ 8]*dq[ 8],d[16]*dq[16],d[24]*dq[24], + d[32]*dq[32],d[40]*dq[40],d[48]*dq[48],d[56]*dq[56]) + // constants scaled things up by 1<<12; let's bring them back + // down, but keep 2 extra bits of precision + x0 += 512; x1 += 512; x2 += 512; x3 += 512; + v[ 0] = (x0+t3) >> 10; + v[56] = (x0-t3) >> 10; + v[ 8] = (x1+t2) >> 10; + v[48] = (x1-t2) >> 10; + v[16] = (x2+t1) >> 10; + v[40] = (x2-t1) >> 10; + v[24] = (x3+t0) >> 10; + v[32] = (x3-t0) >> 10; + } + } + + for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) { + // no fast case since the first 1D IDCT spread components out + IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]) + // constants scaled things up by 1<<12, plus we had 1<<2 from first + // loop, plus horizontal and vertical each scale by sqrt(8) so together + // we've got an extra 1<<3, so 1<<17 total we need to remove. + x0 += 65536; x1 += 65536; x2 += 65536; x3 += 65536; + o[0] = clamp((x0+t3) >> 17); + o[7] = clamp((x0-t3) >> 17); + o[1] = clamp((x1+t2) >> 17); + o[6] = clamp((x1-t2) >> 17); + o[2] = clamp((x2+t1) >> 17); + o[5] = clamp((x2-t1) >> 17); + o[3] = clamp((x3+t0) >> 17); + o[4] = clamp((x3-t0) >> 17); + } +} +static stbi_idct_8x8 stbi_idct_installed = idct_block; + +extern void stbi_install_idct(stbi_idct_8x8 func) +{ + stbi_idct_installed = func; +} +#endif #define MARKER_none 0xff // if there's a pending marker from the entropy stream, return that // otherwise, fetch from the stream and get a marker. if there's no // marker, return 0xff, which is never a valid marker value -static uint8 get_marker(void) +static uint8 get_marker(jpeg *j) { uint8 x; - if (marker != MARKER_none) { x = marker; marker = MARKER_none; return x; } - x = get8u(); + if (j->marker != MARKER_none) { x = j->marker; j->marker = MARKER_none; return x; } + x = get8u(&j->s); if (x != 0xff) return MARKER_none; while (x == 0xff) - x = get8u(); + x = get8u(&j->s); return x; } // in each scan, we'll have scan_n components, and the order // of the components is specified by order[] -static int scan_n, order[4]; -static int restart_interval, todo; #define RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7) // after a restart interval, reset the entropy decoder and // the dc prediction -static void reset(void) +static void reset(jpeg *j) { - code_bits = 0; - code_buffer = 0; - nomore = 0; - img_comp[0].dc_pred = img_comp[1].dc_pred = img_comp[2].dc_pred = 0; - marker = MARKER_none; - todo = restart_interval ? restart_interval : 0x7fffffff; + j->code_bits = 0; + j->code_buffer = 0; + j->nomore = 0; + j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = 0; + j->marker = MARKER_none; + j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff; // no more than 1<<31 MCUs if no restart_interal? that's plenty safe, // since we don't even allow 1<<30 pixels } -static int parse_entropy_coded_data(void) +static int parse_entropy_coded_data(jpeg *z) { - reset(); - if (scan_n == 1) { + reset(z); + if (z->scan_n == 1) { int i,j; + #if STBI_SIMD + __declspec(align(16)) + #endif short data[64]; - int n = order[0]; + int n = z->order[0]; // non-interleaved data, we just need to process one block at a time, // in trivial scanline order // number of blocks to do just depends on how many actual "pixels" this // component has, independent of interleaved MCU blocking and such - int w = (img_comp[n].x+7) >> 3; - int h = (img_comp[n].y+7) >> 3; + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; for (j=0; j < h; ++j) { for (i=0; i < w; ++i) { - if (!decode_block(data, huff_dc+img_comp[n].hd, huff_ac+img_comp[n].ha, n)) return 0; - idct_block(img_comp[n].data+img_comp[n].w2*j*8+i*8, img_comp[n].w2, data, dequant[img_comp[n].tq]); + if (!decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+z->img_comp[n].ha, n)) return 0; + #if STBI_SIMD + stbi_idct_installed(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]); + #else + idct_block(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]); + #endif // every data block is an MCU, so countdown the restart interval - if (--todo <= 0) { - if (code_bits < 24) grow_buffer_unsafe(); + if (--z->todo <= 0) { + if (z->code_bits < 24) grow_buffer_unsafe(z); // if it's NOT a restart, then just bail, so we get corrupt data // rather than no data - if (!RESTART(marker)) return 1; - reset(); + if (!RESTART(z->marker)) return 1; + reset(z); } } } } else { // interleaved! int i,j,k,x,y; short data[64]; - for (j=0; j < img_mcu_y; ++j) { - for (i=0; i < img_mcu_x; ++i) { + for (j=0; j < z->img_mcu_y; ++j) { + for (i=0; i < z->img_mcu_x; ++i) { // scan an interleaved mcu... process scan_n components in order - for (k=0; k < scan_n; ++k) { - int n = order[k]; + for (k=0; k < z->scan_n; ++k) { + int n = z->order[k]; // scan out an mcu's worth of this component; that's just determined // by the basic H and V specified for the component - for (y=0; y < img_comp[n].v; ++y) { - for (x=0; x < img_comp[n].h; ++x) { - int x2 = (i*img_comp[n].h + x)*8; - int y2 = (j*img_comp[n].v + y)*8; - if (!decode_block(data, huff_dc+img_comp[n].hd, huff_ac+img_comp[n].ha, n)) return 0; - idct_block(img_comp[n].data+img_comp[n].w2*y2+x2, img_comp[n].w2, data, dequant[img_comp[n].tq]); + for (y=0; y < z->img_comp[n].v; ++y) { + for (x=0; x < z->img_comp[n].h; ++x) { + int x2 = (i*z->img_comp[n].h + x)*8; + int y2 = (j*z->img_comp[n].v + y)*8; + if (!decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+z->img_comp[n].ha, n)) return 0; + #if STBI_SIMD + stbi_idct_installed(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]); + #else + idct_block(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]); + #endif } } } // after all interleaved components, that's an interleaved MCU, // so now count down the restart interval - if (--todo <= 0) { - if (code_bits < 24) grow_buffer_unsafe(); + if (--z->todo <= 0) { + if (z->code_bits < 24) grow_buffer_unsafe(z); // if it's NOT a restart, then just bail, so we get corrupt data // rather than no data - if (!RESTART(marker)) return 1; - reset(); + if (!RESTART(z->marker)) return 1; + reset(z); } } } @@ -999,7 +1091,7 @@ static int parse_entropy_coded_data(void) return 1; } -static int process_marker(int m) +static int process_marker(jpeg *z, int m) { int L; switch (m) { @@ -1010,141 +1102,156 @@ static int process_marker(int m) return e("progressive jpeg","JPEG format not supported (progressive)"); case 0xDD: // DRI - specify restart interval - if (get16() != 4) return e("bad DRI len","Corrupt JPEG"); - restart_interval = get16(); + if (get16(&z->s) != 4) return e("bad DRI len","Corrupt JPEG"); + z->restart_interval = get16(&z->s); return 1; case 0xDB: // DQT - define quantization table - L = get16()-2; + L = get16(&z->s)-2; while (L > 0) { - int z = get8(); - int p = z >> 4; - int t = z & 15,i; + int q = get8(&z->s); + int p = q >> 4; + int t = q & 15,i; if (p != 0) return e("bad DQT type","Corrupt JPEG"); if (t > 3) return e("bad DQT table","Corrupt JPEG"); for (i=0; i < 64; ++i) - dequant[t][dezigzag[i]] = get8u(); + z->dequant[t][dezigzag[i]] = get8u(&z->s); + #if STBI_SIMD + for (i=0; i < 64; ++i) + z->dequant2[t][i] = z->dequant[t][i]; + #endif L -= 65; } return L==0; case 0xC4: // DHT - define huffman table - L = get16()-2; + L = get16(&z->s)-2; while (L > 0) { uint8 *v; int sizes[16],i,m=0; - int z = get8(); - int tc = z >> 4; - int th = z & 15; + int q = get8(&z->s); + int tc = q >> 4; + int th = q & 15; if (tc > 1 || th > 3) return e("bad DHT header","Corrupt JPEG"); for (i=0; i < 16; ++i) { - sizes[i] = get8(); + sizes[i] = get8(&z->s); m += sizes[i]; } L -= 17; if (tc == 0) { - if (!build_huffman(huff_dc+th, sizes)) return 0; - v = huff_dc[th].values; + if (!build_huffman(z->huff_dc+th, sizes)) return 0; + v = z->huff_dc[th].values; } else { - if (!build_huffman(huff_ac+th, sizes)) return 0; - v = huff_ac[th].values; + if (!build_huffman(z->huff_ac+th, sizes)) return 0; + v = z->huff_ac[th].values; } for (i=0; i < m; ++i) - v[i] = get8u(); + v[i] = get8u(&z->s); L -= m; } return L==0; } // check for comment block or APP blocks if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) { - skip(get16()-2); + skip(&z->s, get16(&z->s)-2); return 1; } return 0; } // after we see SOS -static int process_scan_header(void) +static int process_scan_header(jpeg *z) { int i; - int Ls = get16(); - scan_n = get8(); - if (scan_n < 1 || scan_n > 4 || scan_n > (int) img_n) return e("bad SOS component count","Corrupt JPEG"); - if (Ls != 6+2*scan_n) return e("bad SOS len","Corrupt JPEG"); - for (i=0; i < scan_n; ++i) { - int id = get8(), which; - int z = get8(); - for (which = 0; which < img_n; ++which) - if (img_comp[which].id == id) + int Ls = get16(&z->s); + z->scan_n = get8(&z->s); + if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s.img_n) return e("bad SOS component count","Corrupt JPEG"); + if (Ls != 6+2*z->scan_n) return e("bad SOS len","Corrupt JPEG"); + for (i=0; i < z->scan_n; ++i) { + int id = get8(&z->s), which; + int q = get8(&z->s); + for (which = 0; which < z->s.img_n; ++which) + if (z->img_comp[which].id == id) break; - if (which == img_n) return 0; - img_comp[which].hd = z >> 4; if (img_comp[which].hd > 3) return e("bad DC huff","Corrupt JPEG"); - img_comp[which].ha = z & 15; if (img_comp[which].ha > 3) return e("bad AC huff","Corrupt JPEG"); - order[i] = which; + if (which == z->s.img_n) return 0; + z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return e("bad DC huff","Corrupt JPEG"); + z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return e("bad AC huff","Corrupt JPEG"); + z->order[i] = which; } - if (get8() != 0) return e("bad SOS","Corrupt JPEG"); - get8(); // should be 63, but might be 0 - if (get8() != 0) return e("bad SOS","Corrupt JPEG"); + if (get8(&z->s) != 0) return e("bad SOS","Corrupt JPEG"); + get8(&z->s); // should be 63, but might be 0 + if (get8(&z->s) != 0) return e("bad SOS","Corrupt JPEG"); return 1; } -static int process_frame_header(int scan) +static int process_frame_header(jpeg *z, int scan) { - int Lf,p,i,z, h_max=1,v_max=1; - Lf = get16(); if (Lf < 11) return e("bad SOF len","Corrupt JPEG"); // JPEG - p = get8(); if (p != 8) return e("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline - img_y = get16(); if (img_y == 0) return e("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG - img_x = get16(); if (img_x == 0) return e("0 width","Corrupt JPEG"); // JPEG requires - img_n = get8(); - if (img_n != 3 && img_n != 1) return e("bad component count","Corrupt JPEG"); // JFIF requires + stbi *s = &z->s; + int Lf,p,i,q, h_max=1,v_max=1,c; + Lf = get16(s); if (Lf < 11) return e("bad SOF len","Corrupt JPEG"); // JPEG + p = get8(s); if (p != 8) return e("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline + s->img_y = get16(s); if (s->img_y == 0) return e("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG + s->img_x = get16(s); if (s->img_x == 0) return e("0 width","Corrupt JPEG"); // JPEG requires + c = get8(s); + if (c != 3 && c != 1) return e("bad component count","Corrupt JPEG"); // JFIF requires + s->img_n = c; + for (i=0; i < c; ++i) { + z->img_comp[i].data = NULL; + z->img_comp[i].linebuf = NULL; + } - if (Lf != 8+3*img_n) return e("bad SOF len","Corrupt JPEG"); + if (Lf != 8+3*s->img_n) return e("bad SOF len","Corrupt JPEG"); - for (i=0; i < img_n; ++i) { - img_comp[i].id = get8(); - if (img_comp[i].id != i+1) // JFIF requires - if (img_comp[i].id != i) // jpegtran outputs non-JFIF-compliant files! + for (i=0; i < s->img_n; ++i) { + z->img_comp[i].id = get8(s); + if (z->img_comp[i].id != i+1) // JFIF requires + if (z->img_comp[i].id != i) // some version of jpegtran outputs non-JFIF-compliant files! return e("bad component ID","Corrupt JPEG"); - z = get8(); - img_comp[i].h = (z >> 4); if (!img_comp[i].h || img_comp[i].h > 4) return e("bad H","Corrupt JPEG"); - img_comp[i].v = z & 15; if (!img_comp[i].v || img_comp[i].v > 4) return e("bad V","Corrupt JPEG"); - img_comp[i].tq = get8(); if (img_comp[i].tq > 3) return e("bad TQ","Corrupt JPEG"); + q = get8(s); + z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return e("bad H","Corrupt JPEG"); + z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return e("bad V","Corrupt JPEG"); + z->img_comp[i].tq = get8(s); if (z->img_comp[i].tq > 3) return e("bad TQ","Corrupt JPEG"); } if (scan != SCAN_load) return 1; - if ((1 << 30) / img_x / img_n < img_y) return e("too large", "Image too large to decode"); + if ((1 << 30) / s->img_x / s->img_n < s->img_y) return e("too large", "Image too large to decode"); - for (i=0; i < img_n; ++i) { - if (img_comp[i].h > h_max) h_max = img_comp[i].h; - if (img_comp[i].v > v_max) v_max = img_comp[i].v; + for (i=0; i < s->img_n; ++i) { + if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h; + if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v; } // compute interleaved mcu info - img_h_max = h_max; - img_v_max = v_max; - img_mcu_w = h_max * 8; - img_mcu_h = v_max * 8; - img_mcu_x = (img_x + img_mcu_w-1) / img_mcu_w; - img_mcu_y = (img_y + img_mcu_h-1) / img_mcu_h; + z->img_h_max = h_max; + z->img_v_max = v_max; + z->img_mcu_w = h_max * 8; + z->img_mcu_h = v_max * 8; + z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w; + z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h; - for (i=0; i < img_n; ++i) { + for (i=0; i < s->img_n; ++i) { // number of effective pixels (e.g. for non-interleaved MCU) - img_comp[i].x = (img_x * img_comp[i].h + h_max-1) / h_max; - img_comp[i].y = (img_y * img_comp[i].v + v_max-1) / v_max; + z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max; + z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max; // to simplify generation, we'll allocate enough memory to decode // the bogus oversized data from using interleaved MCUs and their // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't // discard the extra data until colorspace conversion - img_comp[i].w2 = img_mcu_x * img_comp[i].h * 8; - img_comp[i].h2 = img_mcu_y * img_comp[i].v * 8; - img_comp[i].data = (uint8 *) malloc(img_comp[i].w2 * img_comp[i].h2); - if (img_comp[i].data == NULL) { - for(--i; i >= 0; --i) - free(img_comp[i].data); + z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8; + z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8; + z->img_comp[i].raw_data = malloc(z->img_comp[i].w2 * z->img_comp[i].h2+15); + if (z->img_comp[i].raw_data == NULL) { + for(--i; i >= 0; --i) { + free(z->img_comp[i].raw_data); + z->img_comp[i].data = NULL; + } return e("outofmem", "Out of memory"); } + // align blocks for installable-idct using mmx/sse + z->img_comp[i].data = (uint8*) (((size_t) z->img_comp[i].raw_data + 15) & ~15); + z->img_comp[i].linebuf = NULL; } return 1; @@ -1157,143 +1264,127 @@ static int process_frame_header(int scan) #define SOF(x) ((x) == 0xc0 || (x) == 0xc1) #define SOS(x) ((x) == 0xda) -static int decode_jpeg_header(int scan) +static int decode_jpeg_header(jpeg *z, int scan) { int m; - marker = MARKER_none; // initialize cached marker to empty - m = get_marker(); + z->marker = MARKER_none; // initialize cached marker to empty + m = get_marker(z); if (!SOI(m)) return e("no SOI","Corrupt JPEG"); if (scan == SCAN_type) return 1; - m = get_marker(); + m = get_marker(z); while (!SOF(m)) { - if (!process_marker(m)) return 0; - m = get_marker(); + if (!process_marker(z,m)) return 0; + m = get_marker(z); while (m == MARKER_none) { // some files have extra padding after their blocks, so ok, we'll scan - if (at_eof()) return e("no SOF", "Corrupt JPEG"); - m = get_marker(); + if (at_eof(&z->s)) return e("no SOF", "Corrupt JPEG"); + m = get_marker(z); } } - if (!process_frame_header(scan)) return 0; + if (!process_frame_header(z, scan)) return 0; return 1; } -static int decode_jpeg_image(void) +static int decode_jpeg_image(jpeg *j) { int m; - restart_interval = 0; - if (!decode_jpeg_header(SCAN_load)) return 0; - m = get_marker(); + j->restart_interval = 0; + if (!decode_jpeg_header(j, SCAN_load)) return 0; + m = get_marker(j); while (!EOI(m)) { if (SOS(m)) { - if (!process_scan_header()) return 0; - if (!parse_entropy_coded_data()) return 0; + if (!process_scan_header(j)) return 0; + if (!parse_entropy_coded_data(j)) return 0; } else { - if (!process_marker(m)) return 0; + if (!process_marker(j, m)) return 0; } - m = get_marker(); + m = get_marker(j); } return 1; } -// static jfif-centered resampling with cross-block smoothing -// here by cross-block smoothing what I mean is that the resampling -// is bilerp and crosses blocks; I dunno what IJG means +// static jfif-centered resampling (across block boundaries) + +typedef uint8 *(*resample_row_func)(uint8 *out, uint8 *in0, uint8 *in1, + int w, int hs); #define div4(x) ((uint8) ((x) >> 2)) -static void resample_v_2(uint8 *out1, uint8 *input, int w, int h, int s) +static uint8 *resample_row_1(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) +{ + return in_near; +} + +static uint8* resample_row_v_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) { // need to generate two samples vertically for every one in input - uint8 *above; - uint8 *below; - uint8 *source; - uint8 *out2; - int i,j; - source = input; - out2 = out1+w; - for (j=0; j < h; ++j) { - above = source; - source = input + j*s; - below = source + s; if (j == h-1) below = source; - for (i=0; i < w; ++i) { - int n = source[i]*3; - out1[i] = div4(above[i] + n); - out2[i] = div4(below[i] + n); - } - out1 += w*2; - out2 += w*2; - } + int i; + for (i=0; i < w; ++i) + out[i] = div4(3*in_near[i] + in_far[i] + 2); + return out; } -static void resample_h_2(uint8 *out, uint8 *input, int w, int h, int s) +static uint8* resample_row_h_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) { // need to generate two samples horizontally for every one in input - int i,j; + int i; + uint8 *input = in_near; if (w == 1) { - for (j=0; j < h; ++j) - out[j*2+0] = out[j*2+1] = input[j*s]; - return; + // if only one sample, can't do any interpolation + out[0] = out[1] = input[0]; + return out; } - for (j=0; j < h; ++j) { - out[0] = input[0]; - out[1] = div4(input[0]*3 + input[1]); - for (i=1; i < w-1; ++i) { - int n = input[i]*3; - out[i*2-2] = div4(input[i-1] + n); - out[i*2-1] = div4(input[i+1] + n); - } - out[w*2-2] = div4(input[w-2]*3 + input[w-1]); - out[w*2-1] = input[w-1]; - out += w*2; - input += s; + + out[0] = input[0]; + out[1] = div4(input[0]*3 + input[1] + 2); + for (i=1; i < w-1; ++i) { + int n = 3*input[i]+2; + out[i*2+0] = div4(n+input[i-1]); + out[i*2+1] = div4(n+input[i+1]); } + out[i*2+0] = div4(input[w-2]*3 + input[w-1] + 2); + out[i*2+1] = input[w-1]; + return out; } -// .172 seconds on 3*anemones.jpg -static void resample_hv_2(uint8 *out, uint8 *input, int w, int h, int s) +#define div16(x) ((uint8) ((x) >> 4)) + +static uint8 *resample_row_hv_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) { // need to generate 2x2 samples for every one in input + int i,t0,t1; + if (w == 1) { + out[0] = out[1] = div4(3*in_near[0] + in_far[0] + 2); + return out; + } + + t1 = 3*in_near[0] + in_far[0]; + out[0] = div4(t1+2); + for (i=1; i < w; ++i) { + t0 = t1; + t1 = 3*in_near[i]+in_far[i]; + out[i*2-1] = div16(3*t0 + t1 + 8); + out[i*2 ] = div16(3*t1 + t0 + 8); + } + out[w*2-1] = div4(t1+2); + return out; +} + +static uint8 *resample_row_generic(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) +{ + // resample with nearest-neighbor int i,j; - int os = w*2; - // generate edge samples... @TODO lerp them! - for (i=0; i < w; ++i) { - out[i*2+0] = out[i*2+1] = input[i]; - out[i*2+(2*h-1)*os+0] = out[i*2+(2*h-1)*os+1] = input[i+(h-1)*w]; - } - for (j=0; j < h; ++j) { - out[j*os*2+0] = out[j*os*2+os+0] = input[j*w]; - out[j*os*2+os-1] = out[j*os*2+os+os-1] = input[j*w+i-1]; - } - // now generate interior samples; i & j point to top left of input - for (j=0; j < h-1; ++j) { - uint8 *in1 = input+j*s; - uint8 *in2 = in1 + s; - uint8 *out1 = out + (j*2+1)*os + 1; - uint8 *out2 = out1 + os; - for (i=0; i < w-1; ++i) { - int p00 = in1[0], p01=in1[1], p10=in2[0], p11=in2[1]; - int p00_3 = p00*3, p01_3 = p01*3, p10_3 = p10*3, p11_3 = p11*3; - - #define div16(x) ((uint8) ((x) >> 4)) - - out1[0] = div16(p00*9 + p01_3 + p10_3 + p11); - out1[1] = div16(p01*9 + p00_3 + p01_3 + p10); - out2[0] = div16(p10*9 + p11_3 + p00_3 + p01); - out2[1] = div16(p11*9 + p10_3 + p01_3 + p00); - out1 += 2; - out2 += 2; - ++in1; - ++in2; - } - } + for (i=0; i < w; ++i) + for (j=0; j < hs; ++j) + out[i*hs+j] = in_near[i]; + return out; } #define float2fixed(x) ((int) ((x) * 65536 + 0.5)) // 0.38 seconds on 3*anemones.jpg (0.25 with processor = Pro) // VC6 without processor=Pro is generating multiple LEAs per multiply! -static void YCbCr_to_RGB_row(uint8 *out, uint8 *y, uint8 *pcb, uint8 *pcr, int count, int step) +static void YCbCr_to_RGB_row(uint8 *out, const uint8 *y, const uint8 *pcb, const uint8 *pcr, int count, int step) { int i; for (i=0; i < count; ++i) { @@ -1313,109 +1404,143 @@ static void YCbCr_to_RGB_row(uint8 *out, uint8 *y, uint8 *pcb, uint8 *pcr, int c out[0] = (uint8)r; out[1] = (uint8)g; out[2] = (uint8)b; - if (step == 4) out[3] = 255; + out[3] = 255; out += step; } } +#if STBI_SIMD +static stbi_YCbCr_to_RGB_run stbi_YCbCr_installed = YCbCr_to_RGB_row; + +void stbi_install_YCbCr_to_RGB(stbi_YCbCr_to_RGB_run func) +{ + stbi_YCbCr_installed = func; +} +#endif + + // clean up the temporary component buffers -static void cleanup_jpeg(void) +static void cleanup_jpeg(jpeg *j) { int i; - for (i=0; i < img_n; ++i) { - if (img_comp[i].data) { - free(img_comp[i].data); - img_comp[i].data = NULL; + for (i=0; i < j->s.img_n; ++i) { + if (j->img_comp[i].data) { + free(j->img_comp[i].raw_data); + j->img_comp[i].data = NULL; + } + if (j->img_comp[i].linebuf) { + free(j->img_comp[i].linebuf); + j->img_comp[i].linebuf = NULL; } } } -static uint8 *load_jpeg_image(int *out_x, int *out_y, int *comp, int req_comp) +typedef struct { - int i, n; + resample_row_func resample; + uint8 *line0,*line1; + int hs,vs; // expansion factor in each axis + int w_lores; // horizontal pixels pre-expansion + int ystep; // how far through vertical expansion we are + int ypos; // which pre-expansion row we're on +} stbi_resample; + +static uint8 *load_jpeg_image(jpeg *z, int *out_x, int *out_y, int *comp, int req_comp) +{ + int n, decode_n; // validate req_comp if (req_comp < 0 || req_comp > 4) return epuc("bad req_comp", "Internal error"); + z->s.img_n = 0; // load a jpeg image from whichever source - if (!decode_jpeg_image()) { cleanup_jpeg(); return NULL; } + if (!decode_jpeg_image(z)) { cleanup_jpeg(z); return NULL; } // determine actual number of components to generate - n = req_comp ? req_comp : img_n; + n = req_comp ? req_comp : z->s.img_n; - // resample components to full size... memory wasteful, but this - // lets us bilerp across blocks while upsampling - for (i=0; i < img_n; ++i) { - // if we're outputting fewer than 3 components, we're grey not RGB; - // in that case, don't bother upsampling Cb or Cr - if (n < 3 && i) continue; + if (z->s.img_n == 3 && n < 3) + decode_n = 1; + else + decode_n = z->s.img_n; - // check if the component scale is less than max; if so it needs upsampling - if (img_comp[i].h != img_h_max || img_comp[i].v != img_v_max) { - int stride = img_x; - // allocate final size; make sure it's big enough for upsampling off - // the edges with upsample up to 4x4 (although we only support 2x2 - // currently) - uint8 *new_data = (uint8 *) malloc((img_x+3)*(img_y+3)); - if (new_data == NULL) { - cleanup_jpeg(); - return epuc("outofmem", "Out of memory (image too large?)"); - } - if (img_comp[i].h*2 == img_h_max && img_comp[i].v*2 == img_v_max) { - int tx = (img_x+1)>>1; - resample_hv_2(new_data, img_comp[i].data, tx,(img_y+1)>>1, img_comp[i].w2); - stride = tx*2; - } else if (img_comp[i].h == img_h_max && img_comp[i].v*2 == img_v_max) { - resample_v_2(new_data, img_comp[i].data, img_x,(img_y+1)>>1, img_comp[i].w2); - } else if (img_comp[i].h*2 == img_h_max && img_comp[i].v == img_v_max) { - int tx = (img_x+1)>>1; - resample_h_2(new_data, img_comp[i].data, tx,img_y, img_comp[i].w2); - stride = tx*2; - } else { - // @TODO resample uncommon sampling pattern with nearest neighbor - free(new_data); - cleanup_jpeg(); - return epuc("uncommon H or V", "JPEG not supported: atypical downsampling mode"); - } - img_comp[i].w2 = stride; - free(img_comp[i].data); - img_comp[i].data = new_data; - } - } - - // now convert components to output image + // resample and color-convert { - uint32 i,j; - uint8 *output = (uint8 *) malloc(n * img_x * img_y + 1); - if (n >= 3) { // output STBI_rgb_* - for (j=0; j < img_y; ++j) { - uint8 *y = img_comp[0].data + j*img_comp[0].w2; - uint8 *out = output + n * img_x * j; - if (img_n == 3) { - uint8 *cb = img_comp[1].data + j*img_comp[1].w2; - uint8 *cr = img_comp[2].data + j*img_comp[2].w2; - YCbCr_to_RGB_row(out, y, cb, cr, img_x, n); - } else { - for (i=0; i < img_x; ++i) { - out[0] = out[1] = out[2] = y[i]; - out[3] = 255; // not used if n == 3 - out += n; - } + int k; + uint i,j; + uint8 *output; + uint8 *coutput[4]; + + stbi_resample res_comp[4]; + + for (k=0; k < decode_n; ++k) { + stbi_resample *r = &res_comp[k]; + + // allocate line buffer big enough for upsampling off the edges + // with upsample factor of 4 + z->img_comp[k].linebuf = (uint8 *) malloc(z->s.img_x + 3); + if (!z->img_comp[k].linebuf) { cleanup_jpeg(z); return epuc("outofmem", "Out of memory"); } + + r->hs = z->img_h_max / z->img_comp[k].h; + r->vs = z->img_v_max / z->img_comp[k].v; + r->ystep = r->vs >> 1; + r->w_lores = (z->s.img_x + r->hs-1) / r->hs; + r->ypos = 0; + r->line0 = r->line1 = z->img_comp[k].data; + + if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1; + else if (r->hs == 1 && r->vs == 2) r->resample = resample_row_v_2; + else if (r->hs == 2 && r->vs == 1) r->resample = resample_row_h_2; + else if (r->hs == 2 && r->vs == 2) r->resample = resample_row_hv_2; + else r->resample = resample_row_generic; + } + + // can't error after this so, this is safe + output = (uint8 *) malloc(n * z->s.img_x * z->s.img_y + 1); + if (!output) { cleanup_jpeg(z); return epuc("outofmem", "Out of memory"); } + + // now go ahead and resample + for (j=0; j < z->s.img_y; ++j) { + uint8 *out = output + n * z->s.img_x * j; + for (k=0; k < decode_n; ++k) { + stbi_resample *r = &res_comp[k]; + int y_bot = r->ystep >= (r->vs >> 1); + coutput[k] = r->resample(z->img_comp[k].linebuf, + y_bot ? r->line1 : r->line0, + y_bot ? r->line0 : r->line1, + r->w_lores, r->hs); + if (++r->ystep >= r->vs) { + r->ystep = 0; + r->line0 = r->line1; + if (++r->ypos < z->img_comp[k].y) + r->line1 += z->img_comp[k].w2; } } - } else { // output STBI_grey_* - for (j=0; j < img_y; ++j) { - uint8 *y = img_comp[0].data + j*img_comp[0].w2; - uint8 *out = output + n * img_x * j; + if (n >= 3) { + uint8 *y = coutput[0]; + if (z->s.img_n == 3) { + #if STBI_SIMD + stbi_YCbCr_installed(out, y, coutput[1], coutput[2], z->s.img_x, n); + #else + YCbCr_to_RGB_row(out, y, coutput[1], coutput[2], z->s.img_x, n); + #endif + } else + for (i=0; i < z->s.img_x; ++i) { + out[0] = out[1] = out[2] = y[i]; + out[3] = 255; // not used if n==3 + out += n; + } + } else { + uint8 *y = coutput[0]; if (n == 1) - for (i=0; i < img_x; ++i) *out++ = *y++; + for (i=0; i < z->s.img_x; ++i) out[i] = y[i]; else - for (i=0; i < img_x; ++i) *out++ = *y++, *out++ = 255; + for (i=0; i < z->s.img_x; ++i) *out++ = y[i], *out++ = 255; } } - cleanup_jpeg(); - *out_x = img_x; - *out_y = img_y; - if (comp) *comp = img_n; // report original components, not output + cleanup_jpeg(z); + *out_x = z->s.img_x; + *out_y = z->s.img_y; + if (comp) *comp = z->s.img_n; // report original components, not output return output; } } @@ -1423,11 +1548,12 @@ static uint8 *load_jpeg_image(int *out_x, int *out_y, int *comp, int req_comp) #ifndef STBI_NO_STDIO unsigned char *stbi_jpeg_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) { - start_file(f); - return load_jpeg_image(x,y,comp,req_comp); + jpeg j; + start_file(&j.s, f); + return load_jpeg_image(&j, x,y,comp,req_comp); } -unsigned char *stbi_jpeg_load(char *filename, int *x, int *y, int *comp, int req_comp) +unsigned char *stbi_jpeg_load(char const *filename, int *x, int *y, int *comp, int req_comp) { unsigned char *data; FILE *f = fopen(filename, "rb"); @@ -1438,36 +1564,39 @@ unsigned char *stbi_jpeg_load(char *filename, int *x, int *y, int *comp, int req } #endif -unsigned char *stbi_jpeg_load_from_memory(stbi_uc *buffer, int len, int *x, int *y, int *comp, int req_comp) +unsigned char *stbi_jpeg_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) { - start_mem(buffer,len); - return load_jpeg_image(x,y,comp,req_comp); + jpeg j; + start_mem(&j.s, buffer,len); + return load_jpeg_image(&j, x,y,comp,req_comp); } #ifndef STBI_NO_STDIO int stbi_jpeg_test_file(FILE *f) { int n,r; + jpeg j; n = ftell(f); - start_file(f); - r = decode_jpeg_header(SCAN_type); + start_file(&j.s, f); + r = decode_jpeg_header(&j, SCAN_type); fseek(f,n,SEEK_SET); return r; } #endif -int stbi_jpeg_test_memory(unsigned char *buffer, int len) +int stbi_jpeg_test_memory(stbi_uc const *buffer, int len) { - start_mem(buffer,len); - return decode_jpeg_header(SCAN_type); + jpeg j; + start_mem(&j.s, buffer,len); + return decode_jpeg_header(&j, SCAN_type); } // @TODO: #ifndef STBI_NO_STDIO -extern int stbi_jpeg_info (char *filename, int *x, int *y, int *comp); +extern int stbi_jpeg_info (char const *filename, int *x, int *y, int *comp); extern int stbi_jpeg_info_from_file (FILE *f, int *x, int *y, int *comp); #endif -extern int stbi_jpeg_info_from_memory(stbi_uc *buffer, int len, int *x, int *y, int *comp); +extern int stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); // public domain zlib decode v0.2 Sean Barrett 2006-11-18 // simple implementation @@ -1560,51 +1689,60 @@ static int zbuild_huffman(zhuffman *z, uint8 *sizelist, int num) // we require PNG read all the IDATs and combine them into a single // memory buffer -static uint8 *zbuffer, *zbuffer_end; - -__forceinline static int zget8(void) +typedef struct { - if (zbuffer >= zbuffer_end) return 0; - return *zbuffer++; + uint8 *zbuffer, *zbuffer_end; + int num_bits; + uint32 code_buffer; + + char *zout; + char *zout_start; + char *zout_end; + int z_expandable; + + zhuffman z_length, z_distance; +} zbuf; + +__forceinline static int zget8(zbuf *z) +{ + if (z->zbuffer >= z->zbuffer_end) return 0; + return *z->zbuffer++; } -//static unsigned long code_buffer; -static int num_bits; - -static void fill_bits(void) +static void fill_bits(zbuf *z) { do { - assert(code_buffer < (1U << num_bits)); - code_buffer |= zget8() << num_bits; - num_bits += 8; - } while (num_bits <= 24); + assert(z->code_buffer < (1U << z->num_bits)); + z->code_buffer |= zget8(z) << z->num_bits; + z->num_bits += 8; + } while (z->num_bits <= 24); } -__forceinline static unsigned int zreceive(int n) +__forceinline static unsigned int zreceive(zbuf *z, int n) { unsigned int k; - if (num_bits < n) fill_bits(); - k = code_buffer & ((1 << n) - 1); - code_buffer >>= n; - num_bits -= n; + if (z->num_bits < n) fill_bits(z); + k = z->code_buffer & ((1 << n) - 1); + z->code_buffer >>= n; + z->num_bits -= n; return k; } -__forceinline static int zhuffman_decode(zhuffman *z) +__forceinline static int zhuffman_decode(zbuf *a, zhuffman *z) { int b,s,k; - if (num_bits < 16) fill_bits(); - b = z->fast[code_buffer & ZFAST_MASK]; + if (a->num_bits < 16) fill_bits(a); + b = z->fast[a->code_buffer & ZFAST_MASK]; if (b < 0xffff) { s = z->size[b]; - code_buffer >>= s; - num_bits -= s; + a->code_buffer >>= s; + a->num_bits -= s; return z->value[b]; } // not resolved by fast table, so compute it the slow way // use jpeg approach, which requires MSbits at top - k = bit_reverse(code_buffer, 16); + k = bit_reverse(a->code_buffer, 16); for (s=ZFAST_BITS+1; ; ++s) if (k < z->maxcode[s]) break; @@ -1612,35 +1750,28 @@ __forceinline static int zhuffman_decode(zhuffman *z) // code size is s, so: b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s]; assert(z->size[b] == s); - code_buffer >>= s; - num_bits -= s; + a->code_buffer >>= s; + a->num_bits -= s; return z->value[b]; } -static char *zout; -static char *zout_start; -static char *zout_end; -static int z_expandable; - -static int expand(int n) // need to make room for n bytes +static int expand(zbuf *z, int n) // need to make room for n bytes { char *q; int cur, limit; - if (!z_expandable) return e("output buffer limit","Corrupt PNG"); - cur = (int) (zout - zout_start); - limit = (int) (zout_end - zout_start); + if (!z->z_expandable) return e("output buffer limit","Corrupt PNG"); + cur = (int) (z->zout - z->zout_start); + limit = (int) (z->zout_end - z->zout_start); while (cur + n > limit) limit *= 2; - q = (char *) realloc(zout_start, limit); + q = (char *) realloc(z->zout_start, limit); if (q == NULL) return e("outofmem", "Out of memory"); - zout_start = q; - zout = q + cur; - zout_end = q + limit; + z->zout_start = q; + z->zout = q + cur; + z->zout_end = q + limit; return 1; } -static zhuffman z_length, z_distance; - static int length_base[31] = { 3,4,5,6,7,8,9,10,11,13, 15,17,19,23,27,31,35,43,51,59, @@ -1655,115 +1786,115 @@ static int dist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, static int dist_extra[32] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; -static int parse_huffman_block(void) +static int parse_huffman_block(zbuf *a) { for(;;) { - int z = zhuffman_decode(&z_length); + int z = zhuffman_decode(a, &a->z_length); if (z < 256) { if (z < 0) return e("bad huffman code","Corrupt PNG"); // error in huffman codes - if (zout >= zout_end) if (!expand(1)) return 0; - *zout++ = (char) z; + if (a->zout >= a->zout_end) if (!expand(a, 1)) return 0; + *a->zout++ = (char) z; } else { uint8 *p; int len,dist; if (z == 256) return 1; z -= 257; len = length_base[z]; - if (length_extra[z]) len += zreceive(length_extra[z]); - z = zhuffman_decode(&z_distance); + if (length_extra[z]) len += zreceive(a, length_extra[z]); + z = zhuffman_decode(a, &a->z_distance); if (z < 0) return e("bad huffman code","Corrupt PNG"); dist = dist_base[z]; - if (dist_extra[z]) dist += zreceive(dist_extra[z]); - if (zout - zout_start < dist) return e("bad dist","Corrupt PNG"); - if (zout + len > zout_end) if (!expand(len)) return 0; - p = (uint8 *) (zout - dist); + if (dist_extra[z]) dist += zreceive(a, dist_extra[z]); + if (a->zout - a->zout_start < dist) return e("bad dist","Corrupt PNG"); + if (a->zout + len > a->zout_end) if (!expand(a, len)) return 0; + p = (uint8 *) (a->zout - dist); while (len--) - *zout++ = *p++; + *a->zout++ = *p++; } } } -static int compute_huffman_codes(void) +static int compute_huffman_codes(zbuf *a) { static uint8 length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; - static zhuffman z_codelength; // static just to save stack space + zhuffman z_codelength; uint8 lencodes[286+32+137];//padding for maximum single op uint8 codelength_sizes[19]; int i,n; - int hlit = zreceive(5) + 257; - int hdist = zreceive(5) + 1; - int hclen = zreceive(4) + 4; + int hlit = zreceive(a,5) + 257; + int hdist = zreceive(a,5) + 1; + int hclen = zreceive(a,4) + 4; memset(codelength_sizes, 0, sizeof(codelength_sizes)); for (i=0; i < hclen; ++i) { - int s = zreceive(3); + int s = zreceive(a,3); codelength_sizes[length_dezigzag[i]] = (uint8) s; } if (!zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0; n = 0; while (n < hlit + hdist) { - int c = zhuffman_decode(&z_codelength); + int c = zhuffman_decode(a, &z_codelength); assert(c >= 0 && c < 19); if (c < 16) lencodes[n++] = (uint8) c; else if (c == 16) { - c = zreceive(2)+3; + c = zreceive(a,2)+3; memset(lencodes+n, lencodes[n-1], c); n += c; } else if (c == 17) { - c = zreceive(3)+3; + c = zreceive(a,3)+3; memset(lencodes+n, 0, c); n += c; } else { assert(c == 18); - c = zreceive(7)+11; + c = zreceive(a,7)+11; memset(lencodes+n, 0, c); n += c; } } if (n != hlit+hdist) return e("bad codelengths","Corrupt PNG"); - if (!zbuild_huffman(&z_length, lencodes, hlit)) return 0; - if (!zbuild_huffman(&z_distance, lencodes+hlit, hdist)) return 0; + if (!zbuild_huffman(&a->z_length, lencodes, hlit)) return 0; + if (!zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0; return 1; } -static int parse_uncompressed_block(void) +static int parse_uncompressed_block(zbuf *a) { uint8 header[4]; int len,nlen,k; - if (num_bits & 7) - zreceive(num_bits & 7); // discard + if (a->num_bits & 7) + zreceive(a, a->num_bits & 7); // discard // drain the bit-packed data into header k = 0; - while (num_bits > 0) { - header[k++] = (uint8) (code_buffer & 255); // wtf this warns? - code_buffer >>= 8; - num_bits -= 8; + while (a->num_bits > 0) { + header[k++] = (uint8) (a->code_buffer & 255); // wtf this warns? + a->code_buffer >>= 8; + a->num_bits -= 8; } - assert(num_bits == 0); + assert(a->num_bits == 0); // now fill header the normal way while (k < 4) - header[k++] = (uint8) zget8(); + header[k++] = (uint8) zget8(a); len = header[1] * 256 + header[0]; nlen = header[3] * 256 + header[2]; if (nlen != (len ^ 0xffff)) return e("zlib corrupt","Corrupt PNG"); - if (zbuffer + len > zbuffer_end) return e("read past buffer","Corrupt PNG"); - if (zout + len > zout_end) - if (!expand(len)) return 0; - memcpy(zout, zbuffer, len); - zbuffer += len; - zout += len; + if (a->zbuffer + len > a->zbuffer_end) return e("read past buffer","Corrupt PNG"); + if (a->zout + len > a->zout_end) + if (!expand(a, len)) return 0; + memcpy(a->zout, a->zbuffer, len); + a->zbuffer += len; + a->zout += len; return 1; } -static int parse_zlib_header(void) +static int parse_zlib_header(zbuf *a) { - int cmf = zget8(); + int cmf = zget8(a); int cm = cmf & 15; /* int cinfo = cmf >> 4; */ - int flg = zget8(); + int flg = zget8(a); if ((cmf*256+flg) % 31 != 0) return e("bad zlib header","Corrupt PNG"); // zlib spec if (flg & 32) return e("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png if (cm != 8) return e("bad compression","Corrupt PNG"); // DEFLATE required for png @@ -1771,6 +1902,7 @@ static int parse_zlib_header(void) return 1; } +// @TODO: should statically initialize these for optimal thread safety static uint8 default_length[288], default_distance[32]; static void init_defaults(void) { @@ -1783,96 +1915,103 @@ static void init_defaults(void) for (i=0; i <= 31; ++i) default_distance[i] = 5; } -static int parse_zlib(int parse_header) +int stbi_png_partial; // a quick hack to only allow decoding some of a PNG... I should implement real streaming support instead +static int parse_zlib(zbuf *a, int parse_header) { int final, type; if (parse_header) - if (!parse_zlib_header()) return 0; - num_bits = 0; - code_buffer = 0; + if (!parse_zlib_header(a)) return 0; + a->num_bits = 0; + a->code_buffer = 0; do { - final = zreceive(1); - type = zreceive(2); + final = zreceive(a,1); + type = zreceive(a,2); if (type == 0) { - if (!parse_uncompressed_block()) return 0; + if (!parse_uncompressed_block(a)) return 0; } else if (type == 3) { return 0; } else { if (type == 1) { // use fixed code lengths - if (!default_length[0]) init_defaults(); - if (!zbuild_huffman(&z_length , default_length , 288)) return 0; - if (!zbuild_huffman(&z_distance, default_distance, 32)) return 0; + if (!default_distance[31]) init_defaults(); + if (!zbuild_huffman(&a->z_length , default_length , 288)) return 0; + if (!zbuild_huffman(&a->z_distance, default_distance, 32)) return 0; } else { - if (!compute_huffman_codes()) return 0; + if (!compute_huffman_codes(a)) return 0; } - if (!parse_huffman_block()) return 0; + if (!parse_huffman_block(a)) return 0; } + if (stbi_png_partial && a->zout - a->zout_start > 65536) + break; } while (!final); return 1; } -static int do_zlib(char *obuf, int olen, int exp, int parse_header) +static int do_zlib(zbuf *a, char *obuf, int olen, int exp, int parse_header) { - zout_start = obuf; - zout = obuf; - zout_end = obuf + olen; - z_expandable = exp; + a->zout_start = obuf; + a->zout = obuf; + a->zout_end = obuf + olen; + a->z_expandable = exp; - return parse_zlib(parse_header); + return parse_zlib(a, parse_header); } -char *stbi_zlib_decode_malloc_guesssize(int initial_size, int *outlen) +char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen) { + zbuf a; char *p = (char *) malloc(initial_size); if (p == NULL) return NULL; - if (do_zlib(p, initial_size, 1, 1)) { - *outlen = (int) (zout - zout_start); - return zout_start; + a.zbuffer = (uint8 *) buffer; + a.zbuffer_end = (uint8 *) buffer + len; + if (do_zlib(&a, p, initial_size, 1, 1)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; } else { - free(zout_start); + free(a.zout_start); return NULL; } } -char *stbi_zlib_decode_malloc(char *buffer, int len, int *outlen) +char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen) { - zbuffer = (uint8 *) buffer; - zbuffer_end = (uint8 *) buffer+len; - return stbi_zlib_decode_malloc_guesssize(16384, outlen); + return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen); } -int stbi_zlib_decode_buffer(char *obuffer, int olen, char *ibuffer, int ilen) +int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen) { - zbuffer = (uint8 *) ibuffer; - zbuffer_end = (uint8 *) ibuffer + ilen; - if (do_zlib(obuffer, olen, 0, 1)) - return (int) (zout - zout_start); + zbuf a; + a.zbuffer = (uint8 *) ibuffer; + a.zbuffer_end = (uint8 *) ibuffer + ilen; + if (do_zlib(&a, obuffer, olen, 0, 1)) + return (int) (a.zout - a.zout_start); else return -1; } -char *stbi_zlib_decode_noheader_malloc(char *buffer, int len, int *outlen) +char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen) { + zbuf a; char *p = (char *) malloc(16384); if (p == NULL) return NULL; - zbuffer = (uint8 *) buffer; - zbuffer_end = (uint8 *) buffer+len; - if (do_zlib(p, 16384, 1, 0)) { - *outlen = (int) (zout - zout_start); - return zout_start; + a.zbuffer = (uint8 *) buffer; + a.zbuffer_end = (uint8 *) buffer+len; + if (do_zlib(&a, p, 16384, 1, 0)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; } else { - free(zout_start); + free(a.zout_start); return NULL; } } -int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, char *ibuffer, int ilen) +int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen) { - zbuffer = (uint8 *) ibuffer; - zbuffer_end = (uint8 *) ibuffer + ilen; - if (do_zlib(obuffer, olen, 0, 0)) - return (int) (zout - zout_start); + zbuf a; + a.zbuffer = (uint8 *) ibuffer; + a.zbuffer_end = (uint8 *) ibuffer + ilen; + if (do_zlib(&a, obuffer, olen, 0, 0)) + return (int) (a.zout - a.zout_start); else return -1; } @@ -1896,24 +2035,29 @@ typedef struct #define PNG_TYPE(a,b,c,d) (((a) << 24) + ((b) << 16) + ((c) << 8) + (d)) -static chunk get_chunk_header(void) +static chunk get_chunk_header(stbi *s) { chunk c; - c.length = get32(); - c.type = get32(); + c.length = get32(s); + c.type = get32(s); return c; } -static int check_png_header(void) +static int check_png_header(stbi *s) { static uint8 png_sig[8] = { 137,80,78,71,13,10,26,10 }; int i; for (i=0; i < 8; ++i) - if (get8() != png_sig[i]) return e("bad png sig","Not a PNG"); + if (get8(s) != png_sig[i]) return e("bad png sig","Not a PNG"); return 1; } -static uint8 *idata, *expanded, *out; +typedef struct +{ + stbi s; + uint8 *idata, *expanded, *out; +} png; + enum { F_none=0, F_sub=1, F_up=2, F_avg=3, F_paeth=4, @@ -1937,16 +2081,24 @@ static int paeth(int a, int b, int c) } // create the png data from post-deflated data -static int create_png_image(uint8 *raw, uint32 raw_len, int out_n) +static int create_png_image_raw(png *a, uint8 *raw, uint32 raw_len, int out_n, uint32 x, uint32 y) { - uint32 i,j,stride = img_x*out_n; + stbi *s = &a->s; + uint32 i,j,stride = x*out_n; int k; - assert(out_n == img_n || out_n == img_n+1); - out = (uint8 *) malloc(img_x * img_y * out_n); - if (!out) return e("outofmem", "Out of memory"); - if (raw_len != (img_n * img_x + 1) * img_y) return e("not enough pixels","Corrupt PNG"); - for (j=0; j < img_y; ++j) { - uint8 *cur = out + stride*j; + int img_n = s->img_n; // copy it into a local for later + assert(out_n == s->img_n || out_n == s->img_n+1); + if (stbi_png_partial) y = 1; + a->out = (uint8 *) malloc(x * y * out_n); + if (!a->out) return e("outofmem", "Out of memory"); + if (!stbi_png_partial) { + if (s->img_x == x && s->img_y == y) + if (raw_len != (img_n * x + 1) * y) return e("not enough pixels","Corrupt PNG"); + else // interlaced: + if (raw_len < (img_n * x + 1) * y) return e("not enough pixels","Corrupt PNG"); + } + for (j=0; j < y; ++j) { + uint8 *cur = a->out + stride*j; uint8 *prior = cur - stride; int filter = *raw++; if (filter > 4) return e("invalid filter","Corrupt PNG"); @@ -1972,7 +2124,7 @@ static int create_png_image(uint8 *raw, uint32 raw_len, int out_n) if (img_n == out_n) { #define CASE(f) \ case f: \ - for (i=1; i < img_x; ++i, raw+=img_n,cur+=img_n,prior+=img_n) \ + for (i=x-1; i >= 1; --i, raw+=img_n,cur+=img_n,prior+=img_n) \ for (k=0; k < img_n; ++k) switch(filter) { CASE(F_none) cur[k] = raw[k]; break; @@ -1988,7 +2140,7 @@ static int create_png_image(uint8 *raw, uint32 raw_len, int out_n) assert(img_n+1 == out_n); #define CASE(f) \ case f: \ - for (i=1; i < img_x; ++i, cur[img_n]=255,raw+=img_n,cur+=out_n,prior+=out_n) \ + for (i=x-1; i >= 1; --i, cur[img_n]=255,raw+=img_n,cur+=out_n,prior+=out_n) \ for (k=0; k < img_n; ++k) switch(filter) { CASE(F_none) cur[k] = raw[k]; break; @@ -2005,16 +2157,57 @@ static int create_png_image(uint8 *raw, uint32 raw_len, int out_n) return 1; } -static int compute_transparency(uint8 tc[3], int out_n) +static int create_png_image(png *a, uint8 *raw, uint32 raw_len, int out_n, int interlaced) { - uint32 i, pixel_count = img_x * img_y; - uint8 *p = out; + uint8 *final; + int p; + int save; + if (!interlaced) + return create_png_image_raw(a, raw, raw_len, out_n, a->s.img_x, a->s.img_y); + save = stbi_png_partial; + stbi_png_partial = 0; + + // de-interlacing + final = (uint8 *) malloc(a->s.img_x * a->s.img_y * out_n); + for (p=0; p < 7; ++p) { + int xorig[] = { 0,4,0,2,0,1,0 }; + int yorig[] = { 0,0,4,0,2,0,1 }; + int xspc[] = { 8,8,4,4,2,2,1 }; + int yspc[] = { 8,8,8,4,4,2,2 }; + int i,j,x,y; + // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1 + x = (a->s.img_x - xorig[p] + xspc[p]-1) / xspc[p]; + y = (a->s.img_y - yorig[p] + yspc[p]-1) / yspc[p]; + if (x && y) { + if (!create_png_image_raw(a, raw, raw_len, out_n, x, y)) { + free(final); + return 0; + } + for (j=0; j < y; ++j) + for (i=0; i < x; ++i) + memcpy(final + (j*yspc[p]+yorig[p])*a->s.img_x*out_n + (i*xspc[p]+xorig[p])*out_n, + a->out + (j*x+i)*out_n, out_n); + free(a->out); + raw += (x*out_n+1)*y; + raw_len -= (x*out_n+1)*y; + } + } + a->out = final; + + stbi_png_partial = save; + return 1; +} + +static int compute_transparency(png *z, uint8 tc[3], int out_n) +{ + stbi *s = &z->s; + uint32 i, pixel_count = s->img_x * s->img_y; + uint8 *p = z->out; // compute color-based transparency, assuming we've // already got 255 as the alpha value in the output assert(out_n == 2 || out_n == 4); - p = out; if (out_n == 2) { for (i=0; i < pixel_count; ++i) { p[1] = (p[0] == tc[0] ? 0 : 255); @@ -2030,10 +2223,10 @@ static int compute_transparency(uint8 tc[3], int out_n) return 1; } -static int expand_palette(uint8 *palette, int len, int pal_img_n) +static int expand_palette(png *a, uint8 *palette, int len, int pal_img_n) { - uint32 i, pixel_count = img_x * img_y; - uint8 *p, *temp_out, *orig = out; + uint32 i, pixel_count = a->s.img_x * a->s.img_y; + uint8 *p, *temp_out, *orig = a->out; p = (uint8 *) malloc(pixel_count * pal_img_n); if (p == NULL) return e("outofmem", "Out of memory"); @@ -2059,49 +2252,50 @@ static int expand_palette(uint8 *palette, int len, int pal_img_n) p += 4; } } - free(out); - out = temp_out; + free(a->out); + a->out = temp_out; return 1; } -static int parse_png_file(int scan, int req_comp) +static int parse_png_file(png *z, int scan, int req_comp) { uint8 palette[1024], pal_img_n=0; uint8 has_trans=0, tc[3]; uint32 ioff=0, idata_limit=0, i, pal_len=0; - int first=1,k; + int first=1,k,interlace=0; + stbi *s = &z->s; - if (!check_png_header()) return 0; + if (!check_png_header(s)) return 0; if (scan == SCAN_type) return 1; for(;;first=0) { - chunk c = get_chunk_header(); + chunk c = get_chunk_header(s); if (first && c.type != PNG_TYPE('I','H','D','R')) return e("first not IHDR","Corrupt PNG"); switch (c.type) { case PNG_TYPE('I','H','D','R'): { - int depth,color,interlace,comp,filter; + int depth,color,comp,filter; if (!first) return e("multiple IHDR","Corrupt PNG"); if (c.length != 13) return e("bad IHDR len","Corrupt PNG"); - img_x = get32(); if (img_x > (1 << 24)) return e("too large","Very large image (corrupt?)"); - img_y = get32(); if (img_y > (1 << 24)) return e("too large","Very large image (corrupt?)"); - depth = get8(); if (depth != 8) return e("8bit only","PNG not supported: 8-bit only"); - color = get8(); if (color > 6) return e("bad ctype","Corrupt PNG"); + s->img_x = get32(s); if (s->img_x > (1 << 24)) return e("too large","Very large image (corrupt?)"); + s->img_y = get32(s); if (s->img_y > (1 << 24)) return e("too large","Very large image (corrupt?)"); + depth = get8(s); if (depth != 8) return e("8bit only","PNG not supported: 8-bit only"); + color = get8(s); if (color > 6) return e("bad ctype","Corrupt PNG"); if (color == 3) pal_img_n = 3; else if (color & 1) return e("bad ctype","Corrupt PNG"); - comp = get8(); if (comp) return e("bad comp method","Corrupt PNG"); - filter= get8(); if (filter) return e("bad filter method","Corrupt PNG"); - interlace = get8(); if (interlace) return e("interlaced","PNG not supported: interlaced mode"); - if (!img_x || !img_y) return e("0-pixel image","Corrupt PNG"); + comp = get8(s); if (comp) return e("bad comp method","Corrupt PNG"); + filter= get8(s); if (filter) return e("bad filter method","Corrupt PNG"); + interlace = get8(s); if (interlace>1) return e("bad interlace method","Corrupt PNG"); + if (!s->img_x || !s->img_y) return e("0-pixel image","Corrupt PNG"); if (!pal_img_n) { - img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); - if ((1 << 30) / img_x / img_n < img_y) return e("too large", "Image too large to decode"); + s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); + if ((1 << 30) / s->img_x / s->img_n < s->img_y) return e("too large", "Image too large to decode"); if (scan == SCAN_header) return 1; } else { // if paletted, then pal_n is our final components, and // img_n is # components to decompress/filter. - img_n = 1; - if ((1 << 30) / img_x / 4 < img_y) return e("too large","Corrupt PNG"); + s->img_n = 1; + if ((1 << 30) / s->img_x / 4 < s->img_y) return e("too large","Corrupt PNG"); // if SCAN_header, have to scan to see if we have a tRNS } break; @@ -2112,54 +2306,54 @@ static int parse_png_file(int scan, int req_comp) pal_len = c.length / 3; if (pal_len * 3 != c.length) return e("invalid PLTE","Corrupt PNG"); for (i=0; i < pal_len; ++i) { - palette[i*4+0] = get8u(); - palette[i*4+1] = get8u(); - palette[i*4+2] = get8u(); + palette[i*4+0] = get8u(s); + palette[i*4+1] = get8u(s); + palette[i*4+2] = get8u(s); palette[i*4+3] = 255; } break; } case PNG_TYPE('t','R','N','S'): { - if (idata) return e("tRNS after IDAT","Corrupt PNG"); + if (z->idata) return e("tRNS after IDAT","Corrupt PNG"); if (pal_img_n) { - if (scan == SCAN_header) { img_n = 4; return 1; } + if (scan == SCAN_header) { s->img_n = 4; return 1; } if (pal_len == 0) return e("tRNS before PLTE","Corrupt PNG"); if (c.length > pal_len) return e("bad tRNS len","Corrupt PNG"); pal_img_n = 4; for (i=0; i < c.length; ++i) - palette[i*4+3] = get8u(); + palette[i*4+3] = get8u(s); } else { - if (!(img_n & 1)) return e("tRNS with alpha","Corrupt PNG"); - if (c.length != (uint32) img_n*2) return e("bad tRNS len","Corrupt PNG"); + if (!(s->img_n & 1)) return e("tRNS with alpha","Corrupt PNG"); + if (c.length != (uint32) s->img_n*2) return e("bad tRNS len","Corrupt PNG"); has_trans = 1; - for (k=0; k < img_n; ++k) - tc[k] = (uint8) get16(); // non 8-bit images will be larger + for (k=0; k < s->img_n; ++k) + tc[k] = (uint8) get16(s); // non 8-bit images will be larger } break; } case PNG_TYPE('I','D','A','T'): { if (pal_img_n && !pal_len) return e("no PLTE","Corrupt PNG"); - if (scan == SCAN_header) { img_n = pal_img_n; return 1; } + if (scan == SCAN_header) { s->img_n = pal_img_n; return 1; } if (ioff + c.length > idata_limit) { uint8 *p; if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096; while (ioff + c.length > idata_limit) idata_limit *= 2; - p = (uint8 *) realloc(idata, idata_limit); if (p == NULL) return e("outofmem", "Out of memory"); - idata = p; + p = (uint8 *) realloc(z->idata, idata_limit); if (p == NULL) return e("outofmem", "Out of memory"); + z->idata = p; } #ifndef STBI_NO_STDIO - if (img_file) + if (s->img_file) { - if (fread(idata+ioff,1,c.length,img_file) != c.length) return e("outofdata","Corrupt PNG"); + if (fread(z->idata+ioff,1,c.length,s->img_file) != c.length) return e("outofdata","Corrupt PNG"); } else #endif { - memcpy(idata+ioff, img_buffer, c.length); - img_buffer += c.length; + memcpy(z->idata+ioff, s->img_buffer, c.length); + s->img_buffer += c.length; } ioff += c.length; break; @@ -2168,26 +2362,26 @@ static int parse_png_file(int scan, int req_comp) case PNG_TYPE('I','E','N','D'): { uint32 raw_len; if (scan != SCAN_load) return 1; - if (idata == NULL) return e("no IDAT","Corrupt PNG"); - expanded = (uint8 *) stbi_zlib_decode_malloc((char *) idata, ioff, (int *) &raw_len); - if (expanded == NULL) return 0; // zlib should set error - free(idata); idata = NULL; - if ((req_comp == img_n+1 && req_comp != 3 && !pal_img_n) || has_trans) - img_out_n = img_n+1; + if (z->idata == NULL) return e("no IDAT","Corrupt PNG"); + z->expanded = (uint8 *) stbi_zlib_decode_malloc((char *) z->idata, ioff, (int *) &raw_len); + if (z->expanded == NULL) return 0; // zlib should set error + free(z->idata); z->idata = NULL; + if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans) + s->img_out_n = s->img_n+1; else - img_out_n = img_n; - if (!create_png_image(expanded, raw_len, img_out_n)) return 0; + s->img_out_n = s->img_n; + if (!create_png_image(z, z->expanded, raw_len, s->img_out_n, interlace)) return 0; if (has_trans) - if (!compute_transparency(tc, img_out_n)) return 0; + if (!compute_transparency(z, tc, s->img_out_n)) return 0; if (pal_img_n) { // pal_img_n == 3 or 4 - img_n = pal_img_n; // record the actual colors we had - img_out_n = pal_img_n; - if (req_comp >= 3) img_out_n = req_comp; - if (!expand_palette(palette, pal_len, img_out_n)) + s->img_n = pal_img_n; // record the actual colors we had + s->img_out_n = pal_img_n; + if (req_comp >= 3) s->img_out_n = req_comp; + if (!expand_palette(z, palette, pal_len, s->img_out_n)) return 0; } - free(expanded); expanded = NULL; + free(z->expanded); z->expanded = NULL; return 1; } @@ -2195,6 +2389,7 @@ static int parse_png_file(int scan, int req_comp) // if critical, fail if ((c.type & (1 << 29)) == 0) { #ifndef STBI_NO_FAILURE_STRINGS + // not threadsafe static char invalid_chunk[] = "XXXX chunk not known"; invalid_chunk[0] = (uint8) (c.type >> 24); invalid_chunk[1] = (uint8) (c.type >> 16); @@ -2203,32 +2398,36 @@ static int parse_png_file(int scan, int req_comp) #endif return e(invalid_chunk, "PNG not supported: unknown chunk type"); } - skip(c.length); + skip(s, c.length); break; } // end of chunk, read and skip CRC - get8(); get8(); get8(); get8(); + get32(s); } } -static unsigned char *do_png(int *x, int *y, int *n, int req_comp) +static unsigned char *do_png(png *p, int *x, int *y, int *n, int req_comp) { unsigned char *result=NULL; + p->expanded = NULL; + p->idata = NULL; + p->out = NULL; if (req_comp < 0 || req_comp > 4) return epuc("bad req_comp", "Internal error"); - if (parse_png_file(SCAN_load, req_comp)) { - result = out; - out = NULL; - if (req_comp && req_comp != img_out_n) { - result = convert_format(result, img_out_n, req_comp); + if (parse_png_file(p, SCAN_load, req_comp)) { + result = p->out; + p->out = NULL; + if (req_comp && req_comp != p->s.img_out_n) { + result = convert_format(result, p->s.img_out_n, req_comp, p->s.img_x, p->s.img_y); + p->s.img_out_n = req_comp; if (result == NULL) return result; } - *x = img_x; - *y = img_y; - if (n) *n = img_n; + *x = p->s.img_x; + *y = p->s.img_y; + if (n) *n = p->s.img_n; } - free(out); out = NULL; - free(expanded); expanded = NULL; - free(idata); idata = NULL; + free(p->out); p->out = NULL; + free(p->expanded); p->expanded = NULL; + free(p->idata); p->idata = NULL; return result; } @@ -2236,11 +2435,12 @@ static unsigned char *do_png(int *x, int *y, int *n, int req_comp) #ifndef STBI_NO_STDIO unsigned char *stbi_png_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) { - start_file(f); - return do_png(x,y,comp,req_comp); + png p; + start_file(&p.s, f); + return do_png(&p, x,y,comp,req_comp); } -unsigned char *stbi_png_load(char *filename, int *x, int *y, int *comp, int req_comp) +unsigned char *stbi_png_load(char const *filename, int *x, int *y, int *comp, int req_comp) { unsigned char *data; FILE *f = fopen(filename, "rb"); @@ -2251,49 +2451,68 @@ unsigned char *stbi_png_load(char *filename, int *x, int *y, int *comp, int req_ } #endif -unsigned char *stbi_png_load_from_memory(unsigned char *buffer, int len, int *x, int *y, int *comp, int req_comp) +unsigned char *stbi_png_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) { - start_mem(buffer,len); - return do_png(x,y,comp,req_comp); + png p; + start_mem(&p.s, buffer,len); + return do_png(&p, x,y,comp,req_comp); } #ifndef STBI_NO_STDIO int stbi_png_test_file(FILE *f) { + png p; int n,r; n = ftell(f); - start_file(f); - r = parse_png_file(SCAN_type,STBI_default); + start_file(&p.s, f); + r = parse_png_file(&p, SCAN_type,STBI_default); fseek(f,n,SEEK_SET); return r; } #endif -int stbi_png_test_memory(unsigned char *buffer, int len) +int stbi_png_test_memory(stbi_uc const *buffer, int len) { - start_mem(buffer, len); - return parse_png_file(SCAN_type,STBI_default); + png p; + start_mem(&p.s, buffer, len); + return parse_png_file(&p, SCAN_type,STBI_default); } // TODO: load header from png #ifndef STBI_NO_STDIO -extern int stbi_png_info (char *filename, int *x, int *y, int *comp); +int stbi_png_info (char const *filename, int *x, int *y, int *comp) +{ + png p; + FILE *f = fopen(filename, "rb"); + if (!f) return 0; + start_file(&p.s, f); + if (parse_png_file(&p, SCAN_header, 0)) { + if(x) *x = p.s.img_x; + if(y) *y = p.s.img_y; + if (comp) *comp = p.s.img_n; + fclose(f); + return 1; + } + fclose(f); + return 0; +} + extern int stbi_png_info_from_file (FILE *f, int *x, int *y, int *comp); #endif -extern int stbi_png_info_from_memory (stbi_uc *buffer, int len, int *x, int *y, int *comp); +extern int stbi_png_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp); // Microsoft/Windows BMP image -static int bmp_test(void) +static int bmp_test(stbi *s) { int sz; - if (get8() != 'B') return 0; - if (get8() != 'M') return 0; - get32le(); // discard filesize - get16le(); // discard reserved - get16le(); // discard reserved - get32le(); // discard data offset - sz = get32le(); + if (get8(s) != 'B') return 0; + if (get8(s) != 'M') return 0; + get32le(s); // discard filesize + get16le(s); // discard reserved + get16le(s); // discard reserved + get32le(s); // discard data offset + sz = get32le(s); if (sz == 12 || sz == 40 || sz == 56 || sz == 108) return 1; return 0; } @@ -2301,18 +2520,20 @@ static int bmp_test(void) #ifndef STBI_NO_STDIO int stbi_bmp_test_file (FILE *f) { + stbi s; int r,n = ftell(f); - start_file(f); - r = bmp_test(); + start_file(&s,f); + r = bmp_test(&s); fseek(f,n,SEEK_SET); return r; } #endif -int stbi_bmp_test_memory (stbi_uc *buffer, int len) +int stbi_bmp_test_memory (stbi_uc const *buffer, int len) { - start_mem(buffer, len); - return bmp_test(); + stbi s; + start_mem(&s, buffer, len); + return bmp_test(&s); } // returns 0..31 for the highest set bit @@ -2355,49 +2576,50 @@ static int shiftsigned(int v, int shift, int bits) return result; } -static stbi_uc *bmp_load(int *x, int *y, int *comp, int req_comp) +static stbi_uc *bmp_load(stbi *s, int *x, int *y, int *comp, int req_comp) { - unsigned int mr=0,mg=0,mb=0,ma=0; + uint8 *out; + unsigned int mr=0,mg=0,mb=0,ma=0, fake_a=0; stbi_uc pal[256][4]; int psize=0,i,j,compress=0,width; int bpp, flip_vertically, pad, target, offset, hsz; - if (get8() != 'B' || get8() != 'M') return epuc("not BMP", "Corrupt BMP"); - get32le(); // discard filesize - get16le(); // discard reserved - get16le(); // discard reserved - offset = get32le(); - hsz = get32le(); + if (get8(s) != 'B' || get8(s) != 'M') return epuc("not BMP", "Corrupt BMP"); + get32le(s); // discard filesize + get16le(s); // discard reserved + get16le(s); // discard reserved + offset = get32le(s); + hsz = get32le(s); if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108) return epuc("unknown BMP", "BMP type not supported: unknown"); failure_reason = "bad BMP"; if (hsz == 12) { - img_x = get16le(); - img_y = get16le(); + s->img_x = get16le(s); + s->img_y = get16le(s); } else { - img_x = get32le(); - img_y = get32le(); + s->img_x = get32le(s); + s->img_y = get32le(s); } - if (get16le() != 1) return 0; - bpp = get16le(); + if (get16le(s) != 1) return 0; + bpp = get16le(s); if (bpp == 1) return epuc("monochrome", "BMP type not supported: 1-bit"); - flip_vertically = ((int) img_y) > 0; - img_y = abs((int) img_y); + flip_vertically = ((int) s->img_y) > 0; + s->img_y = abs((int) s->img_y); if (hsz == 12) { if (bpp < 24) psize = (offset - 14 - 24) / 3; } else { - compress = get32le(); + compress = get32le(s); if (compress == 1 || compress == 2) return epuc("BMP RLE", "BMP type not supported: RLE"); - get32le(); // discard sizeof - get32le(); // discard hres - get32le(); // discard vres - get32le(); // discard colorsused - get32le(); // discard max important + get32le(s); // discard sizeof + get32le(s); // discard hres + get32le(s); // discard vres + get32le(s); // discard colorsused + get32le(s); // discard max important if (hsz == 40 || hsz == 56) { if (hsz == 56) { - get32le(); - get32le(); - get32le(); - get32le(); + get32le(s); + get32le(s); + get32le(s); + get32le(s); } if (bpp == 16 || bpp == 32) { mr = mg = mb = 0; @@ -2406,15 +2628,17 @@ static stbi_uc *bmp_load(int *x, int *y, int *comp, int req_comp) mr = 0xff << 16; mg = 0xff << 8; mb = 0xff << 0; + ma = 0xff << 24; + fake_a = 1; // @TODO: check for cases like alpha value is all 0 and switch it to 255 } else { mr = 31 << 10; mg = 31 << 5; mb = 31 << 0; } } else if (compress == 3) { - mr = get32le(); - mg = get32le(); - mb = get32le(); + mr = get32le(s); + mg = get32le(s); + mb = get32le(s); // not documented, but generated by photoshop and handled by mspaint if (mr == mg && mg == mb) { // ?!?!? @@ -2425,42 +2649,42 @@ static stbi_uc *bmp_load(int *x, int *y, int *comp, int req_comp) } } else { assert(hsz == 108); - mr = get32le(); - mg = get32le(); - mb = get32le(); - ma = get32le(); - get32le(); // discard color space + mr = get32le(s); + mg = get32le(s); + mb = get32le(s); + ma = get32le(s); + get32le(s); // discard color space for (i=0; i < 12; ++i) - get32le(); // discard color space parameters + get32le(s); // discard color space parameters } if (bpp < 16) psize = (offset - 14 - hsz) >> 2; } - img_n = ma ? 4 : 3; + s->img_n = ma ? 4 : 3; if (req_comp && req_comp >= 3) // we can directly decode 3 or 4 target = req_comp; else - target = img_n; // if they want monochrome, we'll post-convert - out = (stbi_uc *) malloc(target * img_x * img_y); + target = s->img_n; // if they want monochrome, we'll post-convert + out = (stbi_uc *) malloc(target * s->img_x * s->img_y); if (!out) return epuc("outofmem", "Out of memory"); if (bpp < 16) { int z=0; - if (psize == 0 || psize > 256) return epuc("invalid", "Corrupt BMP"); + if (psize == 0 || psize > 256) { free(out); return epuc("invalid", "Corrupt BMP"); } for (i=0; i < psize; ++i) { - pal[i][2] = get8(); - pal[i][1] = get8(); - pal[i][0] = get8(); - if (hsz != 12) get8(); + pal[i][2] = get8(s); + pal[i][1] = get8(s); + pal[i][0] = get8(s); + if (hsz != 12) get8(s); pal[i][3] = 255; } - skip(offset - 14 - hsz - psize * (hsz == 12 ? 3 : 4)); - if (bpp == 4) width = (img_x + 1) >> 1; - else if (bpp == 8) width = img_x; - else return epuc("bad bpp", "Corrupt BMP"); + skip(s, offset - 14 - hsz - psize * (hsz == 12 ? 3 : 4)); + if (bpp == 4) width = (s->img_x + 1) >> 1; + else if (bpp == 8) width = s->img_x; + else { free(out); return epuc("bad bpp", "Corrupt BMP"); } pad = (-width)&3; - for (j=0; j < (int) img_y; ++j) { - for (i=0; i < (int) img_x; i += 2) { - int v=get8(),v2=0; + for (j=0; j < (int) s->img_y; ++j) { + for (i=0; i < (int) s->img_x; i += 2) { + int v=get8(s),v2=0; if (bpp == 4) { v2 = v & 15; v >>= 4; @@ -2469,22 +2693,22 @@ static stbi_uc *bmp_load(int *x, int *y, int *comp, int req_comp) out[z++] = pal[v][1]; out[z++] = pal[v][2]; if (target == 4) out[z++] = 255; - if (i+1 == (int) img_x) break; - v = (bpp == 8) ? get8() : v2; + if (i+1 == (int) s->img_x) break; + v = (bpp == 8) ? get8(s) : v2; out[z++] = pal[v][0]; out[z++] = pal[v][1]; out[z++] = pal[v][2]; if (target == 4) out[z++] = 255; } - skip(pad); + skip(s, pad); } } else { int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0; int z = 0; int easy=0; - skip(offset - 14 - hsz); - if (bpp == 24) width = 3 * img_x; - else if (bpp == 16) width = 2*img_x; + skip(s, offset - 14 - hsz); + if (bpp == 24) width = 3 * s->img_x; + else if (bpp == 16) width = 2*s->img_x; else /* bpp = 32 and pad = 0 */ width=0; pad = (-width) & 3; if (bpp == 24) { @@ -2501,20 +2725,20 @@ static stbi_uc *bmp_load(int *x, int *y, int *comp, int req_comp) bshift = high_bit(mb)-7; bcount = bitcount(mr); ashift = high_bit(ma)-7; acount = bitcount(mr); } - for (j=0; j < (int) img_y; ++j) { + for (j=0; j < (int) s->img_y; ++j) { if (easy) { - for (i=0; i < (int) img_x; ++i) { + for (i=0; i < (int) s->img_x; ++i) { int a; - out[z+2] = get8(); - out[z+1] = get8(); - out[z+0] = get8(); + out[z+2] = get8(s); + out[z+1] = get8(s); + out[z+0] = get8(s); z += 3; - a = (easy == 2 ? get8() : 255); + a = (easy == 2 ? get8(s) : 255); if (target == 4) out[z++] = a; } } else { - for (i=0; i < (int) img_x; ++i) { - uint32 v = (bpp == 16 ? get16le() : get32le()); + for (i=0; i < (int) s->img_x; ++i) { + uint32 v = (bpp == 16 ? get16le(s) : get32le(s)); int a; out[z++] = shiftsigned(v & mr, rshift, rcount); out[z++] = shiftsigned(v & mg, gshift, gcount); @@ -2523,33 +2747,33 @@ static stbi_uc *bmp_load(int *x, int *y, int *comp, int req_comp) if (target == 4) out[z++] = a; } } - skip(pad); + skip(s, pad); } } if (flip_vertically) { stbi_uc t; - for (j=0; j < (int) img_y>>1; ++j) { - stbi_uc *p1 = out + j *img_x*target; - stbi_uc *p2 = out + (img_y-1-j)*img_x*target; - for (i=0; i < (int) img_x*target; ++i) { + for (j=0; j < (int) s->img_y>>1; ++j) { + stbi_uc *p1 = out + j *s->img_x*target; + stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target; + for (i=0; i < (int) s->img_x*target; ++i) { t = p1[i], p1[i] = p2[i], p2[i] = t; } } } if (req_comp && req_comp != target) { - out = convert_format(out, target, req_comp); + out = convert_format(out, target, req_comp, s->img_x, s->img_y); if (out == NULL) return out; // convert_format frees input on failure } - *x = img_x; - *y = img_y; + *x = s->img_x; + *y = s->img_y; if (comp) *comp = target; return out; } #ifndef STBI_NO_STDIO -stbi_uc *stbi_bmp_load (char *filename, int *x, int *y, int *comp, int req_comp) +stbi_uc *stbi_bmp_load (char const *filename, int *x, int *y, int *comp, int req_comp) { stbi_uc *data; FILE *f = fopen(filename, "rb"); @@ -2561,36 +2785,38 @@ stbi_uc *stbi_bmp_load (char *filename, int *x, int *y, in stbi_uc *stbi_bmp_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp) { - start_file(f); - return bmp_load(x,y,comp,req_comp); + stbi s; + start_file(&s, f); + return bmp_load(&s, x,y,comp,req_comp); } #endif -stbi_uc *stbi_bmp_load_from_memory (stbi_uc *buffer, int len, int *x, int *y, int *comp, int req_comp) +stbi_uc *stbi_bmp_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) { - start_mem(buffer, len); - return bmp_load(x,y,comp,req_comp); + stbi s; + start_mem(&s, buffer, len); + return bmp_load(&s, x,y,comp,req_comp); } // Targa Truevision - TGA // by Jonathan Dummer -static int tga_test(void) +static int tga_test(stbi *s) { int sz; - get8u(); // discard Offset - sz = get8u(); // color type + get8u(s); // discard Offset + sz = get8u(s); // color type if( sz > 1 ) return 0; // only RGB or indexed allowed - sz = get8u(); // image type + sz = get8u(s); // image type if( (sz != 1) && (sz != 2) && (sz != 3) && (sz != 9) && (sz != 10) && (sz != 11) ) return 0; // only RGB or grey allowed, +/- RLE - get16(); // discard palette start - get16(); // discard palette length - get8(); // discard bits per palette color entry - get16(); // discard x origin - get16(); // discard y origin - if( get16() < 1 ) return 0; // test width - if( get16() < 1 ) return 0; // test height - sz = get8(); // bits per pixel + get16(s); // discard palette start + get16(s); // discard palette length + get8(s); // discard bits per palette color entry + get16(s); // discard x origin + get16(s); // discard y origin + if( get16(s) < 1 ) return 0; // test width + if( get16(s) < 1 ) return 0; // test height + sz = get8(s); // bits per pixel if( (sz != 8) && (sz != 16) && (sz != 24) && (sz != 32) ) return 0; // only RGB or RGBA or grey allowed return 1; // seems to have passed everything } @@ -2598,42 +2824,44 @@ static int tga_test(void) #ifndef STBI_NO_STDIO int stbi_tga_test_file (FILE *f) { + stbi s; int r,n = ftell(f); - start_file(f); - r = tga_test(); + start_file(&s, f); + r = tga_test(&s); fseek(f,n,SEEK_SET); return r; } #endif -int stbi_tga_test_memory (stbi_uc *buffer, int len) +int stbi_tga_test_memory (stbi_uc const *buffer, int len) { - start_mem(buffer, len); - return tga_test(); + stbi s; + start_mem(&s, buffer, len); + return tga_test(&s); } -static stbi_uc *tga_load(int *x, int *y, int *comp, int req_comp) +static stbi_uc *tga_load(stbi *s, int *x, int *y, int *comp, int req_comp) { // read in the TGA header stuff - int tga_offset = get8u(); - int tga_indexed = get8u(); - int tga_image_type = get8u(); + int tga_offset = get8u(s); + int tga_indexed = get8u(s); + int tga_image_type = get8u(s); int tga_is_RLE = 0; - int tga_palette_start = get16le(); - int tga_palette_len = get16le(); - int tga_palette_bits = get8u(); - int tga_x_origin = get16le(); - int tga_y_origin = get16le(); - int tga_width = get16le(); - int tga_height = get16le(); - int tga_bits_per_pixel = get8u(); - int tga_inverted = get8u(); + int tga_palette_start = get16le(s); + int tga_palette_len = get16le(s); + int tga_palette_bits = get8u(s); + int tga_x_origin = get16le(s); + int tga_y_origin = get16le(s); + int tga_width = get16le(s); + int tga_height = get16le(s); + int tga_bits_per_pixel = get8u(s); + int tga_inverted = get8u(s); // image data unsigned char *tga_data; unsigned char *tga_palette = NULL; int i, j; unsigned char raw_data[4]; - unsigned char trans_data[] = {0, 0, 0, 0}; + unsigned char trans_data[4]; int RLE_count = 0; int RLE_repeating = 0; int read_next_pixel = 1; @@ -2679,15 +2907,15 @@ static stbi_uc *tga_load(int *x, int *y, int *comp, int req_comp) tga_data = (unsigned char*)malloc( tga_width * tga_height * req_comp ); // skip to the data's starting position (offset usually = 0) - skip( tga_offset ); + skip(s, tga_offset ); // do I need to load a palette? if( tga_indexed ) { // any data to skip? (offset usually = 0) - skip( tga_palette_start ); + skip(s, tga_palette_start ); // load the palette tga_palette = (unsigned char*)malloc( tga_palette_len * tga_palette_bits / 8 ); - getn( tga_palette, tga_palette_len * tga_palette_bits / 8 ); + getn(s, tga_palette, tga_palette_len * tga_palette_bits / 8 ); } // load the data for( i = 0; i < tga_width * tga_height; ++i ) @@ -2698,7 +2926,7 @@ static stbi_uc *tga_load(int *x, int *y, int *comp, int req_comp) if( RLE_count == 0 ) { // yep, get the next byte as a RLE command - int RLE_cmd = get8u(); + int RLE_cmd = get8u(s); RLE_count = 1 + (RLE_cmd & 127); RLE_repeating = RLE_cmd >> 7; read_next_pixel = 1; @@ -2717,7 +2945,7 @@ static stbi_uc *tga_load(int *x, int *y, int *comp, int req_comp) if( tga_indexed ) { // read in 1 byte, then perform the lookup - int pal_idx = get8u(); + int pal_idx = get8u(s); if( pal_idx >= tga_palette_len ) { // invalid index @@ -2733,7 +2961,7 @@ static stbi_uc *tga_load(int *x, int *y, int *comp, int req_comp) // read in the data raw for( j = 0; j*8 < tga_bits_per_pixel; ++j ) { - raw_data[j] = get8u(); + raw_data[j] = get8u(s); } } // convert raw to the intermediate format @@ -2831,7 +3059,7 @@ static stbi_uc *tga_load(int *x, int *y, int *comp, int req_comp) } #ifndef STBI_NO_STDIO -stbi_uc *stbi_tga_load (char *filename, int *x, int *y, int *comp, int req_comp) +stbi_uc *stbi_tga_load (char const *filename, int *x, int *y, int *comp, int req_comp) { stbi_uc *data; FILE *f = fopen(filename, "rb"); @@ -2843,73 +3071,78 @@ stbi_uc *stbi_tga_load (char *filename, int *x, int *y, in stbi_uc *stbi_tga_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp) { - start_file(f); - return tga_load(x,y,comp,req_comp); + stbi s; + start_file(&s, f); + return tga_load(&s, x,y,comp,req_comp); } #endif -stbi_uc *stbi_tga_load_from_memory (stbi_uc *buffer, int len, int *x, int *y, int *comp, int req_comp) +stbi_uc *stbi_tga_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) { - start_mem(buffer, len); - return tga_load(x,y,comp,req_comp); + stbi s; + start_mem(&s, buffer, len); + return tga_load(&s, x,y,comp,req_comp); } // ************************************************************************************************* // Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicholas Schulz, tweaked by STB -static int psd_test(void) +static int psd_test(stbi *s) { - if (get32() != 0x38425053) return 0; // "8BPS" + if (get32(s) != 0x38425053) return 0; // "8BPS" else return 1; } #ifndef STBI_NO_STDIO int stbi_psd_test_file(FILE *f) { + stbi s; int r,n = ftell(f); - start_file(f); - r = psd_test(); + start_file(&s, f); + r = psd_test(&s); fseek(f,n,SEEK_SET); return r; } #endif -int stbi_psd_test_memory(stbi_uc *buffer, int len) +int stbi_psd_test_memory(stbi_uc const *buffer, int len) { - start_mem(buffer, len); - return psd_test(); + stbi s; + start_mem(&s, buffer, len); + return psd_test(&s); } -static stbi_uc *psd_load(int *x, int *y, int *comp, int req_comp) +static stbi_uc *psd_load(stbi *s, int *x, int *y, int *comp, int req_comp) { int pixelCount; int channelCount, compression; int channel, i, count, len; int w,h; + uint8 *out; // Check identifier - if (get32() != 0x38425053) // "8BPS" + if (get32(s) != 0x38425053) // "8BPS" return epuc("not PSD", "Corrupt PSD image"); // Check file type version. - if (get16() != 1) + if (get16(s) != 1) return epuc("wrong version", "Unsupported version of PSD image"); // Skip 6 reserved bytes. - skip( 6 ); + skip(s, 6 ); // Read the number of channels (R, G, B, A, etc). - channelCount = get16(); + channelCount = get16(s); if (channelCount < 0 || channelCount > 16) return epuc("wrong channel count", "Unsupported number of channels in PSD image"); // Read the rows and columns of the image. - h = get32(); - w = get32(); + h = get32(s); + w = get32(s); // Make sure the depth is 8 bits. - if (get16() != 8) + if (get16(s) != 8) return epuc("unsupported bit depth", "PSD bit depth is not 8 bit"); // Make sure the color mode is RGB. @@ -2922,25 +3155,25 @@ static stbi_uc *psd_load(int *x, int *y, int *comp, int req_comp) // 7: Multichannel // 8: Duotone // 9: Lab color - if (get16() != 3) + if (get16(s) != 3) return epuc("wrong color format", "PSD is not in RGB color format"); // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.) - skip(get32() ); + skip(s,get32(s) ); // Skip the image resources. (resolution, pen tool paths, etc) - skip( get32() ); + skip(s, get32(s) ); // Skip the reserved data. - skip( get32() ); + skip(s, get32(s) ); // Find out if the data is compressed. // Known values: // 0: no compression // 1: RLE compressed - compression = get16(); + compression = get16(s); if (compression > 1) - return epuc("unknown compression type", "PSD has an unknown compression format"); + return epuc("bad compression", "PSD has an unknown compression format"); // Create the destination image. out = (stbi_uc *) malloc(4 * w*h); @@ -2962,7 +3195,7 @@ static stbi_uc *psd_load(int *x, int *y, int *comp, int req_comp) // The RLE-compressed data is preceeded by a 2-byte data count for each row in the data, // which we're going to just skip. - skip( h * channelCount * 2 ); + skip(s, h * channelCount * 2 ); // Read the RLE data by channel. for (channel = 0; channel < 4; channel++) { @@ -2976,7 +3209,7 @@ static stbi_uc *psd_load(int *x, int *y, int *comp, int req_comp) // Read the RLE data. count = 0; while (count < pixelCount) { - len = get8(); + len = get8(s); if (len == 128) { // No-op. } else if (len < 128) { @@ -2984,7 +3217,7 @@ static stbi_uc *psd_load(int *x, int *y, int *comp, int req_comp) len++; count += len; while (len) { - *p = get8(); + *p = get8(s); p += 4; len--; } @@ -2994,7 +3227,7 @@ static stbi_uc *psd_load(int *x, int *y, int *comp, int req_comp) // (Interpret len as a negative 8-bit int.) len ^= 0x0FF; len += 2; - val = get8(); + val = get8(s); count += len; while (len) { *p = val; @@ -3022,13 +3255,13 @@ static stbi_uc *psd_load(int *x, int *y, int *comp, int req_comp) // Read the data. count = 0; for (i = 0; i < pixelCount; i++) - *p = get8(), p += 4; + *p = get8(s), p += 4; } } } if (req_comp && req_comp != 4) { - out = convert_format(out, 4, req_comp); + out = convert_format(out, 4, req_comp, w, h); if (out == NULL) return out; // convert_format frees input on failure } @@ -3040,7 +3273,7 @@ static stbi_uc *psd_load(int *x, int *y, int *comp, int req_comp) } #ifndef STBI_NO_STDIO -stbi_uc *stbi_psd_load(char *filename, int *x, int *y, int *comp, int req_comp) +stbi_uc *stbi_psd_load(char const *filename, int *x, int *y, int *comp, int req_comp) { stbi_uc *data; FILE *f = fopen(filename, "rb"); @@ -3052,15 +3285,17 @@ stbi_uc *stbi_psd_load(char *filename, int *x, int *y, int *comp, int req_comp) stbi_uc *stbi_psd_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) { - start_file(f); - return psd_load(x,y,comp,req_comp); + stbi s; + start_file(&s, f); + return psd_load(&s, x,y,comp,req_comp); } #endif -stbi_uc *stbi_psd_load_from_memory (stbi_uc *buffer, int len, int *x, int *y, int *comp, int req_comp) +stbi_uc *stbi_psd_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) { - start_mem(buffer, len); - return psd_load(x,y,comp,req_comp); + stbi s; + start_mem(&s, buffer, len); + return psd_load(&s, x,y,comp,req_comp); } @@ -3068,50 +3303,52 @@ stbi_uc *stbi_psd_load_from_memory (stbi_uc *buffer, int len, int *x, int *y, in // Radiance RGBE HDR loader // originally by Nicolas Schulz #ifndef STBI_NO_HDR -static int hdr_test(void) +static int hdr_test(stbi *s) { char *signature = "#?RADIANCE\n"; int i; for (i=0; signature[i]; ++i) - if (get8() != signature[i]) + if (get8(s) != signature[i]) return 0; return 1; } -int stbi_hdr_test_memory(stbi_uc *buffer, int len) +int stbi_hdr_test_memory(stbi_uc const *buffer, int len) { - start_mem(buffer, len); - return hdr_test(); + stbi s; + start_mem(&s, buffer, len); + return hdr_test(&s); } #ifndef STBI_NO_STDIO int stbi_hdr_test_file(FILE *f) { + stbi s; int r,n = ftell(f); - start_file(f); - r = hdr_test(); + start_file(&s, f); + r = hdr_test(&s); fseek(f,n,SEEK_SET); return r; } #endif #define HDR_BUFLEN 1024 -static char *hdr_gettoken(char *buffer) +static char *hdr_gettoken(stbi *z, char *buffer) { int len=0; char *s = buffer, c = '\0'; - c = get8(); + c = get8(z); - while (!at_eof() && c != '\n') { + while (!at_eof(z) && c != '\n') { buffer[len++] = c; if (len == HDR_BUFLEN-1) { // flush to end of line - while (!at_eof() && get8() != '\n') + while (!at_eof(z) && get8(z) != '\n') ; break; } - c = get8(); + c = get8(z); } buffer[len] = 0; @@ -3146,7 +3383,7 @@ static void hdr_convert(float *output, stbi_uc *input, int req_comp) } -static float *hdr_load(int *x, int *y, int *comp, int req_comp) +static float *hdr_load(stbi *s, int *x, int *y, int *comp, int req_comp) { char buffer[HDR_BUFLEN]; char *token; @@ -3160,12 +3397,12 @@ static float *hdr_load(int *x, int *y, int *comp, int req_comp) // Check identifier - if (strcmp(hdr_gettoken(buffer), "#?RADIANCE") != 0) + if (strcmp(hdr_gettoken(s,buffer), "#?RADIANCE") != 0) return epf("not HDR", "Corrupt HDR image"); // Parse header while(1) { - token = hdr_gettoken(buffer); + token = hdr_gettoken(s,buffer); if (token[0] == 0) break; if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; } @@ -3174,7 +3411,7 @@ static float *hdr_load(int *x, int *y, int *comp, int req_comp) // Parse width and height // can't use sscanf() if we're not using stdio! - token = hdr_gettoken(buffer); + token = hdr_gettoken(s,buffer); if (strncmp(token, "-Y ", 3)) return epf("unsupported data layout", "Unsupported HDR format"); token += 3; height = strtol(token, &token, 10); @@ -3200,7 +3437,7 @@ static float *hdr_load(int *x, int *y, int *comp, int req_comp) for (i=0; i < width; ++i) { stbi_uc rgbe[4]; main_decode_loop: - getn(rgbe, 4); + getn(s, rgbe, 4); hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp); } } @@ -3209,13 +3446,13 @@ static float *hdr_load(int *x, int *y, int *comp, int req_comp) scanline = NULL; for (j = 0; j < height; ++j) { - c1 = get8(); - c2 = get8(); - len = get8(); + c1 = get8(s); + c2 = get8(s); + len = get8(s); if (c1 != 2 || c2 != 2 || (len & 0x80)) { // not run-length encoded, so we have to actually use THIS data as a decoded // pixel (note this can't be a valid pixel--one of RGB must be >= 128) - stbi_uc rgbe[4] = { c1,c2,len, get8() }; + stbi_uc rgbe[4] = { c1,c2,len, get8(s) }; hdr_convert(hdr_data, rgbe, req_comp); i = 1; j = 0; @@ -3223,24 +3460,24 @@ static float *hdr_load(int *x, int *y, int *comp, int req_comp) goto main_decode_loop; // yes, this is fucking insane; blame the fucking insane format } len <<= 8; - len |= get8(); + len |= get8(s); if (len != width) { free(hdr_data); free(scanline); return epf("invalid decoded scanline length", "corrupt HDR"); } if (scanline == NULL) scanline = (stbi_uc *) malloc(width * 4); for (k = 0; k < 4; ++k) { i = 0; while (i < width) { - count = get8(); + count = get8(s); if (count > 128) { // Run - value = get8(); + value = get8(s); count -= 128; for (z = 0; z < count; ++z) scanline[i++ * 4 + k] = value; } else { // Dump for (z = 0; z < count; ++z) - scanline[i++ * 4 + k] = get8(); + scanline[i++ * 4 + k] = get8(s); } } } @@ -3252,21 +3489,152 @@ static float *hdr_load(int *x, int *y, int *comp, int req_comp) return hdr_data; } +static stbi_uc *hdr_load_rgbe(stbi *s, int *x, int *y, int *comp, int req_comp) +{ + char buffer[HDR_BUFLEN]; + char *token; + int valid = 0; + int width, height; + stbi_uc *scanline; + stbi_uc *rgbe_data; + int len; + unsigned char count, value; + int i, j, k, c1,c2, z; + + + // Check identifier + if (strcmp(hdr_gettoken(s,buffer), "#?RADIANCE") != 0) + return epuc("not HDR", "Corrupt HDR image"); + + // Parse header + while(1) { + token = hdr_gettoken(s,buffer); + if (token[0] == 0) break; + if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + } + + if (!valid) return epuc("unsupported format", "Unsupported HDR format"); + + // Parse width and height + // can't use sscanf() if we're not using stdio! + token = hdr_gettoken(s,buffer); + if (strncmp(token, "-Y ", 3)) return epuc("unsupported data layout", "Unsupported HDR format"); + token += 3; + height = strtol(token, &token, 10); + while (*token == ' ') ++token; + if (strncmp(token, "+X ", 3)) return epuc("unsupported data layout", "Unsupported HDR format"); + token += 3; + width = strtol(token, NULL, 10); + + *x = width; + *y = height; + + // RGBE _MUST_ come out as 4 components + *comp = 4; + req_comp = 4; + + // Read data + rgbe_data = (stbi_uc *) malloc(height * width * req_comp * sizeof(stbi_uc)); + // point to the beginning + scanline = rgbe_data; + + // Load image data + // image data is stored as some number of scan lines + if( width < 8 || width >= 32768) { + // Read flat data + for (j=0; j < height; ++j) { + for (i=0; i < width; ++i) { + main_decode_loop: + //getn(rgbe, 4); + getn(s,scanline, 4); + scanline += 4; + } + } + } else { + // Read RLE-encoded data + for (j = 0; j < height; ++j) { + c1 = get8(s); + c2 = get8(s); + len = get8(s); + if (c1 != 2 || c2 != 2 || (len & 0x80)) { + // not run-length encoded, so we have to actually use THIS data as a decoded + // pixel (note this can't be a valid pixel--one of RGB must be >= 128) + scanline[0] = c1; + scanline[1] = c2; + scanline[2] = len; + scanline[3] = get8(s); + scanline += 4; + i = 1; + j = 0; + goto main_decode_loop; // yes, this is insane; blame the insane format + } + len <<= 8; + len |= get8(s); + if (len != width) { free(rgbe_data); return epuc("invalid decoded scanline length", "corrupt HDR"); } + for (k = 0; k < 4; ++k) { + i = 0; + while (i < width) { + count = get8(s); + if (count > 128) { + // Run + value = get8(s); + count -= 128; + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = value; + } else { + // Dump + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = get8(s); + } + } + } + // move the scanline on + scanline += 4 * width; + } + } + + return rgbe_data; +} #ifndef STBI_NO_STDIO float *stbi_hdr_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) { - start_file(f); - return hdr_load(x,y,comp,req_comp); + stbi s; + start_file(&s,f); + return hdr_load(&s,x,y,comp,req_comp); +} + +stbi_uc *stbi_hdr_load_rgbe_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_file(&s,f); + return hdr_load_rgbe(&s,x,y,comp,req_comp); +} + +stbi_uc *stbi_hdr_load_rgbe (char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = fopen(filename, "rb"); + unsigned char *result; + if (!f) return epuc("can't fopen", "Unable to open file"); + result = stbi_hdr_load_rgbe_file(f,x,y,comp,req_comp); + fclose(f); + return result; } #endif -float *stbi_hdr_load_from_memory(stbi_uc *buffer, int len, int *x, int *y, int *comp, int req_comp) +float *stbi_hdr_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) { - start_mem(buffer, len); - return hdr_load(x,y,comp,req_comp); + stbi s; + start_mem(&s,buffer, len); + return hdr_load(&s,x,y,comp,req_comp); } +stbi_uc *stbi_hdr_load_rgbe_memory(stbi_uc *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_mem(&s,buffer, len); + return hdr_load_rgbe(&s,x,y,comp,req_comp); +} #endif // STBI_NO_HDR /////////////////////// write image /////////////////////// @@ -3338,7 +3706,7 @@ static void write_pixels(FILE *f, int rgb_dir, int vdir, int x, int y, int comp, } } -static int outfile(char *filename, int rgb_dir, int vdir, int x, int y, int comp, void *data, int alpha, int pad, char *fmt, ...) +static int outfile(char const *filename, int rgb_dir, int vdir, int x, int y, int comp, void *data, int alpha, int pad, char *fmt, ...) { FILE *f = fopen(filename, "wb"); if (f) { @@ -3352,7 +3720,7 @@ static int outfile(char *filename, int rgb_dir, int vdir, int x, int y, int comp return f != NULL; } -int stbi_write_bmp(char *filename, int x, int y, int comp, void *data) +int stbi_write_bmp(char const *filename, int x, int y, int comp, void *data) { int pad = (-x*3) & 3; return outfile(filename,-1,-1,x,y,comp,data,0,pad, @@ -3361,7 +3729,7 @@ int stbi_write_bmp(char *filename, int x, int y, int comp, void *data) 40, x,y, 1,24, 0,0,0,0,0,0); // bitmap header } -int stbi_write_tga(char *filename, int x, int y, int comp, void *data) +int stbi_write_tga(char const *filename, int x, int y, int comp, void *data) { int has_alpha = !(comp & 1); return outfile(filename, -1,-1, x, y, comp, data, has_alpha, 0, @@ -3378,5 +3746,4 @@ int stbi_write_tga(char *filename, int x, int y, int comp, void *data) // add in my DDS loading support #ifndef STBI_NO_DDS #include "stbi_DDS_aug_c.h" -#endif - +#endif \ No newline at end of file diff --git a/src/SFML/Graphics/SOIL/stb_image_aug.h b/src/SFML/Graphics/SOIL/stb_image_aug.h index 279887a0..22ccc309 100644 --- a/src/SFML/Graphics/SOIL/stb_image_aug.h +++ b/src/SFML/Graphics/SOIL/stb_image_aug.h @@ -1,4 +1,4 @@ -/* stbi-1.08 - public domain JPEG/PNG reader - http://nothings.org/stb_image.c +/* stbi-1.18 - public domain JPEG/PNG reader - http://nothings.org/stb_image.c when you control the images you're loading QUICK NOTES: @@ -6,18 +6,30 @@ avoid problematic images and only need the trivial interface JPEG baseline (no JPEG progressive, no oddball channel decimations) - PNG non-interlaced + PNG 8-bit only BMP non-1bpp, non-RLE TGA (not sure what subset, if a subset) - PSD (composite view only, no extra channels) + PSD (composited view only, no extra channels) HDR (radiance rgbE format) writes BMP,TGA (define STBI_NO_WRITE to remove code) decoded from memory or through stdio FILE (define STBI_NO_STDIO to remove code) + supports installable dequantizing-IDCT, YCbCr-to-RGB conversion (define STBI_SIMD) TODO: stbi_info_* history: + 1.18 fix a threading bug (local mutable static) + 1.17 support interlaced PNG + 1.16 major bugfix - convert_format converted one too many pixels + 1.15 initialize some fields for thread safety + 1.14 fix threadsafe conversion bug; header-file-only version (#define STBI_HEADER_FILE_ONLY before including) + 1.13 threadsafe + 1.12 const qualifiers in the API + 1.11 Support installable IDCT, colorspace conversion routines + 1.10 Fixes for 64-bit (don't use "unsigned long") + optimized upsampling by Fabian "ryg" Giesen + 1.09 Fix format-conversion for PSD code (bad global variables!) 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz 1.07 attempt to fix C++ warning/errors again 1.06 attempt to fix C++ warning/errors again @@ -53,8 +65,9 @@ on 'test' only check type, not whether we support this variant */ -#ifndef HEADER_STB_IMAGE_AUGMENTED -#define HEADER_STB_IMAGE_AUGMENTED + +#ifndef STBI_INCLUDE_STB_IMAGE_H +#define STBI_INCLUDE_STB_IMAGE_H //// begin header file //////////////////////////////////////////////////// // @@ -144,15 +157,11 @@ // // stbi_is_hdr(char *filename); - #ifndef STBI_NO_STDIO #include #endif -#ifndef STBI_NO_HDR -#include // ldexp -#include // strcmp -#endif +#define STBI_VERSION 1 enum { @@ -176,27 +185,27 @@ extern "C" { // write a BMP/TGA file given tightly packed 'comp' channels (no padding, nor bmp-stride-padding) // (you must include the appropriate extension in the filename). // returns TRUE on success, FALSE if couldn't open file, error writing file -extern int stbi_write_bmp (char *filename, int x, int y, int comp, void *data); -extern int stbi_write_tga (char *filename, int x, int y, int comp, void *data); +extern int stbi_write_bmp (char const *filename, int x, int y, int comp, void *data); +extern int stbi_write_tga (char const *filename, int x, int y, int comp, void *data); #endif // PRIMARY API - works on images of any type // load image by filename, open file, or memory buffer #ifndef STBI_NO_STDIO -extern stbi_uc *stbi_load (char *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_load (char const *filename, int *x, int *y, int *comp, int req_comp); extern stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); extern int stbi_info_from_file (FILE *f, int *x, int *y, int *comp); #endif -extern stbi_uc *stbi_load_from_memory(stbi_uc *buffer, int len, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); // for stbi_load_from_file, file pointer is left pointing immediately after image #ifndef STBI_NO_HDR #ifndef STBI_NO_STDIO -extern float *stbi_loadf (char *filename, int *x, int *y, int *comp, int req_comp); +extern float *stbi_loadf (char const *filename, int *x, int *y, int *comp, int req_comp); extern float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); #endif -extern float *stbi_loadf_from_memory(stbi_uc *buffer, int len, int *x, int *y, int *comp, int req_comp); +extern float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); extern void stbi_hdr_to_ldr_gamma(float gamma); extern void stbi_hdr_to_ldr_scale(float scale); @@ -207,96 +216,96 @@ extern void stbi_ldr_to_hdr_scale(float scale); #endif // STBI_NO_HDR // get a VERY brief reason for failure -extern char *stbi_failure_reason (void); +// NOT THREADSAFE +extern char *stbi_failure_reason (void); // free the loaded image -- this is just free() extern void stbi_image_free (void *retval_from_stbi_load); // get image dimensions & components without fully decoding -extern int stbi_info_from_memory(stbi_uc *buffer, int len, int *x, int *y, int *comp); -extern int stbi_is_hdr_from_memory(stbi_uc *buffer, int len); +extern int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); +extern int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len); #ifndef STBI_NO_STDIO -extern int stbi_info (char *filename, int *x, int *y, int *comp); -extern int stbi_is_hdr (char *filename); +extern int stbi_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_is_hdr (char const *filename); extern int stbi_is_hdr_from_file(FILE *f); #endif // ZLIB client - used by PNG, available for other purposes -extern char *stbi_zlib_decode_malloc_guesssize(int initial_size, int *outlen); -extern char *stbi_zlib_decode_malloc(char *buffer, int len, int *outlen); -extern int stbi_zlib_decode_buffer(char *obuffer, int olen, char *ibuffer, int ilen); - -extern char *stbi_zlib_decode_noheader_malloc(char *buffer, int len, int *outlen); -extern int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, char *ibuffer, int ilen); +extern char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen); +extern char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen); +extern int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); +extern char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen); +extern int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); // TYPE-SPECIFIC ACCESS // is it a jpeg? -extern int stbi_jpeg_test_memory (stbi_uc *buffer, int len); -extern stbi_uc *stbi_jpeg_load_from_memory(stbi_uc *buffer, int len, int *x, int *y, int *comp, int req_comp); -extern int stbi_jpeg_info_from_memory(stbi_uc *buffer, int len, int *x, int *y, int *comp); +extern int stbi_jpeg_test_memory (stbi_uc const *buffer, int len); +extern stbi_uc *stbi_jpeg_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +extern int stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); #ifndef STBI_NO_STDIO -extern stbi_uc *stbi_jpeg_load (char *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_jpeg_load (char const *filename, int *x, int *y, int *comp, int req_comp); extern int stbi_jpeg_test_file (FILE *f); extern stbi_uc *stbi_jpeg_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); -extern int stbi_jpeg_info (char *filename, int *x, int *y, int *comp); +extern int stbi_jpeg_info (char const *filename, int *x, int *y, int *comp); extern int stbi_jpeg_info_from_file (FILE *f, int *x, int *y, int *comp); #endif -extern int stbi_jpeg_dc_only; // only decode DC component - // is it a png? -extern int stbi_png_test_memory (stbi_uc *buffer, int len); -extern stbi_uc *stbi_png_load_from_memory (stbi_uc *buffer, int len, int *x, int *y, int *comp, int req_comp); -extern int stbi_png_info_from_memory (stbi_uc *buffer, int len, int *x, int *y, int *comp); +extern int stbi_png_test_memory (stbi_uc const *buffer, int len); +extern stbi_uc *stbi_png_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +extern int stbi_png_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp); #ifndef STBI_NO_STDIO -extern stbi_uc *stbi_png_load (char *filename, int *x, int *y, int *comp, int req_comp); -extern int stbi_png_info (char *filename, int *x, int *y, int *comp); +extern stbi_uc *stbi_png_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern int stbi_png_info (char const *filename, int *x, int *y, int *comp); extern int stbi_png_test_file (FILE *f); extern stbi_uc *stbi_png_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); extern int stbi_png_info_from_file (FILE *f, int *x, int *y, int *comp); #endif // is it a bmp? -extern int stbi_bmp_test_memory (stbi_uc *buffer, int len); +extern int stbi_bmp_test_memory (stbi_uc const *buffer, int len); -extern stbi_uc *stbi_bmp_load (char *filename, int *x, int *y, int *comp, int req_comp); -extern stbi_uc *stbi_bmp_load_from_memory (stbi_uc *buffer, int len, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_bmp_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_bmp_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); #ifndef STBI_NO_STDIO extern int stbi_bmp_test_file (FILE *f); extern stbi_uc *stbi_bmp_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); #endif // is it a tga? -extern int stbi_tga_test_memory (stbi_uc *buffer, int len); +extern int stbi_tga_test_memory (stbi_uc const *buffer, int len); -extern stbi_uc *stbi_tga_load (char *filename, int *x, int *y, int *comp, int req_comp); -extern stbi_uc *stbi_tga_load_from_memory (stbi_uc *buffer, int len, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_tga_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_tga_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); #ifndef STBI_NO_STDIO extern int stbi_tga_test_file (FILE *f); extern stbi_uc *stbi_tga_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); #endif // is it a psd? -extern int stbi_psd_test_memory (stbi_uc *buffer, int len); +extern int stbi_psd_test_memory (stbi_uc const *buffer, int len); -extern stbi_uc *stbi_psd_load (char *filename, int *x, int *y, int *comp, int req_comp); -extern stbi_uc *stbi_psd_load_from_memory (stbi_uc *buffer, int len, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_psd_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_psd_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); #ifndef STBI_NO_STDIO extern int stbi_psd_test_file (FILE *f); extern stbi_uc *stbi_psd_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); #endif // is it an hdr? -extern int stbi_hdr_test_memory (stbi_uc *buffer, int len); +extern int stbi_hdr_test_memory (stbi_uc const *buffer, int len); -extern float * stbi_hdr_load (char *filename, int *x, int *y, int *comp, int req_comp); -extern float * stbi_hdr_load_from_memory (stbi_uc *buffer, int len, int *x, int *y, int *comp, int req_comp); +extern float * stbi_hdr_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern float * stbi_hdr_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_hdr_load_rgbe (char const *filename, int *x, int *y, int *comp, int req_comp); +extern float * stbi_hdr_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); #ifndef STBI_NO_STDIO extern int stbi_hdr_test_file (FILE *f); extern float * stbi_hdr_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); @@ -305,8 +314,8 @@ extern float * stbi_hdr_load_from_file (FILE *f, int *x, int // define new loaders typedef struct { - int (*test_memory)(stbi_uc *buffer, int len); - stbi_uc * (*load_from_memory)(stbi_uc *buffer, int len, int *x, int *y, int *comp, int req_comp); + int (*test_memory)(stbi_uc const *buffer, int len); + stbi_uc * (*load_from_memory)(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); #ifndef STBI_NO_STDIO int (*test_file)(FILE *f); stbi_uc * (*load_from_file)(FILE *f, int *x, int *y, int *comp, int req_comp); @@ -315,8 +324,28 @@ typedef struct // register a loader by filling out the above structure (you must defined ALL functions) // returns 1 if added or already added, 0 if not added (too many loaders) +// NOT THREADSAFE extern int stbi_register_loader(stbi_loader *loader); +// define faster low-level operations (typically SIMD support) +#if STBI_SIMD +typedef void (*stbi_idct_8x8)(uint8 *out, int out_stride, short data[64], unsigned short *dequantize); +// compute an integer IDCT on "input" +// input[x] = data[x] * dequantize[x] +// write results to 'out': 64 samples, each run of 8 spaced by 'out_stride' +// CLAMP results to 0..255 +typedef void (*stbi_YCbCr_to_RGB_run)(uint8 *output, uint8 const *y, uint8 const *cb, uint8 const *cr, int count, int step); +// compute a conversion from YCbCr to RGB +// 'count' pixels +// write pixels to 'output'; each pixel is 'step' bytes (either 3 or 4; if 4, write '255' as 4th), order R,G,B +// y: Y input channel +// cb: Cb input channel; scale/biased to be 0..255 +// cr: Cr input channel; scale/biased to be 0..255 + +extern void stbi_install_idct(stbi_idct_8x8 func); +extern void stbi_install_YCbCr_to_RGB(stbi_YCbCr_to_RGB_run func); +#endif // STBI_SIMD + #ifdef __cplusplus } #endif @@ -324,6 +353,5 @@ extern int stbi_register_loader(stbi_loader *loader); // // //// end header file ///////////////////////////////////////////////////// - -#endif +#endif // STBI_INCLUDE_STB_IMAGE_H diff --git a/src/SFML/Graphics/SOIL/stbi_DDS_aug.h b/src/SFML/Graphics/SOIL/stbi_DDS_aug.h index c53e1a3b..7317d63b 100644 --- a/src/SFML/Graphics/SOIL/stbi_DDS_aug.h +++ b/src/SFML/Graphics/SOIL/stbi_DDS_aug.h @@ -6,10 +6,10 @@ #define HEADER_STB_IMAGE_DDS_AUGMENTATION // is it a DDS file? -extern int stbi_dds_test_memory (stbi_uc *buffer, int len); +extern int stbi_dds_test_memory (stbi_uc const *buffer, int len); extern stbi_uc *stbi_dds_load (char *filename, int *x, int *y, int *comp, int req_comp); -extern stbi_uc *stbi_dds_load_from_memory (stbi_uc *buffer, int len, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_dds_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); #ifndef STBI_NO_STDIO extern int stbi_dds_test_file (FILE *f); extern stbi_uc *stbi_dds_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); diff --git a/src/SFML/Graphics/SOIL/stbi_DDS_aug_c.h b/src/SFML/Graphics/SOIL/stbi_DDS_aug_c.h index 024efd6e..683d1cf0 100644 --- a/src/SFML/Graphics/SOIL/stbi_DDS_aug_c.h +++ b/src/SFML/Graphics/SOIL/stbi_DDS_aug_c.h @@ -71,32 +71,34 @@ typedef struct { #define DDSCAPS2_CUBEMAP_NEGATIVEZ 0x00008000 #define DDSCAPS2_VOLUME 0x00200000 -static int dds_test(void) +static int dds_test(stbi *s) { // check the magic number - if (get8() != 'D') return 0; - if (get8() != 'D') return 0; - if (get8() != 'S') return 0; - if (get8() != ' ') return 0; + if (get8(s) != 'D') return 0; + if (get8(s) != 'D') return 0; + if (get8(s) != 'S') return 0; + if (get8(s) != ' ') return 0; // check header size - if (get32le() != 124) return 0; + if (get32le(s) != 124) return 0; return 1; } #ifndef STBI_NO_STDIO int stbi_dds_test_file (FILE *f) { + stbi s; int r,n = ftell(f); - start_file(f); - r = dds_test(); + start_file(&s,f); + r = dds_test(&s); fseek(f,n,SEEK_SET); return r; } #endif -int stbi_dds_test_memory (stbi_uc *buffer, int len) +int stbi_dds_test_memory (stbi_uc const *buffer, int len) { - start_mem(buffer, len); - return dds_test(); + stbi s; + start_mem(&s,buffer, len); + return dds_test(&s); } // helper functions @@ -263,7 +265,7 @@ void stbi_decode_DXT_color_block( } // done } -static stbi_uc *dds_load(int *x, int *y, int *comp, int req_comp) +static stbi_uc *dds_load(stbi *s, int *x, int *y, int *comp, int req_comp) { // all variables go up front stbi_uc *dds_data = NULL; @@ -280,7 +282,7 @@ static stbi_uc *dds_load(int *x, int *y, int *comp, int req_comp) { return NULL; } - getn( (stbi_uc*)(&header), 128 ); + getn( s, (stbi_uc*)(&header), 128 ); // and do some checking if( header.dwMagic != (('D' << 0) | ('D' << 8) | ('S' << 16) | (' ' << 24)) ) return NULL; if( header.dwSize != 124 ) return NULL; @@ -295,23 +297,23 @@ static stbi_uc *dds_load(int *x, int *y, int *comp, int req_comp) if( (header.sPixelFormat.dwFlags & flags) == 0 ) return NULL; if( (header.sCaps.dwCaps1 & DDSCAPS_TEXTURE) == 0 ) return NULL; // get the image data - img_x = header.dwWidth; - img_y = header.dwHeight; - img_n = 4; + s->img_x = header.dwWidth; + s->img_y = header.dwHeight; + s->img_n = 4; is_compressed = (header.sPixelFormat.dwFlags & DDPF_FOURCC) / DDPF_FOURCC; has_alpha = (header.sPixelFormat.dwFlags & DDPF_ALPHAPIXELS) / DDPF_ALPHAPIXELS; has_mipmap = (header.sCaps.dwCaps1 & DDSCAPS_MIPMAP) && (header.dwMipMapCount > 1); cubemap_faces = (header.sCaps.dwCaps2 & DDSCAPS2_CUBEMAP) / DDSCAPS2_CUBEMAP; /* I need cubemaps to have square faces */ - cubemap_faces &= (img_x == img_y); + cubemap_faces &= (s->img_x == s->img_y); cubemap_faces *= 5; cubemap_faces += 1; - block_pitch = (img_x+3) >> 2; - num_blocks = block_pitch * ((img_y+3) >> 2); + block_pitch = (s->img_x+3) >> 2; + num_blocks = block_pitch * ((s->img_y+3) >> 2); /* let the user know what's going on */ - *x = img_x; - *y = img_y; - *comp = img_n; + *x = s->img_x; + *y = s->img_y; + *comp = s->img_n; /* is this uncompressed? */ if( is_compressed ) { @@ -323,7 +325,7 @@ static stbi_uc *dds_load(int *x, int *y, int *comp, int req_comp) those non-compliant writers leave dwPitchOrLinearSize == 0 */ // passed all the tests, get the RAM for decoding - sz = (img_x)*(img_y)*4*cubemap_faces; + sz = (s->img_x)*(s->img_y)*4*cubemap_faces; dds_data = (unsigned char*)malloc( sz ); /* do this once for each face */ for( cf = 0; cf < cubemap_faces; ++ cf ) @@ -339,36 +341,36 @@ static stbi_uc *dds_load(int *x, int *y, int *comp, int req_comp) if( DXT_family == 1 ) { // DXT1 - getn( compressed, 8 ); + getn( s, compressed, 8 ); stbi_decode_DXT1_block( block, compressed ); } else if( DXT_family < 4 ) { // DXT2/3 - getn( compressed, 8 ); + getn( s, compressed, 8 ); stbi_decode_DXT23_alpha_block ( block, compressed ); - getn( compressed, 8 ); + getn( s, compressed, 8 ); stbi_decode_DXT_color_block ( block, compressed ); } else { // DXT4/5 - getn( compressed, 8 ); + getn( s, compressed, 8 ); stbi_decode_DXT45_alpha_block ( block, compressed ); - getn( compressed, 8 ); + getn( s, compressed, 8 ); stbi_decode_DXT_color_block ( block, compressed ); } // is this a partial block? - if( ref_x + 4 > img_x ) + if( ref_x + 4 > s->img_x ) { - bw = img_x - ref_x; + bw = s->img_x - ref_x; } - if( ref_y + 4 > img_y ) + if( ref_y + 4 > s->img_y ) { - bh = img_y - ref_y; + bh = s->img_y - ref_y; } // now drop our decompressed data into the buffer for( by = 0; by < bh; ++by ) { - int idx = 4*((ref_y+by+cf*img_x)*img_x + ref_x); + int idx = 4*((ref_y+by+cf*s->img_x)*s->img_x + ref_x); for( bx = 0; bx < bw*4; ++bx ) { @@ -387,8 +389,8 @@ static stbi_uc *dds_load(int *x, int *y, int *comp, int req_comp) } for( i = 1; i < header.dwMipMapCount; ++i ) { - int mx = img_x >> (i + 2); - int my = img_y >> (i + 2); + int mx = s->img_x >> (i + 2); + int my = s->img_y >> (i + 2); if( mx < 1 ) { mx = 1; @@ -397,7 +399,7 @@ static stbi_uc *dds_load(int *x, int *y, int *comp, int req_comp) { my = 1; } - skip( mx*my*block_size ); + skip( s, mx*my*block_size ); } } }/* per cubemap face */ @@ -405,27 +407,27 @@ static stbi_uc *dds_load(int *x, int *y, int *comp, int req_comp) { /* uncompressed */ DXT_family = 0; - img_n = 3; + s->img_n = 3; if( has_alpha ) { - img_n = 4; + s->img_n = 4; } - *comp = img_n; - sz = img_x*img_y*img_n*cubemap_faces; + *comp = s->img_n; + sz = s->img_x*s->img_y*s->img_n*cubemap_faces; dds_data = (unsigned char*)malloc( sz ); /* do this once for each face */ for( cf = 0; cf < cubemap_faces; ++ cf ) { /* read the main image for this face */ - getn( &dds_data[cf*img_x*img_y*img_n], img_x*img_y*img_n ); + getn( s, &dds_data[cf*s->img_x*s->img_y*s->img_n], s->img_x*s->img_y*s->img_n ); /* done reading and decoding the main image... skip MIPmaps if present */ if( has_mipmap ) { for( i = 1; i < header.dwMipMapCount; ++i ) { - int mx = img_x >> i; - int my = img_y >> i; + int mx = s->img_x >> i; + int my = s->img_y >> i; if( mx < 1 ) { mx = 1; @@ -434,12 +436,12 @@ static stbi_uc *dds_load(int *x, int *y, int *comp, int req_comp) { my = 1; } - skip( mx*my*img_n ); + skip( s, mx*my*s->img_n ); } } } /* data was BGR, I need it RGB */ - for( i = 0; i < sz; i += img_n ) + for( i = 0; i < sz; i += s->img_n ) { unsigned char temp = dds_data[i]; dds_data[i] = dds_data[i+2]; @@ -449,12 +451,12 @@ static stbi_uc *dds_load(int *x, int *y, int *comp, int req_comp) /* finished decompressing into RGBA, adjust the y size if we have a cubemap note: sz is already up to date */ - img_y *= cubemap_faces; - *y = img_y; + s->img_y *= cubemap_faces; + *y = s->img_y; // did the user want something else, or // see if all the alpha values are 255 (i.e. no transparency) has_alpha = 0; - if( img_n == 4) + if( s->img_n == 4) { for( i = 3; (i < sz) && (has_alpha == 0); i += 4 ) { @@ -464,17 +466,17 @@ static stbi_uc *dds_load(int *x, int *y, int *comp, int req_comp) if( (req_comp <= 4) && (req_comp >= 1) ) { // user has some requirements, meet them - if( req_comp != img_n ) + if( req_comp != s->img_n ) { - dds_data = convert_format( dds_data, img_n, req_comp ); - *comp = img_n; + dds_data = convert_format( dds_data, s->img_n, req_comp, s->img_x, s->img_y ); + *comp = s->img_n; } } else { // user had no requirements, only drop to RGB is no alpha - if( (has_alpha == 0) && (img_n == 4) ) + if( (has_alpha == 0) && (s->img_n == 4) ) { - dds_data = convert_format( dds_data, 4, 3 ); + dds_data = convert_format( dds_data, 4, 3, s->img_x, s->img_y ); *comp = 3; } } @@ -483,25 +485,27 @@ static stbi_uc *dds_load(int *x, int *y, int *comp, int req_comp) } #ifndef STBI_NO_STDIO +stbi_uc *stbi_dds_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_file(&s,f); + return dds_load(&s,x,y,comp,req_comp); +} + stbi_uc *stbi_dds_load (char *filename, int *x, int *y, int *comp, int req_comp) { stbi_uc *data; FILE *f = fopen(filename, "rb"); if (!f) return NULL; - data = dds_load(x,y,comp,req_comp); + data = stbi_dds_load_from_file(f,x,y,comp,req_comp); fclose(f); return data; } - -stbi_uc *stbi_dds_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp) -{ - start_file(f); - return dds_load(x,y,comp,req_comp); -} #endif -stbi_uc *stbi_dds_load_from_memory (stbi_uc *buffer, int len, int *x, int *y, int *comp, int req_comp) +stbi_uc *stbi_dds_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) { - start_mem(buffer, len); - return dds_load(x,y,comp,req_comp); + stbi s; + start_mem(&s,buffer, len); + return dds_load(&s,x,y,comp,req_comp); }