[ipxe-devel] [PATCH] [deflate] Add support for GZIP decompression
Petr Borsodi
petr.borsodi at gmail.com
Fri Feb 1 17:12:58 UTC 2019
The GZIP file format (RFC 1952) wraps a DEFLATE stream in its own
header and footer. This implementation extends the existing RAW
and ZLIB code, and adds a simple test.
---
src/crypto/deflate.c | 217 +++++++++++++++++++++++++++++++++++--
src/include/ipxe/deflate.h | 33 ++++++
src/tests/deflate_test.c | 16 +++
3 files changed, 259 insertions(+), 7 deletions(-)
diff --git a/src/crypto/deflate.c b/src/crypto/deflate.c
index e1c87d5f..8ced9f23 100644
--- a/src/crypto/deflate.c
+++ b/src/crypto/deflate.c
@@ -30,6 +30,7 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
#include <ctype.h>
#include <ipxe/uaccess.h>
#include <ipxe/deflate.h>
+#include <ipxe/crc32.h>
/** @file
*
@@ -385,6 +386,40 @@ static int deflate_extract ( struct deflate *deflate, struct deflate_chunk *in,
return data;
}
+/**
+ * Attempt to extract a fixed number of whole bytes from input stream
+ *
+ * @v deflate Decompressor
+ * @v in Compressed input data
+ * @v len Number of bytes to extract
+ * @ret data Pointer to extracted data (or NULL if zero-length or not available)
+ *
+ * The bit accumulator must be empty; nothing is consumed on failure
+ */
+static void * deflate_extract_buffer ( struct deflate *deflate, struct deflate_chunk *in,
+ unsigned int len ) {
+ size_t offset, remaining;
+
+ /* Sanity check: bytes are taken directly from the input chunk */
+ assert ( deflate->bits == 0 );
+
+ /* Return immediately if we are attempting to extract zero bytes */
+ if ( len == 0 )
+ return NULL;
+
+ /* Fail (leaving the input untouched) unless all len bytes are present */
+ offset = in->offset;
+ remaining = ( in->len - offset );
+ if ( len > remaining )
+ return NULL;
+
+ in->offset += len;
+
+ DBGCP ( deflate, "DEFLATE %p extracted %u bytes\n", deflate, len );
+
+ return user_to_virt ( in->data, offset );
+}
+
/**
* Attempt to decode a Huffman-coded symbol from input stream
*
@@ -453,9 +488,9 @@ static void deflate_discard_to_byte ( struct deflate *deflate ) {
* @v offset Starting offset within source data
* @v len Length to copy
*/
-static void deflate_copy ( struct deflate_chunk *out,
+static void deflate_copy ( struct deflate *deflate, struct deflate_chunk *out,
userptr_t start, size_t offset, size_t len ) {
- size_t out_offset = out->offset;
+ size_t in_offset = offset, out_offset = out->offset;
size_t copy_len;
/* Copy data one byte at a time, to allow for overlap */
@@ -465,10 +500,15 @@ static void deflate_copy ( struct deflate_chunk *out,
copy_len = len;
while ( copy_len-- ) {
memcpy_user ( out->data, out_offset++,
- start, offset++, 1 );
+ start, in_offset++, 1 );
}
}
out->offset += len;
+ deflate->total_length += len;
+
+ if ( deflate->format == DEFLATE_GZIP ) {
+ deflate->checksum = crc32_le( deflate->checksum, user_to_virt ( start, offset ), len );
+ }
}
/**
@@ -501,6 +541,9 @@ int deflate_inflate ( struct deflate *deflate,
} else switch ( deflate->format ) {
case DEFLATE_RAW: goto block_header;
case DEFLATE_ZLIB: goto zlib_header;
+ case DEFLATE_GZIP:
+ deflate->checksum = 0xffffffff;
+ goto gzip_header;
default: assert ( 0 );
}
@@ -532,6 +575,123 @@ int deflate_inflate ( struct deflate *deflate,
goto block_header;
}
+ gzip_header: {
+ uint8_t * header;
+
+ /* Extract fixed-length header (resume here if not yet available) */
+ header = deflate_extract_buffer( deflate, in, GZIP_HEADER_BYTES );
+ if ( header == NULL ) {
+ deflate->resume = &&gzip_header;
+ return 0;
+ }
+
+ if ( header [0] != 0x1f || header [1] != 0x8b ) { /* magic bytes */
+ DBGC ( deflate, "DEFLATE %p invalid GZIP format\n", deflate );
+ return -EINVAL;
+ }
+
+ if ( header [2] != GZIP_HEADER_CM_DEFLATE ) { /* only DEFLATE is handled */
+ DBGC ( deflate, "DEFLATE %p unsupported GZIP "
+ "compression method %d\n", deflate, header [2] );
+ return -ENOTSUP;
+ }
+
+ /* Save flags (they select which optional fields are parsed next) */
+ deflate->header = header [3];
+
+ /* Process optional header fields, starting with FEXTRA */
+ goto gzip_fextra_xlen;
+ }
+
+ gzip_fextra_xlen: {
+ if ( deflate->header & GZIP_HEADER_FLG_FEXTRA ) {
+ uint8_t * xlen;
+
+ /* Extract XLEN field (little-endian length of the FEXTRA data) */
+ xlen = deflate_extract_buffer( deflate, in, GZIP_HEADER_XLEN_BYTES );
+ if ( xlen == NULL ) {
+ deflate->resume = &&gzip_fextra_xlen;
+ return 0;
+ }
+
+ deflate->remaining = xlen [0] | ( xlen [1] << 8 );
+ } else {
+ /* No FEXTRA member: skip straight to FNAME */
+ goto gzip_fname;
+ }
+ }
+
+ gzip_fextra_data: {
+ size_t in_remaining;
+ size_t len;
+
+ /* Calculate how much FEXTRA data is available in this input chunk */
+ in_remaining = ( in->len - in->offset );
+ len = deflate->remaining;
+ if ( len > in_remaining )
+ len = in_remaining;
+
+ /* Discard data from input buffer (FEXTRA content is not interpreted) */
+ in->offset += len;
+ deflate->remaining -= len;
+
+ /* Wait for more input if FEXTRA data is still outstanding */
+ if ( deflate->remaining ) {
+ deflate->resume = &&gzip_fextra_data;
+ return 0;
+ }
+
+ /* Otherwise FEXTRA is complete: fall through to FNAME */
+ }
+
+
+ gzip_fname: {
+ if ( deflate->header & GZIP_HEADER_FLG_FNAME ) {
+ char * name;
+
+ /* Skip FNAME member, up to and including its NUL terminator */
+ do {
+ /* Extract one char of FNAME (resume here if input runs out) */
+ name = deflate_extract_buffer( deflate, in, 1 );
+ if ( name == NULL ) {
+ deflate->resume = &&gzip_fname;
+ return 0;
+ }
+ } while ( * name != '\0' );
+ }
+ }
+
+ gzip_fcomment: {
+ if ( deflate->header & GZIP_HEADER_FLG_FCOMMENT ) {
+ char * comment;
+
+ /* Skip FCOMMENT member, up to and including its NUL terminator */
+ do {
+ /* Extract one char of FCOMMENT (resume here if input runs out) */
+ comment = deflate_extract_buffer( deflate, in, 1 );
+ if ( comment == NULL ) {
+ deflate->resume = &&gzip_fcomment;
+ return 0;
+ }
+ } while ( * comment != '\0' );
+ }
+ }
+
+ gzip_fhcrc: {
+ if ( deflate->header & GZIP_HEADER_FLG_FHCRC ) {
+ uint8_t * fhcrc;
+
+ /* Extract FHCRC member (consumed only; its value is not checked here) */
+ fhcrc = deflate_extract_buffer( deflate, in, GZIP_HEADER_FHCRC_BYTES );
+ if ( fhcrc == NULL ) {
+ deflate->resume = &&gzip_fhcrc;
+ return 0;
+ }
+ }
+
+ /* Header complete: fall through to first block header */
+ }
+
block_header: {
int header;
int bfinal;
@@ -617,7 +777,7 @@ int deflate_inflate ( struct deflate *deflate,
len = in_remaining;
/* Copy data to output buffer */
- deflate_copy ( out, in->data, in->offset, len );
+ deflate_copy ( deflate, out, in->data, in->offset, len );
/* Consume data from input buffer */
in->offset += len;
@@ -844,7 +1004,7 @@ int deflate_inflate ( struct deflate *deflate,
DBGCP ( deflate, "DEFLATE %p literal %#02x "
"('%c')\n", deflate, byte,
( isprint ( byte ) ? byte : '.' ) );
- deflate_copy ( out, virt_to_user ( &byte ), 0,
+ deflate_copy ( deflate, out, virt_to_user ( &byte ), 0,
sizeof ( byte ) );
} else if ( code == DEFLATE_LITLEN_END ) {
@@ -934,8 +1094,8 @@ int deflate_inflate ( struct deflate *deflate,
}
/* Copy data, allowing for overlap */
- deflate_copy ( out, out->data, ( out->offset - dup_distance ),
- dup_len );
+ deflate_copy ( deflate, out, out->data,
+ ( out->offset - dup_distance ), dup_len );
/* Process next literal/length symbol */
goto lzhuf_litlen;
@@ -953,6 +1113,7 @@ int deflate_inflate ( struct deflate *deflate,
switch ( deflate->format ) {
case DEFLATE_RAW: goto finished;
case DEFLATE_ZLIB: goto zlib_footer;
+ case DEFLATE_GZIP: goto gzip_footer;
default: assert ( 0 );
}
}
@@ -982,6 +1143,48 @@ int deflate_inflate ( struct deflate *deflate,
goto finished;
}
+ gzip_footer: {
+
+ /* Discard any bits up to the next byte boundary */
+ deflate_discard_to_byte ( deflate );
+
+ /* Return any remaining whole bytes to the input for byte-wise footer extraction */
+ in->offset -= deflate->bits / 8;
+
+ deflate->bits = 0; /* deflate_extract_buffer() asserts an empty accumulator */
+ deflate->checksum ^= 0xffffffff; /* final inversion before comparing with footer CRC32 */
+ }
+
+ gzip_crc32_isize: {
+ uint8_t * footer;
+ uint32_t crc32, isize;
+
+ /* Extract footer: CRC32 then ISIZE (resume here if not yet available) */
+ footer = deflate_extract_buffer( deflate, in,
+ GZIP_FOOTER_CRC32_BYTES + GZIP_FOOTER_ISIZE_BYTES );
+ if ( footer == NULL ) {
+ deflate->resume = &&gzip_crc32_isize;
+ return 0;
+ }
+ /* Verify little-endian CRC32 (top byte cast to avoid signed-shift overflow) */
+ crc32 = footer [0] | ( footer [1] << 8 ) | ( footer [2] << 16 ) | ( ( uint32_t ) footer [3] << 24 );
+ if ( deflate->checksum != crc32 ) {
+ DBGC ( deflate, "DEFLATE %p invalid GZIP CRC 0x%08x/0x%08x\n",
+ deflate, deflate->checksum, crc32 );
+ return -EINVAL;
+ }
+ /* Verify little-endian ISIZE against the total inflated length */
+ isize = footer [4] | ( footer [5] << 8 ) | ( footer [6] << 16 ) | ( ( uint32_t ) footer [7] << 24 );
+ if ( deflate->total_length != isize ) {
+ DBGC ( deflate, "DEFLATE %p invalid GZIP ISIZE 0x%08x/0x%08x\n",
+ deflate, deflate->total_length, isize );
+ return -EINVAL;
+ }
+
+ /* Finish processing */
+ goto finished;
+ }
+
finished: {
/* Mark as finished and terminate */
DBGCP ( deflate, "DEFLATE %p finished\n", deflate );
diff --git a/src/include/ipxe/deflate.h b/src/include/ipxe/deflate.h
index b751aa9a..2ac7567f 100644
--- a/src/include/ipxe/deflate.h
+++ b/src/include/ipxe/deflate.h
@@ -19,6 +19,8 @@ enum deflate_format {
DEFLATE_RAW,
/** ZLIB header and footer */
DEFLATE_ZLIB,
+ /** GZIP header and footer */
+ DEFLATE_GZIP,
};
/** Block header length (in bits) */
@@ -111,6 +113,31 @@ enum deflate_format {
/** ZLIB ADLER32 length (in bits) */
#define ZLIB_ADLER32_BITS 32
+/** GZIP fixed header length (in bytes) */
+#define GZIP_HEADER_BYTES 10
+
+/** GZIP header compression method: DEFLATE */
+#define GZIP_HEADER_CM_DEFLATE 8
+
+/** GZIP header flags (bit mask taken from header byte 3) */
+#define GZIP_HEADER_FLG_FTEXT 0x01 /* not tested by the GZIP header parser */
+#define GZIP_HEADER_FLG_FHCRC 0x02 /* FHCRC field present */
+#define GZIP_HEADER_FLG_FEXTRA 0x04 /* XLEN field and FEXTRA data present */
+#define GZIP_HEADER_FLG_FNAME 0x08 /* NUL-terminated FNAME present */
+#define GZIP_HEADER_FLG_FCOMMENT 0x10 /* NUL-terminated FCOMMENT present */
+
+/** GZIP header XLEN field length (in bytes) */
+#define GZIP_HEADER_XLEN_BYTES 2
+
+/** GZIP header FHCRC field length (in bytes) */
+#define GZIP_HEADER_FHCRC_BYTES 2
+
+/** GZIP footer CRC32 field length (in bytes) */
+#define GZIP_FOOTER_CRC32_BYTES 4
+
+/** GZIP footer ISIZE field length (in bytes) */
+#define GZIP_FOOTER_ISIZE_BYTES 4
+
/** A Huffman-coded set of symbols of a given length */
struct deflate_huf_symbols {
/** Length of Huffman-coded symbols */
@@ -235,6 +262,12 @@ struct deflate {
uint8_t lengths[ ( ( DEFLATE_LITLEN_MAX_CODE + 1 ) +
( DEFLATE_DISTANCE_MAX_CODE + 1 ) +
1 /* round up */ ) / 2 ];
+
+ /** ZLIB/GZIP checksum */
+ uint32_t checksum;
+
+ /** Total inflated length */
+ unsigned int total_length;
};
/** A chunk of data */
diff --git a/src/tests/deflate_test.c b/src/tests/deflate_test.c
index 20ff5b9a..711f866d 100644
--- a/src/tests/deflate_test.c
+++ b/src/tests/deflate_test.c
@@ -133,6 +133,21 @@ DEFLATE ( zlib, DEFLATE_ZLIB,
0x65, 0x63, 0x69, 0x66, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f,
0x6e ) );
+/* "GZIP file format specification version 4.3" (GZIP stream with FNAME "gzip-test") */
+DEFLATE ( gzip, DEFLATE_GZIP,
+ DATA ( 0x1f, 0x8b, 0x08, 0x08, 0x72, 0x4b, 0x54, 0x5c, 0x02, 0x0b, /* 10-byte header, flags = FNAME */
+ 0x67, 0x7a, 0x69, 0x70, 0x2d, 0x74, 0x65, 0x73, 0x74, 0x00, /* NUL-terminated "gzip-test" */
+ 0x73, 0x8f, 0xf2, 0x0c, 0x50, 0x48, 0xcb, 0xcc, 0x49, 0x55,
+ 0x48, 0xcb, 0x2f, 0xca, 0x4d, 0x2c, 0x51, 0x28, 0x2e, 0x48,
+ 0x4d, 0xce, 0x4c, 0xcb, 0x4c, 0x4e, 0x2c, 0xc9, 0xcc, 0xcf,
+ 0x53, 0x28, 0x4b, 0x2d, 0x2a, 0x06, 0xd1, 0x26, 0x7a, 0xc6,
+ 0x00, 0xde, 0x2b, 0xcf, 0xca, 0x2a, 0x00, 0x00, 0x00 ), /* ends with CRC32 and ISIZE (0x2a = 42) */
+ DATA ( 0x47, 0x5a, 0x49, 0x50, 0x20, 0x66, 0x69, 0x6c, 0x65, 0x20, /* 42 bytes of expected output */
+ 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x20, 0x73, 0x70, 0x65,
+ 0x63, 0x69, 0x66, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e,
+ 0x20, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x20, 0x34,
+ 0x2e, 0x33 ) );
+
/* "ZLIB Compressed Data Format Specification" fragment list */
static struct deflate_test_fragments zlib_fragments[] = {
{ { -1UL, } },
@@ -231,6 +246,7 @@ static void deflate_test_exec ( void ) {
deflate_ok ( deflate, &hello_hello_world, NULL );
deflate_ok ( deflate, &rfc_sentence, NULL );
deflate_ok ( deflate, &zlib, NULL );
+ deflate_ok ( deflate, &gzip, NULL );
/* Test fragmentation */
for ( i = 0 ; i < ( sizeof ( zlib_fragments ) /
--
2.20.1.windows.1
More information about the ipxe-devel
mailing list