[ipxe-devel] [PATCH] [deflate] Add support for GZIP decompression

Petr Borsodi petr.borsodi at gmail.com
Fri Feb 1 17:12:58 UTC 2019


The GZIP file format (RFC 1952) wraps the DEFLATE algorithm in its
own header and footer. This implementation extends the existing RAW
and ZLIB code. A simple test is also added.
---
 src/crypto/deflate.c       | 217 +++++++++++++++++++++++++++++++++++--
 src/include/ipxe/deflate.h |  33 ++++++
 src/tests/deflate_test.c   |  16 +++
 3 files changed, 259 insertions(+), 7 deletions(-)

diff --git a/src/crypto/deflate.c b/src/crypto/deflate.c
index e1c87d5f..8ced9f23 100644
--- a/src/crypto/deflate.c
+++ b/src/crypto/deflate.c
@@ -30,6 +30,7 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
 #include <ctype.h>
 #include <ipxe/uaccess.h>
 #include <ipxe/deflate.h>
+#include <ipxe/crc32.h>
 
 /** @file
  *
@@ -385,6 +386,40 @@ static int deflate_extract ( struct deflate *deflate, struct deflate_chunk *in,
 	return data;
 }
 
+/**
+ * Attempt to get and extract a fixed number of bytes from input stream
+ *
+ * @v deflate		Decompressor
+ * @v in		Compressed input data
+ * @v len		Number of bytes to extract
+ * @ret data		Pointer to extracted data (NULL if unavailable or len is 0)
+ *
+ * No accumulated bits are allowed; input is consumed only on success
+ */
+static void * deflate_extract_buffer ( struct deflate *deflate, struct deflate_chunk *in,
+				       unsigned int len ) {
+	size_t offset, remaining;
+
+	/* Sanity check */
+	assert ( deflate->bits == 0 );
+
+	/* Return immediately if we are attempting to extract zero bytes */
+	if ( len == 0 )
+		return NULL;
+
+	/* Fail without consuming anything unless len bytes are available */
+	offset = in->offset;
+	remaining = ( in->len - offset );
+	if ( len > remaining )
+		return NULL;
+
+	in->offset += len;
+
+	DBGCP ( deflate, "DEFLATE %p extracted %d bytes\n", deflate, len );
+
+	return user_to_virt ( in->data, offset );
+}
+
 /**
  * Attempt to decode a Huffman-coded symbol from input stream
  *
@@ -453,9 +488,9 @@ static void deflate_discard_to_byte ( struct deflate *deflate ) {
  * @v offset		Starting offset within source data
  * @v len		Length to copy
  */
-static void deflate_copy ( struct deflate_chunk *out,
+static void deflate_copy ( struct deflate *deflate, struct deflate_chunk *out,
 			   userptr_t start, size_t offset, size_t len ) {
-	size_t out_offset = out->offset;
+	size_t in_offset = offset, out_offset = out->offset;
 	size_t copy_len;
 
 	/* Copy data one byte at a time, to allow for overlap */
@@ -465,10 +500,15 @@ static void deflate_copy ( struct deflate_chunk *out,
 			copy_len = len;
 		while ( copy_len-- ) {
 			memcpy_user ( out->data, out_offset++,
-				      start, offset++, 1 );
+				      start, in_offset++, 1 );
 		}
 	}
 	out->offset += len;
+	deflate->total_length += len;
+
+	if ( deflate->format == DEFLATE_GZIP ) {
+		deflate->checksum = crc32_le( deflate->checksum, user_to_virt ( start, offset ), len );
+	}
 }
 
 /**
@@ -501,6 +541,9 @@ int deflate_inflate ( struct deflate *deflate,
 	} else switch ( deflate->format ) {
 		case DEFLATE_RAW:	goto block_header;
 		case DEFLATE_ZLIB:	goto zlib_header;
+		case DEFLATE_GZIP:
+			deflate->checksum = 0xffffffff;
+			goto gzip_header;
 		default:		assert ( 0 );
 	}
 
@@ -532,6 +575,123 @@ int deflate_inflate ( struct deflate *deflate,
 		goto block_header;
 	}
 
+ gzip_header: {
+		uint8_t * header;
+
+		/* Extract header */
+		header = deflate_extract_buffer( deflate, in, GZIP_HEADER_BYTES );
+		if ( header == NULL ) {
+			deflate->resume = &&gzip_header;
+			return 0;
+		}
+
+		if ( header [0] != 0x1f || header [1] != 0x8b ) {
+			DBGC ( deflate, "DEFLATE %p invalid GZIP format\n", deflate );
+			return -EINVAL;
+		}
+
+		if ( header [2] != GZIP_HEADER_CM_DEFLATE ) {
+			DBGC ( deflate, "DEFLATE %p unsupported GZIP "
+			       "compression method %d\n", deflate, header [2] );
+			return -ENOTSUP;
+		}
+
+		/* Save flags */
+		deflate->header = header [3];
+
+		/* Process GZIP members */
+		goto gzip_fextra_xlen;
+	}
+
+ gzip_fextra_xlen: {
+		if ( deflate->header & GZIP_HEADER_FLG_FEXTRA ) {
+			uint8_t * xlen;
+
+			/* Extract XLEN field */
+			xlen = deflate_extract_buffer( deflate, in, GZIP_HEADER_XLEN_BYTES );
+			if ( xlen == NULL ) {
+				deflate->resume = &&gzip_fextra_xlen;
+				return 0;
+			}
+
+			deflate->remaining = xlen [0] | ( xlen [1] << 8 );
+		} else {
+			/* Process FNAME */
+			goto gzip_fname;
+		}
+	}
+
+ gzip_fextra_data: {
+		size_t in_remaining;
+		size_t len;
+
+		/* Calculate available amount of FEXTRA data */
+		in_remaining = ( in->len - in->offset );
+		len = deflate->remaining;
+		if ( len > in_remaining )
+			len = in_remaining;
+
+		/* Discard data from input buffer */
+		in->offset += len;
+		deflate->remaining -= len;
+
+		/* Finish processing if we are blocked */
+		if ( deflate->remaining ) {
+			deflate->resume = &&gzip_fextra_data;
+			return 0;
+		}
+
+		/* Otherwise, finish FEXTRA member */
+	}
+
+
+ gzip_fname: {
+		if ( deflate->header & GZIP_HEADER_FLG_FNAME ) {
+			char * name;
+
+			/* Extract FNAME member */
+			do {
+				/* Extract one char of FNAME */
+				name = deflate_extract_buffer( deflate, in, 1 );
+				if ( name == NULL ) {
+					deflate->resume = &&gzip_fname;
+					return 0;
+				}
+			} while ( * name != '\0' );
+		}
+	}
+
+ gzip_fcomment: {
+		if ( deflate->header & GZIP_HEADER_FLG_FCOMMENT ) {
+			char * comment;
+
+			/* Skip FCOMMENT member up to and including its NUL */
+			do {
+				/* Extract one char of FCOMMENT */
+				comment = deflate_extract_buffer( deflate, in, 1 );
+				if ( comment == NULL ) {
+					deflate->resume = &&gzip_fcomment;
+					return 0;
+				}
+			} while ( * comment != '\0' );
+		}
+	}
+
+ gzip_fhcrc: {
+		if ( deflate->header & GZIP_HEADER_FLG_FHCRC ) {
+			uint8_t * fhcrc;
+
+			/* Extract FHCRC member; its value is skipped, not verified */
+			fhcrc = deflate_extract_buffer( deflate, in, GZIP_HEADER_FHCRC_BYTES );
+			if ( fhcrc == NULL ) {
+				deflate->resume = &&gzip_fhcrc;
+				return 0;
+			}
+		}
+
+		/* Fall through to process first block header */
+	}
+
  block_header: {
 		int header;
 		int bfinal;
@@ -617,7 +777,7 @@ int deflate_inflate ( struct deflate *deflate,
 			len = in_remaining;
 
 		/* Copy data to output buffer */
-		deflate_copy ( out, in->data, in->offset, len );
+		deflate_copy ( deflate, out, in->data, in->offset, len );
 
 		/* Consume data from input buffer */
 		in->offset += len;
@@ -844,7 +1004,7 @@ int deflate_inflate ( struct deflate *deflate,
 				DBGCP ( deflate, "DEFLATE %p literal %#02x "
 					"('%c')\n", deflate, byte,
 					( isprint ( byte ) ? byte : '.' ) );
-				deflate_copy ( out, virt_to_user ( &byte ), 0,
+				deflate_copy ( deflate, out, virt_to_user ( &byte ), 0,
 					       sizeof ( byte ) );
 
 			} else if ( code == DEFLATE_LITLEN_END ) {
@@ -934,8 +1094,8 @@ int deflate_inflate ( struct deflate *deflate,
 		}
 
 		/* Copy data, allowing for overlap */
-		deflate_copy ( out, out->data, ( out->offset - dup_distance ),
-			       dup_len );
+		deflate_copy ( deflate, out, out->data,
+			       ( out->offset - dup_distance ), dup_len );
 
 		/* Process next literal/length symbol */
 		goto lzhuf_litlen;
@@ -953,6 +1113,7 @@ int deflate_inflate ( struct deflate *deflate,
 		switch ( deflate->format ) {
 		case DEFLATE_RAW:	goto finished;
 		case DEFLATE_ZLIB:	goto zlib_footer;
+		case DEFLATE_GZIP:	goto gzip_footer;
 		default:		assert ( 0 );
 		}
 	}
@@ -982,6 +1143,48 @@ int deflate_inflate ( struct deflate *deflate,
 		goto finished;
 	}
 
+ gzip_footer: {
+
+		/* Discard any bits up to the next byte boundary */
+		deflate_discard_to_byte ( deflate );
+
+		/* Return any whole unconsumed bytes to the input */
+		in->offset -= deflate->bits / 8;
+
+		/* Empty the accumulator and invert the running checksum */
+		deflate->bits = 0;
+		deflate->checksum ^= 0xffffffff;
+	}
+
+ gzip_crc32_isize: {
+		uint8_t * footer;
+		uint32_t crc32, isize;
+
+		/* Extract footer: CRC32 then ISIZE, each 32-bit little-endian */
+		footer = deflate_extract_buffer( deflate, in,
+						 GZIP_FOOTER_CRC32_BYTES + GZIP_FOOTER_ISIZE_BYTES );
+		if ( footer == NULL ) {
+			deflate->resume = &&gzip_crc32_isize;
+			return 0;
+		}
+
+		crc32 = ( footer [0] | ( footer [1] << 8 ) | ( footer [2] << 16 ) | ( ( uint32_t ) footer [3] << 24 ) );
+		if ( deflate->checksum != crc32 ) {
+			DBGC ( deflate, "DEFLATE %p invalid GZIP CRC 0x%08x/0x%08x\n",
+			       deflate, deflate->checksum, crc32 );
+			return -EINVAL;
+		}
+
+		isize = ( footer [4] | ( footer [5] << 8 ) | ( footer [6] << 16 ) | ( ( uint32_t ) footer [7] << 24 ) );
+		if ( deflate->total_length != isize ) {
+			DBGC ( deflate, "DEFLATE %p invalid GZIP ISIZE 0x%08x/0x%08x\n",
+			       deflate, deflate->total_length, isize );
+			return -EINVAL;
+		}
+
+		/* Footer verified; finish processing */
+		goto finished;
+	}
+
  finished: {
 		/* Mark as finished and terminate */
 		DBGCP ( deflate, "DEFLATE %p finished\n", deflate );
diff --git a/src/include/ipxe/deflate.h b/src/include/ipxe/deflate.h
index b751aa9a..2ac7567f 100644
--- a/src/include/ipxe/deflate.h
+++ b/src/include/ipxe/deflate.h
@@ -19,6 +19,8 @@ enum deflate_format {
 	DEFLATE_RAW,
 	/** ZLIB header and footer */
 	DEFLATE_ZLIB,
+	/** GZIP header and footer */
+	DEFLATE_GZIP,
 };
 
 /** Block header length (in bits) */
@@ -111,6 +113,31 @@ enum deflate_format {
 /** ZLIB ADLER32 length (in bits) */
 #define ZLIB_ADLER32_BITS 32
 
+/** GZIP header length (in bytes) */
+#define GZIP_HEADER_BYTES 10
+
+/** GZIP header compression method: DEFLATE */
+#define GZIP_HEADER_CM_DEFLATE 8
+
+/** GZIP header flags */
+#define GZIP_HEADER_FLG_FTEXT    0x01
+#define GZIP_HEADER_FLG_FHCRC    0x02
+#define GZIP_HEADER_FLG_FEXTRA   0x04
+#define GZIP_HEADER_FLG_FNAME    0x08
+#define GZIP_HEADER_FLG_FCOMMENT 0x10
+
+/** GZIP header XLEN bytes */
+#define GZIP_HEADER_XLEN_BYTES 2
+
+/** GZIP header FHCRC bytes */
+#define GZIP_HEADER_FHCRC_BYTES 2
+
+/** GZIP footer CRC32 bytes */
+#define GZIP_FOOTER_CRC32_BYTES 4
+
+/** GZIP footer ISIZE bytes */
+#define GZIP_FOOTER_ISIZE_BYTES 4
+
 /** A Huffman-coded set of symbols of a given length */
 struct deflate_huf_symbols {
 	/** Length of Huffman-coded symbols */
@@ -235,6 +262,12 @@ struct deflate {
 	uint8_t lengths[ ( ( DEFLATE_LITLEN_MAX_CODE + 1 ) +
 			   ( DEFLATE_DISTANCE_MAX_CODE + 1 ) +
 			   1 /* round up */ ) / 2 ];
+
+	/** ZLIB/GZIP checksum */
+	uint32_t checksum;
+
+	/** Total inflated length */
+	unsigned int total_length;
 };
 
 /** A chunk of data */
diff --git a/src/tests/deflate_test.c b/src/tests/deflate_test.c
index 20ff5b9a..711f866d 100644
--- a/src/tests/deflate_test.c
+++ b/src/tests/deflate_test.c
@@ -133,6 +133,21 @@ DEFLATE ( zlib, DEFLATE_ZLIB,
 		 0x65, 0x63, 0x69, 0x66, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f,
 		 0x6e ) );
 
+/* "GZIP file format specification version 4.3" */
+DEFLATE ( gzip, DEFLATE_GZIP,
+	  DATA ( 0x1f, 0x8b, 0x08, 0x08, 0x72, 0x4b, 0x54, 0x5c, 0x02, 0x0b,
+		 0x67, 0x7a, 0x69, 0x70, 0x2d, 0x74, 0x65, 0x73, 0x74, 0x00,
+		 0x73, 0x8f, 0xf2, 0x0c, 0x50, 0x48, 0xcb, 0xcc, 0x49, 0x55,
+		 0x48, 0xcb, 0x2f, 0xca, 0x4d, 0x2c, 0x51, 0x28, 0x2e, 0x48,
+		 0x4d, 0xce, 0x4c, 0xcb, 0x4c, 0x4e, 0x2c, 0xc9, 0xcc, 0xcf,
+		 0x53, 0x28, 0x4b, 0x2d, 0x2a, 0x06, 0xd1, 0x26, 0x7a, 0xc6,
+		 0x00, 0xde, 0x2b, 0xcf, 0xca, 0x2a, 0x00, 0x00, 0x00 ),
+	  DATA ( 0x47, 0x5a, 0x49, 0x50, 0x20, 0x66, 0x69, 0x6c, 0x65, 0x20,
+		 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x20, 0x73, 0x70, 0x65,
+		 0x63, 0x69, 0x66, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e,
+		 0x20, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x20, 0x34,
+		 0x2e, 0x33 ) );
+
 /* "ZLIB Compressed Data Format Specification" fragment list */
 static struct deflate_test_fragments zlib_fragments[] = {
 	{ { -1UL, } },
@@ -231,6 +246,7 @@ static void deflate_test_exec ( void ) {
 		deflate_ok ( deflate, &hello_hello_world, NULL );
 		deflate_ok ( deflate, &rfc_sentence, NULL );
 		deflate_ok ( deflate, &zlib, NULL );
+		deflate_ok ( deflate, &gzip, NULL );
 
 		/* Test fragmentation */
 		for ( i = 0 ; i < ( sizeof ( zlib_fragments ) /
-- 
2.20.1.windows.1




More information about the ipxe-devel mailing list