/*
 * zip2john processes input ZIP files into a format suitable for use with JtR.
 *
 * This software is Copyright (c) 2011, Dhiru Kholia <dhiru.kholia at gmail.com>,
 * and it is hereby released to the general public under the following terms:
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted.
 *
 * Updated in Aug 2011 by JimF.  Added PKZIP 'old' encryption.  The signature on the
 * pkzip will be $pkzip$ and does not look like the AES version written by Dhiru
 * Also fixed some porting issues, such as variables needing declared at top of blocks.
 *
 * References:
 *
 * 1. http://www.winzip.com/aes_info.htm
 * 2. http://www.winzip.com/aes_tips.htm
 * 4. ftp://ftp.info-zip.org/pub/infozip/doc/appnote-iz-latest.zip
 * 5. Nathan Moinvaziri's work in extending minizip to support AES.
 * 6. http://oldhome.schmorp.de/marc/fcrackzip.html (coding hints)
 * 7. http://www.pkware.com/documents/casestudies/APPNOTE.TXT
 * 8. http://gladman.plushost.co.uk/oldsite/cryptography_technology/fileencrypt/index.php
 *   (borrowed files have "gladman_" prepended to them). This gladman code has been removed from JtR source tree.
 *
 * Usage:
 *
 * 1. Run zip2john on zip file(s) as "zip2john [zip files]".
 *    Output is written to standard output.
 * 2. Run JtR on the output generated by zip2john as "john [output file]".
 *
 * Output Line Format:
 *
 * For type = 0, for ZIP files encrypted using AES
 * filename:$zip$*type*hex(CRC)*encryption_strength*hex(salt)*hex(password_verification_value):hex(authentication_code)
 *
 * For original pkzip encryption:  (JimF, with longer explanation of fields)
 * filename:$pkzip$C*B*[DT*MT{CL*UL*CR*OF*OX}*CT*DL*CS*DA]*$/pkzip$   (deprecated)
 * filename:$pkzip2$C*B*[DT*MT{CL*UL*CR*OF*OX}*CT*DL*CS*TC*DA]*$/pkzip2$   (new format, with 2 checksums)
 * All numeric and 'binary data' fields are stored in hex.
 *
 * C   is the count of hashes present (the array of items, inside the []  C can be 1 to 3.).
 * B   is number of valid bytes in the checksum (1 or 2).  Unix zip is 2 bytes, all others are 1 (NOTE, some can be 0)
 * ARRAY of data starts here
 *   DT  is a "Data Type enum".  This will be 1 2 or 3.  1 is 'partial'. 2 and 3 are full file data (2 is inline, 3 is load from file).
 *   MT  Magic Type enum.  0 is no 'type'.  255 is 'text'. Other types (like MS Doc, GIF, etc), see source.
 *     NOTE, CL, DL, CRC, OFF are only present if DT != 1
 *     CL  Compressed length of file blob data (includes 12 byte IV).
 *     UL  Uncompressed length of the file.
 *     CR  CRC32 of the 'final' file.
 *     OF  Offset to the PK\x3\x4 record for this file data. If DT == 2, then this will be a 0, as it is not needed, all of the data is already included in the line.
 *     OX  Additional offset (past OF), to get to the zip data within the file.
 *     END OF 'optional' fields.
 *   CT  Compression type  (0 or 8)  0 is stored, 8 is deflated.
 *   DL  Length of the DA data.
 *   CS  2 bytes of checksum data.
 *   TC  2 bytes of checksum data (from timestamp)
 *   DA  This is the 'data'.  It will be hex data if DT == 1 or 2. If DT == 3, then it is a filename (name of the .zip file).
 * END of array item.  There will be C (count) array items.
 * The format string will end with $/pkzip$
 *
 * The AES-zip format redone by JimF, Summer 2014.  Spent some time to understand the AES authentication code,
 * and now have placed code to do this. However, this required format change.  The old AES format was:
 *
 *    For type = 0, for ZIP files encrypted using AES
 *    filename:$zip$*type*hex(CRC)*encryption_strength*hex(salt)*hex(password_verification_value):hex(authentication_code)
 *     NOTE, the authentication code was NOT part of this, even though documented in this file. nor is hex(CRC) a part.
 *
 * The new format is:  (and the $zip$ is deprecated)
 *
 *    filename:$zip2$*Ty*Mo*Ma*Sa*Va*Le*DF*Au*$/zip2$
 *    Ty = type (0) and ignored.
 *    Mo = mode (1 2 3 for 128/192/256 bit)
 *    Ma = magic (file magic).  This is reserved for now.  See pkzip_fmt_plug.c or zip2john.c for information.
 *         For now, this must be a '0'
 *    Sa = salt(hex).   8, 12 or 16 bytes of salt (depends on mode)
 *    Va = Verification bytes(hex) (2 byte quick checker)
 *    Le = real compr len (hex) length of compressed/encrypted data (field DF)
 *    DF = compressed data DF can be Le*2 hex bytes, and if so, then it is the ENTIRE file blob written 'inline'.
 *         However, if the data blob is too long, then a .zip ZIPDATA_FILE_PTR_RECORD structure will be the 'contents' of DF
 *    Au = Authentication code (hex) a 10 byte hex value that is the hmac-sha1 of data over DF. This is the binary() value
 *
 *  ZIPDATA_FILE_PTR_RECORD  (this can be the 'DF' of this above hash line).
 *      *ZFILE*Fn*Oh*Ob*  (Note, the leading and trailing * are the * that 'wrap' the DF object.)
 *  ZFILE This is the literal string ZFILE
 *  Fn    This is the name of the .zip file.  NOTE the user will need to keep the .zip file in proper locations (same as
 *        was seen when running zip2john. If the file is removed, this hash line will no longer be valid.
 *  Oh    Offset to the zip central header record for this blob.
 *  Ob    Offset to the start of the blob data
 *
 *
 * The new format for PKWARE's Strong Encryption Specification is:
 *
 *    filename:$zip3$*Ty*Al*Bi*Ma*Sa*Erd*Le*DF*Au*Fn
 *    Ty = type (0) and ignored.
 *    Al = algorithm (1 for AES)
 *    Bi = bit length (128/192/256 bit)
 *    Ma = magic (file magic), reserved, must be '0' now
 *    Sa = salt(hex), 12 or 16 bytes of IV data
 *    Erd = encrypted random data (max. 256 bytes)
 *    Le = real compr len (hex) length of compressed/encrypted data (field DF), unused currently
 *    DF = compressed data DF can be Le*2 hex bytes, and if so, then it is the ENTIRE file blob written 'inline', unused currently
 *    Au = authentication code, a 8 byte hex value that contains a CRC32 checksum, unused currently
 *    Fn = filename within zip file
 */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "arch.h"
#if !AC_BUILT || HAVE_LIMITS_H
#include <limits.h>
#endif
#include <errno.h>
#include <string.h>
#include <assert.h>
#include <ctype.h>
#if (!AC_BUILT || HAVE_UNISTD_H) && !_MSC_VER
#include <unistd.h>
#endif

#include "common.h"
#include "jumbo.h"
#include "formats.h"
#include "memory.h"
#include "pkzip.h"
#ifdef _MSC_VER
#include "missing_getopt.h"
#endif
#include "johnswap.h"

/* Command-line switches, set in zip2john() by the -c, -m and -2 options. */
static int checksum_only = 0, use_magic = 0;
static int force_2_byte_checksum = 0;
/* Arguments of the -a and -o options; NULL when the option was not given. */
static char *ascii_fname, *only_fname;

/*
 * Parallel tables used by magic_type(): MagicTypes[i] is a file extension
 * (matched without regard to case) and MagicToEnum[i] is the magic-type
 * number reported for it. Several extensions share one number (e.g. DOC,
 * XLS, DOT and XLT all map to 1). Index 0 and the NULL terminator map to 0.
 */
static char *MagicTypes[] = { "", "DOC", "XLS", "DOT", "XLT", "EXE", "DLL", "ZIP", "BMP", "DIB", "GIF", "PDF", "GZ", "TGZ", "BZ2", "TZ2", "FLV", "SWF", "MP3", NULL };
static int  MagicToEnum[] = {  0,   1,     1,     1,     1,     2,     2,     3,     4,     4,     5,     6,     7,    7,     8,     8,     9,     10,    11,  0};

/*
 * Write the first `len` bytes of `str` to stdout as two-digit hex values,
 * with no separator and no trailing character. Nothing is written when
 * `len` is zero or negative.
 */
static void print_hex_inline(unsigned char *str, int len)
{
	unsigned char *byte = str;
	int remaining;

	for (remaining = len; remaining > 0; --remaining)
		printf("%02x", *byte++);
}

static void process_old_zip(const char *fname);
/*
 * Read `count` bytes from fp. When `out` is non-NULL each byte is appended to
 * the string at *out as two hex digits taken from itoa16, *out is advanced
 * past them and the string is NUL-terminated again; when `out` is NULL the
 * bytes are only consumed. Returns 0 on success, or -1 as soon as fgetc()
 * reports EOF (end of file or read error).
 */
static int aes_hex_from_file(FILE *fp, char **out, uint64_t count)
{
	uint64_t n;

	for (n = 0; n < count; n++) {
		int c = fgetc(fp);

		if (c == EOF)
			return -1;
		if (out) {
			*(*out)++ = itoa16[ARCH_INDEX(c >> 4)];
			*(*out)++ = itoa16[ARCH_INDEX(c & 0x0f)];
		}
	}
	if (out)
		**out = 0;
	return 0;
}

/*
 * Walk the records of the ZIP archive `fname` and print a hash line on stdout
 * for what is found:
 *   - compression method 99 (WinZip AES): a $zip2$ line,
 *   - encrypted entries with version 51, 52 or >= 61 whose decryption header
 *     parses: a $zip3$ line,
 *   - any other entry with the encryption flag set: the file is closed and
 *     handed over to process_old_zip().
 * Diagnostics go to stderr. Scanning stops at the central directory, at the
 * end-of-central-directory record, at EOF or on a read error.
 */
static void process_file(const char *fname)
{
	unsigned char filename[1024];
	FILE *fp;
	char *cur = NULL, *cp;
	uint64_t best_len = 0xffffffff;


	if (!(fp = fopen(fname, "rb"))) {
		fprintf(stderr, "! %s : %s\n", fname, strerror(errno));
		return;
	}

	/* feof() alone never becomes true after a read error, so test both */
	while (!feof(fp) && !ferror(fp)) {
		uint32_t id = fget32LE(fp);
		uint32_t store = 0;

		if (id == 0x04034b50UL) {	/* local header */
			uint16_t version = fget16LE(fp);
			uint16_t flags = fget16LE(fp);
			uint16_t compression_method = fget16LE(fp);
			uint16_t lastmod_time = fget16LE(fp);
			uint16_t lastmod_date = fget16LE(fp);
			uint32_t crc = fget32LE(fp);
			uint64_t compressed_size = fget32LE(fp);
			uint64_t uncompressed_size = fget32LE(fp);
			uint16_t filename_length = fget16LE(fp);
			uint16_t extrafield_length = fget16LE(fp);
			/* unused variables */
			(void) version;
			(void) lastmod_time;
			(void) lastmod_date;
			(void) crc;
			(void) uncompressed_size;

			if (filename_length > 250) {
				fprintf(stderr, "! %s: Invalid zip file, filename length too long!\n", fname);
				/* go through cleanup so that fp is closed */
				goto cleanup;
			}
			if (fread(filename, 1, filename_length, fp) != filename_length) {
				fprintf(stderr, "Error, in fread of file data!\n");
				goto cleanup;
			}
			filename[filename_length] = 0;

			if (compression_method == 99) {	/* AES encryption */
#define AES_EXTRA_DATA_LENGTH 11  // http://www.winzip.com/aes_info.htm#authentication-code
				uint64_t real_cmpr_len;
				uint16_t efh_id;
				uint16_t efh_datasize;
				uint16_t efh_vendor_version = 0;
				uint16_t efh_vendor_id = 0;
				char efh_aes_strength = 0;
				uint16_t actual_compression_method = 0;
				char *bname;
				int found = 0;
				int salt_len;
				int magic_enum = 0;  // reserved at 0 for now, we are not computing this (yet).

				// There could be multiple extra fields, so need to process them all.
				while (!ferror(fp)  && extrafield_length > 0) {
					if (extrafield_length < 4) {
						// Too short to hold another id/size pair: treat it
						// as padding and step over it.
						fseek(fp, extrafield_length, SEEK_CUR);
						extrafield_length = 0;
						break;
					}
					efh_id = fget16LE(fp);
					efh_datasize = fget16LE(fp);

					// A record must fit in what is left of the extra field,
					// otherwise the 16-bit subtraction below would wrap.
					if (efh_datasize > extrafield_length - 4) {
						fprintf(stderr, "! %s: Invalid zip file, extra field record too long!\n", fname);
						goto cleanup;
					}

					// Adjust the bytes processed for id, size and actual data so the
					// file pointer is moved on correctly,
					// - 2 bytes for the efh_id
					// - 2 bytes for the efh_datasize
					extrafield_length = extrafield_length - 2 - 2 - efh_datasize;
					if (efh_id != 0x9901) {
#if DEBUG
						fprintf(stderr, "[DEBUG] Skipping over efh_id (%x) with size %d.\n", efh_id, efh_datasize);
#endif
						fseek(fp, efh_datasize, SEEK_CUR);
					} else {
						found = 1;
						// Data size: this value is currently 7, but because it is possible that this
						// specification will be modified in the future to store additional data in
						// this extra field, vendors should not assume that it will always remain 7.
						if (efh_datasize != 7) {
							fprintf(stderr, "AES_EXTRA_DATA_LENGTH is not 11 for %s, please report this to us!\n", fname);
							goto cleanup;
						}
						efh_vendor_version = fget16LE(fp);
						efh_vendor_id = fget16LE(fp);
						efh_aes_strength = fgetc(fp);
						actual_compression_method = fget16LE(fp);
					}
				}
				if (!found)
					goto cleanup;

				bname = jtr_basename(fname);

				// Salt value -> (4 + 4 * efh_aes_strength) bytes; it is
				// written out as hex, so it must not exceed 16 bytes.
				salt_len = 4 + 4 * efh_aes_strength;
				if (salt_len < 0 || salt_len > 16) {
					fprintf(stderr, "Error, in fread of file data!\n");
					goto cleanup;
				}

				// The stored blob is salt + 2 verification bytes + data +
				// 10 byte authentication code (the loop further down reads
				// 10). AES_EXTRA_DATA_LENGTH is 11, hence the "+ 1".
				// Reject sizes that cannot hold the fixed parts, instead of
				// letting the unsigned subtraction wrap around.
				if (compressed_size + 1 < (uint64_t)salt_len + 2 + AES_EXTRA_DATA_LENGTH) {
					fprintf(stderr, "! %s: Invalid zip file, compressed size too small for AES data!\n", fname);
					goto cleanup;
				}
				real_cmpr_len = compressed_size - 2 - salt_len - AES_EXTRA_DATA_LENGTH + 1;

				if (best_len < compressed_size) {
#if DEBUG
					fprintf(stderr, "This buffer not used, it is not 'best' size\n");
#endif
				} else {
					store = 1;
					best_len = compressed_size;
					MEM_FREE(cur);
					// Room for the hex data plus every string placed in the
					// line (bname and filename appear twice, fname once) and
					// the fixed punctuation / short hex fields.
					cur = mem_alloc(2 * real_cmpr_len +
					                2 * strlen(bname) +
					                2 * strlen((char*)filename) +
					                strlen(fname) + 256);
				}
				cp = cur;

#if DEBUG
				fprintf(stderr,
				    "%s/%s is using AES encryption, extrafield_length is %d\n",
				    bname, filename, extrafield_length);
#endif
				/* unused variables */
				(void) efh_id;
				(void) efh_datasize;
				(void) efh_vendor_version;
				(void) efh_vendor_id;
				(void) actual_compression_method; /* we need this!! */

				if (store)
					cp += sprintf(cp, "%s/%s:$zip2$*0*%x*%x*",
					              bname, filename, efh_aes_strength,
					              magic_enum);
				// Even when this entry is not stored, its bytes are still
				// read so that the file position moves past it.
				if (aes_hex_from_file(fp, store ? &cp : NULL, salt_len)) {
					fprintf(stderr, "Error, in fread of file data!\n");
					goto cleanup;
				}
				if (store)
					cp += sprintf(cp, "*");
				// since in the format we read/compare this one, we do it char by
				// char, so there is no endianity swapping needed. (validator)
				if (aes_hex_from_file(fp, store ? &cp : NULL, 2)) {
					fprintf(stderr, "Error, in fread of file data!\n");
					goto cleanup;
				}
				if (store)
					cp += sprintf(cp, "*%"PRIx64"*", real_cmpr_len);

				if (aes_hex_from_file(fp, store ? &cp : NULL, real_cmpr_len)) {
					fprintf(stderr, "Error, in fread of file data!\n");
					goto cleanup;
				}
				if (store)
					cp += sprintf(cp, "*");
				// authentication code
				if (aes_hex_from_file(fp, store ? &cp : NULL, 10)) {
					fprintf(stderr, "Error, in fread of file data!\n");
					goto cleanup;
				}
				if (store)
					cp += sprintf(cp, "*$/zip2$:%s:%s:%s\n",
					              filename, bname, fname);
				if (cur) {
					printf("%s", cur);
					MEM_FREE(cur);  // dirty hack to avoid printing of last hash twice
				}
				// NOTE(review): control falls through to the "flags & 1" test
				// below, so an AES entry with the encryption flag set also
				// triggers process_old_zip() and ends this scan - confirm
				// that this is intended.
			} else if (flags & 1 && (version == 51 || version == 52 || version >= 61)) {	/* Strong Encryption?, APPNOTE-6.3.4.TXT, bit 6 check doesn't really work */
				// fseek(fp, filename_length, SEEK_CUR);
				// fseek(fp, extrafield_length, SEEK_CUR);
				// continue;
				unsigned char iv[16];
				unsigned char Erd[256];
				uint32_t Size;
				uint32_t Format;
				uint16_t AlgId;
				uint16_t Bitlen;
				uint16_t Flags;
				uint16_t ErdSize;
				uint32_t Reserved1;
				uint16_t VSize;
				uint16_t IVSize;
				char *bname;
				long previous_position;

				// unused
				(void) Flags;
				(void) Bitlen;
				(void) Reserved1;
				(void) Size;

				bname = jtr_basename(fname);
				// NOTE(review): the extra field has not been skipped at
				// this point - confirm the decryption header starts here.
				previous_position = ftell(fp);
				IVSize = fget16LE(fp);
				if (IVSize > sizeof(iv))
					goto bail;
				if (fread(iv, 1, IVSize, fp) != IVSize)
					goto bail;
				Size = fget32LE(fp);
				Format = fget16LE(fp);
				if (Format != 3) {
					goto bail;
				}
				AlgId = fget16LE(fp);
				if (AlgId == 0x660E || AlgId == 0x660F || AlgId ==  0x6610)
					AlgId = 1;
				else if (AlgId == 0x6603 || AlgId == 0x6609 || AlgId == 0x6720 || AlgId == 0x6721 || AlgId == 0x6801) {
					fprintf(stderr, "AlgId (%x) is currently unsupported, please report this to us!\n", AlgId);
					goto bail;
				} else
					goto bail;
				if (IVSize == 0) {
					// No IV stored: build one from the CRC and the
					// uncompressed size (little-endian byte order).
					memset(iv, 0, 16);
#if !ARCH_LITTLE_ENDIAN
					crc = JOHNSWAP(crc);
					uncompressed_size = JOHNSWAP64(uncompressed_size);
#endif
					memcpy(iv, &crc, 4);
					memcpy(iv + 4, &uncompressed_size, 8);
					IVSize = 12;
				}
				Bitlen = fget16LE(fp);
				Flags = fget16LE(fp);
				ErdSize = fget16LE(fp);
				if (ErdSize > sizeof(Erd))
					goto bail;
				if (fread(Erd, 1, ErdSize, fp) != ErdSize)
					goto bail;
				Reserved1 = fget32LE(fp);
				if (Reserved1 != 0) {
					fprintf(stderr, "Reserved1 is %u (non-zero), please report this bug to us!\n", Reserved1);
					goto bail;
				}
				VSize = fget16LE(fp);
				fseek(fp, VSize, SEEK_CUR);

				printf("%s:$zip3$*%d*%d*%d*%d*", bname, 0, AlgId, Bitlen, 0);
				print_hex_inline(iv, IVSize);  // getting this right isn't important ;)
				printf("*");
				print_hex_inline(Erd, ErdSize);
				printf("*0*0*0*%s\n", filename);
				continue;
bail:
				// Not a decryption header we understand: rewind and fall
				// through to the old-encryption handling below.
				fseek(fp, previous_position, SEEK_SET);
				fseek(fp, filename_length, SEEK_CUR);
				fseek(fp, extrafield_length, SEEK_CUR);
				fseek(fp, compressed_size, SEEK_CUR);
			}
			if (flags & 1) {	/* old encryption */
				fclose(fp);
				fp = 0;
				process_old_zip(fname);
				return;
			} else {
				fprintf(stderr, "%s/%s is not encrypted!\n",
				        jtr_basename(fname), filename);
				fseek(fp, extrafield_length, SEEK_CUR);
				fseek(fp, compressed_size, SEEK_CUR);
			}
		} else if (id == 0x08074b50UL) {	/* data descriptor */
			fseek(fp, 12, SEEK_CUR);
		} else if (id == 0x02014b50UL) {	/* central directory structures */
			/* uint16_t version_maker = fget16LE(fp);
			uint16_t version_needed = fget16LE(fp);
			uint16_t filename_length;
			uint16_t extrafield_length;
			uint16_t comment_length;
			(void) fget16LE(fp);
			(void) fget16LE(fp);
			(void) fget16LE(fp);
			(void) fget16LE(fp);
			(void) fget32LE(fp);
			(void) fget32LE(fp);
			(void) fget32LE(fp);
			filename_length = fget16LE(fp);
			extrafield_length = fget16LE(fp);
			comment_length = fget16LE(fp);
			(void) fget16LE(fp);
			(void) fget16LE(fp);
			(void) fget32LE(fp);
			(void) fget32LE(fp);
			(void) version_maker;
			(void) version_needed;

			if (fread(filename, 1, filename_length, fp) != filename_length) {
				fprintf(stderr, "Error, in fread of file data!\n");
				goto cleanup;
			}
			filename[filename_length] = 0;
			fseek(fp, extrafield_length, SEEK_CUR);
			fseek(fp, comment_length, SEEK_CUR); */
			goto cleanup;
		} else if (id == 0x06054b50UL) { /* end of central dir  */
			goto cleanup;
		}
	}

cleanup:
	/*
	 * Complete hash lines are printed (and cur released) as soon as they are
	 * built, so a non-NULL cur here is a line abandoned half-way because of
	 * an error. It is discarded rather than printed.
	 */
	MEM_FREE(cur);
	fclose(fp);
}

/* Instead of reusing anything from process_file, once we detect an old style
 * encrypted entry we simply close the file and call process_old_zip(), which
 * handles the older pkzip password format; process_file itself handles the
 * AES from WinZip. The types below are used by that older code path.
 */
/* One encrypted archive entry, as filled in by LoadZipBlob(). */
typedef struct _zip_ptr
{
	char         *hash_data;           /* cmp_len bytes of entry data read from the archive (allocated) */
	char         *file_name;           /* copy of the entry's file name (allocated) */
	uint32_t      crc;                 /* CRC32 field of the local header or data descriptor */
	uint64_t      offset, offex;       /* offset: file position of the PK\3\4 signature;
	                                      offex: 30 + filename length + extra field length */
	uint64_t      cmp_len, decomp_len; /* compressed / uncompressed size fields */
	uint16_t      magic_type, cmptype; /* result of magic_type(); compression method field */
	char          chksum[5];           /* 4 hex digits + NUL: the two high bytes of crc */
	char          chksum2[5];          /* 4 hex digits + NUL: the last-modified time field */
} zip_ptr;

/*
 * Per-archive checksum settings shared by all entries. process_old_zip()
 * starts with unix_made = 0, check_in_crc = 1, check_bytes = 1.
 */
typedef struct _zip_file
{
	int unix_made;    /* set to 1 once an Info-ZIP extra field id is seen */
	int check_in_crc; /* cleared to 0 once an Info-ZIP extra field id is seen */
	int check_bytes;  /* number of checksum bytes: 1, or 2 for Info-ZIP archives or with -2 */
} zip_file;

/*
 * Map an entry's file name to a "magic type" number.
 *
 * Returns 0 when -m (use_magic) was not given, when the name has no '.', or
 * when the text after the last '.' is not listed in MagicTypes. Returns 255
 * when the whole name equals the -a file name (ascii_fname). Otherwise
 * returns the MagicToEnum entry that belongs to the matching extension.
 * All comparisons ignore case.
 *
 * The name is compared in place with strcasecmp(), so no upper-cased copy of
 * it has to be allocated (the copy used to be made even when use_magic was
 * off).
 */
static int magic_type(const char *filename) {
	const char *ext;
	int i;

	if (!use_magic)
		return 0;

	if (ascii_fname && !strcasecmp(filename, ascii_fname))
		return 255;

	ext = strrchr(filename, '.');
	if (!ext)
		return 0;
	++ext;
	/* index 0 is the empty placeholder entry, so start at 1 */
	for (i = 1; MagicTypes[i]; ++i)
		if (!strcasecmp(ext, MagicTypes[i]))
			return MagicToEnum[i];
	return 0;
}

/*
 * Write `len` bytes starting at `p` to stdout as two-digit hex values,
 * followed by a single '*' field separator.
 */
static void print_hex(unsigned char *p, uint64_t len) {
	uint64_t left;

	for (left = len; left != 0; --left) {
		printf("%02x", *p);
		++p;
	}
	printf("*");
}

// If archive was created from a non-seekable stream, we need to find CRC and
// sizes AFTER file data which means we're in a hen-and-egg situation since we
// don't know the size... I think the below is enough but there may be edge
// cases where we need to also recognize some other kind of start-of-whatever
// and seek back 16 bytes.
/*
 * Scan forward from the current position of *fp for the first of
 *   "PK\7\8"  (data descriptor signature),
 *   "PK\3\4"  (next local file header) or
 *   "PK\1\2"  (central directory header)
 * and load p->crc, p->cmp_len and p->decomp_len from the data descriptor:
 * for "PK\7\8" the fields follow the signature, for the other two they are
 * taken from the 12 bytes (20 when size64 is set) just before the signature.
 * With size64 set the two sizes are read as 64-bit values, else as 32-bit.
 * The file position is restored before returning. Progress is reported on
 * stderr. If no signature is found, *p is left untouched.
 *
 * The signature is matched with a small state machine that looks at every
 * byte exactly once. Testing the third byte with a fresh fgetc() per
 * candidate (as this function used to do) consumes extra bytes, so only
 * "PK\7\8" could ever match.
 */
static void scan_for_eod (FILE **fp, zip_ptr *p, int size64)
{
	FILE *in = *fp;
	long saved_pos = ftell(in);
	int matched = 0;	/* number of signature bytes matched so far (0..3) */
	int kind = 0;		/* third signature byte once matched == 3 */
	int found = 0;
	int c;

	fprintf(stderr, "Scanning for EOD... ");
	/* fgetc() returns EOF on end of file and on read errors alike */
	while ((c = fgetc(in)) != EOF) {
		if (matched == 3 && c == kind + 1) {
			found = 1;
			break;
		}
		if (matched == 2 && (c == 0x07 || c == 0x03 || c == 0x01)) {
			kind = c;
			matched = 3;
		} else if (matched == 1 && c == 0x4b) {
			matched = 2;
		} else {
			/* a 'P' may itself start a new signature */
			matched = (c == 0x50);
		}
	}

	if (found) {
		if (kind == 0x07) {
			fprintf(stderr, "FOUND Extended local header\n");
		} else {
			if (kind == 0x03)
				fprintf(stderr, "FOUND next Local file header\n");
			else
				fprintf(stderr, "FOUND Central directory\n");
			/* step back over the signature and the descriptor fields */
			if (size64)
				fseek(in, -24, SEEK_CUR);
			else
				fseek(in, -16, SEEK_CUR);
		}
		p->crc = fget32LE(in);
		if (size64) {
			p->cmp_len = fget64LE(in);
			p->decomp_len = fget64LE(in);
		} else {
			p->cmp_len = fget32LE(in);
			p->decomp_len = fget32LE(in);
		}
	} else {
		/* finish the progress line started above */
		fprintf(stderr, "not found\n");
	}

	fseek(in, saved_pos, SEEK_SET);
}

/*
 * Parse the local file header that follows an already consumed PK\3\4
 * signature and, if the entry is one we handle, load its data into *p.
 *
 * fp        archive stream, positioned just after the 4 signature bytes
 * p         receives the entry description; it is zeroed first
 * zfp       per-archive checksum settings, updated when an Info-ZIP extra
 *           field is seen or -2 was given
 * zip_fname archive name, used in messages only
 *
 * Returns 1 when p->hash_data / p->file_name were allocated and filled (the
 * caller then owns them). Returns 0 when the entry is skipped (not encrypted,
 * unhandled version, not the -o file) or on a read error; nothing is left
 * allocated in *p in that case.
 */
static int LoadZipBlob(FILE *fp, zip_ptr *p, zip_file *zfp, const char *zip_fname)
{
	uint16_t version,flags,lastmod_time,lastmod_date,filename_length,extrafield_length;
	unsigned char filename[1024];
	int size64 = 0;

	memset(p, 0, sizeof(*p));

	/* the caller has already read the 4 signature bytes */
	p->offset = ftell(fp)-4;
	version = fget16LE(fp);
	flags = fget16LE(fp);
	p->cmptype = fget16LE(fp);
	lastmod_time = fget16LE(fp);
	lastmod_date = fget16LE(fp);
	p->crc = fget32LE(fp);
	p->cmp_len = fget32LE(fp);
	p->decomp_len = fget32LE(fp);
	filename_length = fget16LE(fp);
	extrafield_length = fget16LE(fp);
	p->hash_data = NULL;
	p->file_name = NULL;
	/* unused variables */
	(void) lastmod_date;

	/* "<=" so that the terminating NUL below stays inside the buffer */
	if (sizeof(filename) <= filename_length ||
		fread(filename, 1, filename_length, fp) != filename_length) {
		fprintf(stderr, "Error, fread could not read the data from the file: %s\n", zip_fname);
		return 0;
	}
	filename[filename_length] = 0;
	p->magic_type = magic_type((char*)filename);

	/* 30 is the size of the fixed part of the local header */
	p->offex = 30 + filename_length + extrafield_length;

	if (!only_fname || !strcmp(only_fname, (char*)filename))
		fprintf(stderr, "ver %d.%d ", version / 10, version % 10);

	// Only entries with the encryption flag set and one of these "version"
	// values are handled.
	// 0x314 (788) was seen at 2012 CMIYC ?? I have to look into that one.
	if ( (flags & 1) &&
	     (version == 10 || version == 20 || version == 45 || version == 788)) {
		uint16_t extra_len_used = 0;

		/* the extra field is only parsed when flag bit 3 is set */
		if (flags & (1 << 3)) {
			while (extra_len_used < extrafield_length) {
				uint16_t efh_id = fget16LE(fp);
				uint16_t efh_datasize = fget16LE(fp);

				if (!only_fname || !strcmp(only_fname, (char*)filename))
					fprintf(stderr, "efh %04x ", efh_id);

				// Only take the two 64-bit sizes when the record is large
				// enough to hold them; otherwise "efh_datasize -= 16" would
				// wrap around. A shorter record is skipped like any other.
				if (efh_id == 0x0001 && efh_datasize >= 16) {
					size64 = 1;
					p->decomp_len = fget64LE(fp);
					p->cmp_len = fget64LE(fp);
					extra_len_used += 16;
					efh_datasize -= 16;
				}
				fseek(fp, efh_datasize, SEEK_CUR);

				extra_len_used += 4 + efh_datasize;
				if (efh_id == 0x07c8 ||  // Info-ZIP Macintosh (old, J. Lee)
					efh_id == 0x334d ||  // Info-ZIP Macintosh (new, D. Haase's 'Mac3' field)
					efh_id == 0x4d49 ||  // Info-ZIP OpenVMS (obsolete)
					efh_id == 0x5855 ||  // Info-ZIP UNIX (original; also OS/2, NT, etc.)
					efh_id == 0x6375 ||  // Info-ZIP UTF-8 comment field
					efh_id == 0x7075 ||  // Info-ZIP UTF-8 name field
					efh_id == 0x7855 ||  // Info-ZIP UNIX (16-bit UID/GID info)
					efh_id == 0x7875)    // Info-ZIP UNIX 3rd generation (generic UID/GID, ...)

					// 7zip ALSO is 2 byte checksum, but I have no way to find them.  NOTE, it is 2 bytes of CRC, not timestamp like InfoZip.
					// OLD winzip (I think 8.01 or before), is also supposed to be 2 byte.
					// old v1 pkzip (the DOS builds) are 2 byte checksums.
				{
					zfp->unix_made = 1;
					zfp->check_bytes = 2;
					zfp->check_in_crc = 0;
				}
			}
		}
		else if (extrafield_length)
			fseek(fp, extrafield_length, SEEK_CUR);

		/* sizes not in the header: look for them after the data */
		if (p->cmp_len == 0 && p->decomp_len == 0)
			scan_for_eod(&fp, p, size64);

		/* -o given and this is another file: step over its data */
		if (only_fname && strcmp(only_fname, (char*)filename)) {
			fseek(fp, p->cmp_len, SEEK_CUR);
			return 0;
		}

		if (force_2_byte_checksum)
			zfp->check_bytes = 2;

		fprintf(stderr,
		        "%s/%s PKZIP%s Encr:%s%s cmplen=%"PRIu64", decmplen=%"PRIu64", crc=%X\n",
		        jtr_basename(zip_fname), filename,
		        size64 ? "64" : "",
		        zfp->check_bytes == 2 ? " 2b chk," : "",
		        zfp->check_in_crc ? "" : " TS_chk,",
		        p->cmp_len, p->decomp_len, p->crc);

		/* *p was zeroed above, so there is nothing to release first */
		p->hash_data = mem_alloc(p->cmp_len + 1);
		p->file_name = mem_alloc(strlen((char*)filename) + 1);
		strcpy(p->file_name, (char*)filename);
		if (fread(p->hash_data, 1, p->cmp_len, fp) != p->cmp_len) {
			fprintf(stderr, "Error, fread could not read the data from the file: %s\n", zip_fname);
			/* the caller drops *p when 0 is returned, so release here */
			MEM_FREE(p->hash_data);
			MEM_FREE(p->file_name);
			return 0;
		}

		// Ok, now set checksum bytes.  This will depend upon if from crc, or from timestamp
		sprintf(p->chksum, "%02x%02x", (p->crc>>24)&0xFF, (p->crc>>16)&0xFF);
		sprintf(p->chksum2, "%02x%02x", lastmod_time>>8, lastmod_time&0xFF);

		return 1;
	}

	if (p->cmp_len == 0 && p->decomp_len == 0 && flags & (1 << 3))
		scan_for_eod(&fp, p, version >= 45);

	fprintf(stderr, "%s/%s is not encrypted, or stored with non-handled compression type\n", zip_fname, filename);
	fseek(fp, extrafield_length, SEEK_CUR);
	fseek(fp, p->cmp_len, SEEK_CUR);

	return 0;
}

/*
 * Produce one $pkzip2$ hash line for the archive `fname`.
 *
 * The archive is re-opened and every local header is handed to LoadZipBlob().
 * Up to three loaded entries with decomp_len > 3 are kept in hashes[], ordered
 * by compressed length with the smallest in hashes[0]. Once three are held, a
 * new entry may replace one of them according to its size and magic_type (see
 * the comments in the loop). Scanning stops at the central directory, at the
 * end-of-central-directory record, or at EOF.
 *
 * The hash line goes to stdout; notes and errors go to stderr.
 */
static void process_old_zip(const char *fname)
{
	FILE *fp;
	int count_of_hashes = 0;
	zip_ptr hashes[3], curzip;
	zip_file zfp;

	memset(hashes, 0, sizeof(hashes));

	/* defaults: 1 checksum byte, taken from the CRC */
	zfp.check_in_crc = 1;
	zfp.check_bytes = 1;
	zfp.unix_made = 0;

	if (!(fp = fopen(fname, "rb"))) {
		fprintf(stderr, "! %s : %s\n", fname, strerror(errno));
		return;
	}

	while (!feof(fp)) {
		uint32_t id = fget32LE(fp);

		if (id == 0x04034b50UL) {	/* local header */
			if (LoadZipBlob(fp, &curzip, &zfp, fname) && curzip.decomp_len > 3) {
				/* first entry: just take it */
				if (!count_of_hashes)
					memcpy(&(hashes[count_of_hashes++]), &curzip, sizeof(curzip));
				else {
					/* second entry: keep the smaller of the two in slot 0 */
					if (count_of_hashes == 1) {
						if (curzip.cmp_len < hashes[0].cmp_len) {
							memcpy(&(hashes[count_of_hashes++]), &(hashes[0]), sizeof(curzip));
							memcpy(&(hashes[0]), &curzip, sizeof(curzip));
						} else
							memcpy(&(hashes[count_of_hashes++]), &curzip, sizeof(curzip));
					}
					/* third entry: insert so that the slots stay ordered by cmp_len */
					else if (count_of_hashes == 2) {
						if (curzip.cmp_len < hashes[0].cmp_len) {
							memcpy(&(hashes[count_of_hashes++]), &(hashes[1]), sizeof(curzip));
							memcpy(&(hashes[1]), &(hashes[0]), sizeof(curzip));
							memcpy(&(hashes[0]), &curzip, sizeof(curzip));
						} else if (curzip.cmp_len < hashes[1].cmp_len) {
							memcpy(&(hashes[count_of_hashes++]), &(hashes[1]), sizeof(curzip));
							memcpy(&(hashes[1]), &curzip, sizeof(curzip));
						} else
							memcpy(&(hashes[count_of_hashes++]), &curzip, sizeof(curzip));
					}
					/*
					 * All three slots are in use: decide whether curzip displaces one.
					 * Only hash_data of a displaced entry is released here, its
					 * file_name is not; an entry that displaces nothing is not
					 * released here either.
					 */
					else {
						int done = 0;
						if (curzip.magic_type && curzip.cmp_len > hashes[0].cmp_len) {
							// if we have a magic type, we will replace any NON magic type, for the 2nd and 3rd largest, without caring about
							// the size.
							if (hashes[1].magic_type == 0) {
								if (hashes[2].cmp_len < curzip.cmp_len) {
									MEM_FREE(hashes[1].hash_data);
									memcpy(&(hashes[1]), &(hashes[2]), sizeof(curzip));
									memcpy(&(hashes[2]), &curzip, sizeof(curzip));
									done = 1;
								} else {
									MEM_FREE(hashes[1].hash_data);
									memcpy(&(hashes[1]), &curzip, sizeof(curzip));
									done = 1;
								}
							} else if (hashes[2].magic_type == 0) {
								if (hashes[1].cmp_len < curzip.cmp_len) {
									MEM_FREE(hashes[2].hash_data);
									memcpy(&(hashes[2]), &curzip, sizeof(curzip));
									done = 1;
								} else {
									MEM_FREE(hashes[2].hash_data);
									memcpy(&(hashes[2]), &(hashes[1]), sizeof(curzip));
									memcpy(&(hashes[1]), &curzip, sizeof(curzip));
									done = 1;
								}
							}
						}
						if (!done && curzip.cmp_len < hashes[0].cmp_len) {
							// we 'only' replace the smallest zip, and always keep as many any other magic as possible.
							if (hashes[0].magic_type == 0) {
								MEM_FREE(hashes[0].hash_data);
								memcpy(&(hashes[0]), &curzip, sizeof(curzip));
							} else {
								// Ok, the 1st is a magic, we WILL keep it.
								if (hashes[1].magic_type) {  // Ok, we found our 2
									MEM_FREE(hashes[2].hash_data);
									memcpy(&(hashes[2]), &(hashes[1]), sizeof(curzip));
									memcpy(&(hashes[1]), &(hashes[0]), sizeof(curzip));
									memcpy(&(hashes[0]), &curzip, sizeof(curzip));
								} else if (hashes[2].magic_type) {  // Ok, we found our 2
									MEM_FREE(hashes[1].hash_data);
									memcpy(&(hashes[1]), &(hashes[0]), sizeof(curzip));
									memcpy(&(hashes[0]), &curzip, sizeof(curzip));
								} else {
									// found none.  So we will simply roll them down (like when #1 was a magic also).
									MEM_FREE(hashes[2].hash_data);
									memcpy(&(hashes[2]), &(hashes[1]), sizeof(curzip));
									memcpy(&(hashes[1]), &(hashes[0]), sizeof(curzip));
									memcpy(&(hashes[0]), &curzip, sizeof(curzip));
								}
							}
						}
					}
				}
			}
		} else if (id == 0x08074b50UL) {	/* data descriptor */
			fseek(fp, 12, SEEK_CUR);
		} else if (id == 0x02014b50UL || id == 0x06054b50UL) {	/* central directory structures */
			goto print_and_cleanup;
		}
	}

print_and_cleanup:;
	if (count_of_hashes) {
		int i = 1;
		char *bname;
		static int once;	/* the multi-file note below is printed once per run */
		char *filenames = strdup(hashes[0].file_name);

		bname = jtr_basename(fname);

		/* login field is "archive/file" for a single entry, else just "archive" */
		printf("%s%s%s:$pkzip2$%x*%x*", bname,
		       count_of_hashes == 1 ? "/" : "",
		       count_of_hashes == 1 ? hashes[0].file_name : "",
		       count_of_hashes, zfp.check_bytes);
		/* with -c every entry, including the smallest, is written as a partial (type 1) item */
		if (checksum_only)
			i = 0;
		for (; i < count_of_hashes; ++i) {
			/* partial items carry 12+24 bytes, or 12+180 when a magic type is set */
			uint64_t len = 12+24;

			if (i) {
				/* build the ", "-separated list of entry names */
				filenames = mem_realloc(filenames,
				                        strlen(filenames) +
				                        strlen(hashes[i].file_name) + 3);
				strcat(filenames, ", ");
				strcat(filenames, hashes[i].file_name);
			}
			if (hashes[i].magic_type)
				len = 12+180;
			if (len > hashes[i].cmp_len)
				len = hashes[i].cmp_len; // even though we 'could' output a '2', we do not.  We only need one full inflate CRC check file.
			printf("1*%x*%x*%"PRIx64"*%s*%s*", hashes[i].magic_type, hashes[i].cmptype, (uint64_t)len, hashes[i].chksum, hashes[i].chksum2);
			print_hex((unsigned char*)hashes[i].hash_data, len);
		}
		// Ok, now output the 'little' one (the first).
		if (!checksum_only) {
			/* type 2 item: the complete data of the smallest entry, inline */
			printf("%x*%x*%"PRIx64"*%"PRIx64"*%x*%"PRIx64"*%"PRIx64"*%x*", 2, hashes[0].magic_type, hashes[0].cmp_len, hashes[0].decomp_len, hashes[0].crc, hashes[0].offset, hashes[0].offex, hashes[0].cmptype);
			printf("%"PRIx64"*%s*%s*", hashes[0].cmp_len, hashes[0].chksum, hashes[0].chksum2);
			print_hex((unsigned char*)hashes[0].hash_data, hashes[0].cmp_len);
		}
		if (count_of_hashes > 1)
			printf("$/pkzip2$::%s:%s:%s\n", bname, filenames, fname);
		else
			printf("$/pkzip2$:%s:%s::%s\n", filenames, bname, fname);

		if (count_of_hashes > 1 && !once++)
			fprintf(stderr,
"NOTE: It is assumed that all files in each archive have the same password.\n"
"If that is not the case, the hash may be uncrackable. To avoid this, use\n"
"option -o to pick a file at a time.\n");

		for (i = 0; i < count_of_hashes; ++i) {
			MEM_FREE(hashes[i].hash_data);
			MEM_FREE(hashes[i].file_name);
		}
		MEM_FREE(filenames);
	}
	fclose(fp);
}

/*
 * Print the command-line help to stderr, using `name` as the program name,
 * and return EXIT_FAILURE so that callers can write "return usage(...)".
 */
static int usage(char *name)
{
	/* one call; the adjacent literals below form a single format string */
	fprintf(stderr,
	        "Usage: %s [options] [zip file(s)]\n"
	        "Options for 'old' PKZIP encrypted files only:\n"
	        " -a <filename>   This is a 'known' ASCII file. This can be faster, IF all\n"
	        "    files are larger, and you KNOW that at least one of them starts out as\n"
	        "    'pure' ASCII data.\n"
	        " -o <filename>   Only use this file from the .zip file.\n"
	        " -c This will create a 'checksum only' hash.  If there are many encrypted\n"
	        "    files in the .zip file, then this may be an option, and there will be\n"
	        "    enough data that false possitives will not be seen.  If the .zip is 2\n"
	        "    byte checksums, and there are 3 or more of them, then we have 48 bits\n"
	        "    knowledge, which 'may' be enough to crack the password, without having\n"
	        "    to force the user to have the .zip file present.\n"
	        " -m Use \"file magic\" as known-plain if applicable. This can be faster but\n"
	        "    not 100%% safe in all situations.\n"
	        " -2 Force 2 byte checksum computation.\n"
	        "\nNOTE: By default it is assumed that all files in each archive have the same\n"
	        "password. If that's not the case, the produced hash may be uncrackable.\n"
	        "To avoid this, use -o option to pick a file at a time.\n",
	        name);

	return EXIT_FAILURE;
}

/*
 * Entry point of the tool: parse the -a/-o/-c/-m/-2 options, then run
 * process_file() on every remaining argument in order.
 *
 * Returns EXIT_SUCCESS after all files were processed, or the result of
 * usage() when an option is not recognised or no file was named.
 */
int zip2john(int argc, char **argv)
{
	int opt;
	int file_count;
	int idx;
	char **files;

	/* Parse command line */
	for (;;) {
		opt = getopt(argc, argv, "a:o:cm2");
		if (opt == -1)
			break;

		if (opt == 'a') {
			ascii_fname = optarg;
			fprintf(stderr, "Using file %s as an 'ASCII' quick check file\n", ascii_fname);
		} else if (opt == 'o') {
			only_fname = optarg;
			fprintf(stderr, "Using file %s as only file to check\n", only_fname);
		} else if (opt == 'c') {
			checksum_only = 1;
			fprintf(stderr, "Outputing hashes that are 'checksum ONLY' hashes\n");
		} else if (opt == 'm') {
			use_magic = 1;
			fprintf(stderr, "Using file 'magic' signatures if applicable (not 100%% safe)\n");
		} else if (opt == '2') {
			force_2_byte_checksum = 1;
			fprintf(stderr, "Forcing a 2 byte checksum detection\n");
		} else {
			/* '?' from getopt, or anything else unexpected */
			return usage(argv[0]);
		}
	}

	/* whatever follows the options is the list of archives */
	file_count = argc - optind;
	if (file_count == 0)
		return usage(argv[0]);
	files = argv + optind;

	for (idx = 0; idx < file_count; ++idx)
		process_file(files[idx]);

	cleanup_tiny_memory();

	return EXIT_SUCCESS;
}
