/* extract.c
* Copyright (c) 2005, Nick Mikus
* This file contains the file specific functions used to extract
* data from an image.
*
* Each extraction function takes a similar set of parameters:
* f_state *s: state of the program.
* c_offset: offset at which the header was found within the current chunk
* foundat: pointer to the location in the buffer where the header was found
* buflen: how much buffer is left until the end of the current chunk
* needle: search specification
* f_offset: offset of the current chunk within the file
*/
#include "main.h"
#include "extract.h"
#include "ole.h"
/* Globals declared here but defined in another translation unit.
 * None of them is referenced by extract_zip(); presumably they are used by
 * extraction routines further down in this file (ole.h is included above)
 * -- TODO confirm against the rest of the file. */
extern unsigned char buffer[OUR_BLK_SIZE];
extern int verbose;
extern int dir_count;
extern int block_list[OUR_BLK_SIZE / sizeof(int)];
extern int *FAT;
extern char *extract_name;
extern int extract;
extern int FATblk;
extern int highblk;
/* Fixed sizes/offsets of the ZIP structures parsed by extract_zip(). */
enum
{
    ZIP_LOCAL_HDR_LEN = 30,       /* fixed part of a local file header */
    ZIP_EOCD_COMMENT_LEN_OFF = 20, /* offset of the comment-length field in the footer record */
    ZIP_EOCD_MIN_LEN = 22          /* footer record up to and including the comment-length field */
};

/* Return non-zero when the name_len-byte (not NUL-terminated) entry name starts with prefix. */
static int zip_name_has_prefix(const unsigned char *name, size_t name_len, const char *prefix)
{
    size_t prefix_len = strlen(prefix);

    return name_len >= prefix_len && memcmp(name, prefix, prefix_len) == 0;
}

/* Return non-zero when the name_len-byte (not NUL-terminated) entry name contains pattern. */
static int zip_name_contains(const unsigned char *name, size_t name_len, const char *pattern)
{
    size_t pattern_len = strlen(pattern);
    size_t i;

    if (pattern_len == 0 || pattern_len > name_len)
        return 0;

    for (i = 0; i + pattern_len <= name_len; i++)
    {
        if (memcmp(name + i, pattern, pattern_len) == 0)
            return 1;
    }

    return 0;
}

/********************************************************************************
*Function: extract_zip
*Description: Given that we have a ZIP header, jump from local file header to
	local file header until something that is not a local file header is
	reached, then search for the footer (needle->footer) and use the comment
	length stored 20 bytes into it to compute the exact size of the archive.
	While walking the entries, needle->suffix is refined from the entry names
	(sxc/sxi/sxw/sx, jar, docx/xlsx/pptx, otherwise zip).
	The archive is written with write_to_disk() when type is "all" or equals
	the detected suffix.
	Every read is kept inside the buflen bytes that start at foundat.
*Return: A pointer to where the EOF of the ZIP is in the current buffer (two
	bytes before the computed end), NULL when the buffer does not hold
	enough data, foundat + needle->header_len when an entry fails the sanity
	checks, or the position where the footer search started when no footer
	was found.
**********************************************************************************/
unsigned char *extract_zip(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
                           s_spec *needle, u_int64_t f_offset, char *type)
{
    unsigned char *currentpos = NULL;
    unsigned char *buf = foundat;
    unsigned short comment_length = 0;
    struct zipLocalFileHeader localFH;
    u_int64_t bytes_to_search = 0;
    u_int64_t remaining = 0;
    u_int64_t entry_len = 0;
    u_int64_t file_size = 0;
    int oOffice = FALSE;
    int office2007 = FALSE;

    localFH.genFlag = 0;
    localFH.compressed = 0;
    localFH.uncompressed = 0;

    /* The mimetype probe below reads up to offset 68, so insist on a minimum buffer. */
    if (buflen < 100)
        return NULL;

    if (strncmp((char *) &foundat[30], "mimetypeapplication/vnd.sun.xml.", 32) == 0)
    {
        oOffice = TRUE;
        if (strncmp((char *) &foundat[62], "calc", 4) == 0)
        {
            needle->suffix = "sxc";
        }
        else if (strncmp((char *) &foundat[62], "impress", 7) == 0)
        {
            needle->suffix = "sxi";
        }
        else if (strncmp((char *) &foundat[62], "writer", 6) == 0)
        {
            needle->suffix = "sxw";
        }
        else
        {
            /* NOTE(review): the capacity of needle->comment is not visible here;
             * confirm this unbounded append cannot overflow it. */
            strcat(needle->comment, " (OpenOffice Doc?)");
            needle->suffix = "sx";
        }
    }
    else
    {
        needle->suffix = "zip";
    }

    /* Jump through each local file header until the central directory
     * structure is reached, much faster than searching. */
    while (1)
    {
        /* Invariant: foundat never moves past buf + buflen (enforced below). */
        remaining = buflen - (u_int64_t) (foundat - buf);

        if (remaining < ZIP_LOCAL_HDR_LEN)
        {
            /* A local file header that straddles the end of the buffer
             * cannot be parsed: ask for more data. Anything else ends the walk. */
            if (remaining >= 4 && foundat[2] == '\x03' && foundat[3] == '\x04')
                return NULL;
            break;
        }

        /* Verify we are looking at a local file header */
        if (foundat[2] != '\x03' || foundat[3] != '\x04')
            break;

        localFH.genFlag = htos(&foundat[6], FOREMOST_LITTLE_ENDIAN);
        localFH.compression = htos(&foundat[8], FOREMOST_LITTLE_ENDIAN);
        localFH.compressed = htoi(&foundat[18], FOREMOST_LITTLE_ENDIAN);
        localFH.uncompressed = htoi(&foundat[22], FOREMOST_LITTLE_ENDIAN);
        localFH.filename_length = htos(&foundat[26], FOREMOST_LITTLE_ENDIAN);
        localFH.extra_length = htos(&foundat[28], FOREMOST_LITTLE_ENDIAN);

        /* Sanity checking */
        if (localFH.compressed > needle->max_len)
            return foundat + needle->header_len;
        if (localFH.filename_length > 100)
            return foundat + needle->header_len;

        /* Check if we should grab more from the disk: the whole entry
         * (header, name, extra field and data) has to be in the buffer,
         * otherwise the jump below would leave it. */
        entry_len = (u_int64_t) ZIP_LOCAL_HDR_LEN + localFH.filename_length +
                    localFH.extra_length + localFH.compressed;
        if (entry_len > remaining)
            return NULL;

        /* Skip the fixed part of the header; foundat now points at the entry name. */
        foundat += ZIP_LOCAL_HDR_LEN;

        /* Refine the type from the entry name. The name is NOT NUL-terminated,
         * so every comparison is bounded by filename_length. */
        if (strcmp(needle->suffix, "zip") == 0)
        {
            if (zip_name_has_prefix(foundat, localFH.filename_length, "content.xml"))
            {
                oOffice = TRUE;
                strcat(needle->comment, " (OpenOffice Doc?)");
                needle->suffix = "sx";
            }
            else if (zip_name_contains(foundat, localFH.filename_length, ".class") ||
                     zip_name_contains(foundat, localFH.filename_length, ".jar") ||
                     zip_name_contains(foundat, localFH.filename_length, ".java"))
            {
                needle->suffix = "jar";
            }
            else if (zip_name_has_prefix(foundat, localFH.filename_length, "[Content_Types].xml"))
            {
                office2007 = TRUE;
            }
            else if (office2007 == TRUE &&
                     zip_name_has_prefix(foundat, localFH.filename_length, "ppt/slides"))
            {
                needle->suffix = "pptx";
            }
            else if (office2007 == TRUE &&
                     zip_name_has_prefix(foundat, localFH.filename_length, "word/document.xml"))
            {
                needle->suffix = "docx";
            }
            else if (office2007 == TRUE &&
                     zip_name_has_prefix(foundat, localFH.filename_length, "xl/workbook.xml"))
            {
                needle->suffix = "xlsx";
            }
#ifdef DEBUG
            else
            {
                printf("foundat=%.*s\n", (int) localFH.filename_length, (char *) foundat);
            }
#endif
        }

        /* Jump over the name, the extra field and the entry data. */
        foundat += localFH.compressed;
        foundat += localFH.filename_length;
        foundat += localFH.extra_length;

#ifdef DEBUG
        if (localFH.genFlag == 8)
            fprintf(stderr,"We have extra stuff!!!");
#endif

        /* Bit 3 set with zero sizes: the sizes are not stored in this header,
         * so the data cannot be jumped over; fall back to searching. */
        if (localFH.genFlag & 1<<3 && localFH.uncompressed == 0 && localFH.compressed == 0)
        {
#ifdef DEBUG
            fprintf(stderr,"No data to jmp Just search for the next file Footer (localFH.genFlag:=%d)\n",localFH.genFlag);
#endif
            break;
        }

#ifdef DEBUG
        printf("localFH.compressed:=%d localFH.uncompressed:=%d\n\t jumping %d bytes filename=%d bytes",
               localFH.compressed,
               localFH.uncompressed,localFH.filename_length+localFH.compressed+localFH.extra_length,localFH.filename_length);
        printx(foundat, 0, 16);
#endif
    } //end while loop

    /* How much of the buffer is left after the entries we walked over. */
    if ((u_int64_t) (foundat - buf) >= buflen)
    {
#ifdef DEBUG
        printf("avoided bug in extract_zip!\n");
#endif
        remaining = 0;
    }
    else
    {
        remaining = buflen - (u_int64_t) (foundat - buf);
    }

    if (oOffice)
    {
        //We have an OO doc how long should we search for?
        bytes_to_search = 1 * MEGABYTE;
    }
    else if (localFH.genFlag & 1<<3 && localFH.uncompressed == 0 && localFH.compressed == 0)
    {
        bytes_to_search = needle->max_len;
    }
    else
    {
        bytes_to_search = remaining;
    }

    //Make sure we are not searching more than what we have
    if (bytes_to_search > remaining)
        bytes_to_search = remaining;

    currentpos = foundat;
#ifdef DEBUG
    printf("Search for the footer bytes_to_search:=%lld buflen:=%lld\n", (long long) bytes_to_search, (long long) buflen);
#endif
    foundat = bm_search(needle->footer,
                        needle->footer_len,
                        foundat,
                        bytes_to_search,
                        needle->footer_bm_table,
                        needle->case_sen,
                        SEARCHTYPE_FORWARD);
#ifdef DEBUG
    printf("Search complete \n");
#endif

    if (foundat == NULL)
    {
        /* No footer in the searched range: report where the search started.
         * (bytes_to_search was clamped to the remaining buffer above, so there
         * is no "searched less than requested" case left to signal here.) */
#ifdef DEBUG
        printf("I give up \n");
#endif
        return currentpos;
    }

    /* Found the end of the central directory structure, determine the exact
     * length and extract. */
#ifdef DEBUG
    printf("distance searched:=%lu\n", (unsigned long) (foundat - currentpos));
#endif

    /* NOTE(review): assumes bm_search() returns a pointer inside the range it
     * was asked to search, so foundat - buf <= buflen here. */
    remaining = buflen - (u_int64_t) (foundat - buf);

    /* The 2-byte comment length sits 20 bytes into the record, so 22 bytes
     * must be available before it can be read. */
    if (remaining < ZIP_EOCD_MIN_LEN)
        return NULL;

    /*Jump to the comment length field*/
    foundat += ZIP_EOCD_COMMENT_LEN_OFF;
    comment_length = htos(foundat, FOREMOST_LITTLE_ENDIAN);

    /* Never let a bogus or truncated comment push the end of the file past
     * the data we actually hold. */
    if ((u_int64_t) comment_length > remaining - ZIP_EOCD_MIN_LEN)
        comment_length = (unsigned short) (remaining - ZIP_EOCD_MIN_LEN);

    foundat += comment_length + 2;
    file_size = (u_int64_t) (foundat - buf);
#ifdef DEBUG
    printf("File size %lld\n", (long long) file_size);
    printf("Found a %s type:=%s\n", needle->suffix, type);
#endif

    if (strcmp(type,"all") == 0 || strcmp(type,needle->suffix) == 0)
    {
#ifdef DEBUG
        printf("Writing a %s to disk\n", needle->suffix);
#endif
        write_to_disk(s, needle, file_size, buf, c_offset + f_offset);
    }
#ifdef DEBUG
    printf("Found a %s\n", needle->suffix);
#endif
    return foundat - 2;
}
/********************************************************************************
*Function: extract_pdf
*Description: Given that we have a PDF header check if it is Linearized, if so
grab the file size and we are done, else search for the %%EOF
*Return: A pointer to where the EOF of the PDF is in the current buffer
**********************************************************************************/
unsigned char *extract_pdf(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
s_spec *needle, u_int64_t f_offset)
{
unsigned char *currentpos = NULL;
unsi