summaryrefslogtreecommitdiffstats
path: root/src/file_pdf.c
diff options
context:
space:
mode:
author Christophe Grenier <[email protected]> 2007-10-29 22:38:52 +0100
committer Christophe Grenier <[email protected]> 2007-10-29 22:38:52 +0100
commit 9928d99936105b4653d2d1b8ca74dc3ffba5c71e (patch)
tree 06aa4f5e9f0055027c6fb54dd47a8414cf2fba32 /src/file_pdf.c
First version in git
Diffstat (limited to 'src/file_pdf.c')
-rw-r--r--src/file_pdf.c147
1 files changed, 147 insertions, 0 deletions
diff --git a/src/file_pdf.c b/src/file_pdf.c
new file mode 100644
index 00000000..132959e9
--- /dev/null
+++ b/src/file_pdf.c
@@ -0,0 +1,147 @@
+/*
+
+ File: file_pdf.c
+
+ Copyright (C) 1998-2007 Christophe GRENIER <[email protected]>
+
+ This software is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc., 51
+ Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif
+#include <stdio.h>
+#include "types.h"
+#include "filegen.h"
+
+/* Bounded byte-sequence search over [haystack, haystack_end). */
+static inline const unsigned char *find_in_mem(
+    const unsigned char *haystack,
+    const unsigned char * haystack_end,
+    const unsigned char *needle,
+    const unsigned int needle_length);
+
+/* Callbacks referenced by file_hint_pdf. */
+static void register_header_check_pdf(file_stat_t *file_stat);
+static int header_check_pdf(
+    const unsigned char *buffer,
+    const unsigned int buffer_size,
+    const unsigned int safe_header_only,
+    const file_recovery_t *file_recovery,
+    file_recovery_t *file_recovery_new);
+/* File checks that header_check_pdf installs in file_recovery_new->file_check. */
+static void file_check_pdf(file_recovery_t *file_recovery);
+static void file_check_pdf_and_size(file_recovery_t *file_recovery);
+
+/*
+ * Descriptor of the PDF recovery module.  It is the only symbol of this file
+ * with external linkage; the two callbacks it names are static functions
+ * defined further down.
+ */
+const file_hint_t file_hint_pdf = {
+  /* Identification */
+  .extension             = "pdf",
+  .description           = "Portable Document Format, Adobe Illustrator",
+  /* Settings */
+  .recover               = 1,
+  .min_header_distance   = 0,
+  .max_filesize          = PHOTOREC_MAX_FILE_SIZE,
+  /* Callbacks */
+  .register_header_check = register_header_check_pdf,
+  .header_check          = header_check_pdf
+};
+
+/* Signature bytes "%PDF-1"; a byte array, so no terminating NUL is stored. */
+static const unsigned char pdf_header[6] = { '%', 'P', 'D', 'F', '-', '1' };
+
+/*
+ * Registers header_check_pdf with file_stat for the pdf_header signature by
+ * forwarding to register_header_check().
+ */
+static void register_header_check_pdf(file_stat_t *file_stat)
+{
+  register_header_check(0,
+      pdf_header, sizeof pdf_header,
+      header_check_pdf,
+      file_stat);
+}
+
+/*
+ * Finds the first occurrence of a byte sequence inside a memory region.
+ *
+ * haystack       first byte of the region to search
+ * haystack_end   one past the last byte of the region
+ * needle         bytes to look for
+ * needle_length  number of bytes in needle
+ *
+ * Returns a pointer to the first match inside [haystack, haystack_end), or
+ * NULL when there is no match, when haystack is NULL, when the region is
+ * empty or inverted, when the needle is empty, or when the needle is longer
+ * than the region.
+ */
+static inline const unsigned char *find_in_mem(const unsigned char *haystack, const unsigned char * haystack_end,
+    const unsigned char *needle, const unsigned int needle_length)
+{
+  if(haystack==NULL || needle_length==0 || haystack>=haystack_end)
+    return NULL;
+  /* Compare lengths rather than computing haystack_end-needle_length, which
+   * would point before the region when the needle is longer than it. */
+  while((size_t)(haystack_end-haystack) >= needle_length)
+  {
+    /* Only offsets that leave room for the whole needle can start a match. */
+    const size_t candidates=(size_t)(haystack_end-haystack)-needle_length+1;
+    haystack=memchr(haystack, needle[0], candidates);
+    if(haystack==NULL)
+      return NULL;
+    if(memcmp(haystack, needle, needle_length)==0)
+      return haystack;
+    /* First byte matched but the rest did not: resume one byte later. */
+    haystack++;
+  }
+  return NULL;
+}
+
+/*
+ * Header check for PDF and Adobe Illustrator files.
+ *
+ * buffer             candidate data; at most its first 512 bytes are examined
+ * buffer_size        number of readable bytes at buffer
+ * safe_header_only   not used by this check
+ * file_recovery      not used by this check
+ * file_recovery_new  reset and filled in when the signature matches
+ *
+ * Returns 1 when buffer starts with the pdf_header signature, 0 otherwise.
+ * On a match the extension is "ai" if "Illustrator" occurs in the examined
+ * window, and file_hint_pdf.extension otherwise.  If "Linearized" occurs in
+ * the window and is followed, before the next '>', by "/L", the decimal
+ * number after it is stored in calculated_file_size and data_check_size /
+ * file_check_pdf_and_size are installed.  Otherwise file_check_pdf is
+ * installed.
+ */
+static int header_check_pdf(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
+{
+  const unsigned char sig_illustrator[11]={'I','l','l','u','s','t','r','a','t','o','r'};
+  const unsigned char sig_linearized[10]={'L','i','n','e','a','r','i','z','e','d'};
+  const unsigned char *scan_end;
+  const unsigned char *linearized;
+  if(buffer_size<sizeof(pdf_header) || memcmp(buffer,pdf_header,sizeof(pdf_header))!=0)
+    return 0;
+  /* Never look past the caller's data, nor past the first 512 bytes. */
+  scan_end=buffer+(buffer_size<512 ? buffer_size : 512);
+  reset_file_recovery(file_recovery_new);
+  if(find_in_mem(buffer, scan_end, sig_illustrator,sizeof(sig_illustrator)) != NULL)
+    file_recovery_new->extension="ai";
+  else
+    file_recovery_new->extension=file_hint_pdf.extension;
+  linearized=find_in_mem(buffer, scan_end, sig_linearized,sizeof(sig_linearized));
+  if(linearized!=NULL)
+  {
+    /* The whole marker lies inside the window, so this stays <= scan_end. */
+    linearized+=sizeof(sig_linearized);
+    /* Every dereference below is preceded by a bounds test against scan_end. */
+    while(linearized<scan_end && *linearized!='>')
+    {
+      if(*linearized=='/' && linearized+1<scan_end && *(linearized+1)=='L')
+      {
+        linearized+=2;
+        while(linearized<scan_end &&
+            (*linearized==' ' || *linearized=='\t' || *linearized=='\n' || *linearized=='\r'))
+          linearized++;
+        /* Accumulate the decimal digits; the value stays 0 if none follow. */
+        file_recovery_new->calculated_file_size=0;
+        while(linearized<scan_end && *linearized>='0' && *linearized<='9')
+        {
+          file_recovery_new->calculated_file_size=file_recovery_new->calculated_file_size*10+(*linearized)-'0';
+          linearized++;
+        }
+        file_recovery_new->data_check=&data_check_size;
+        file_recovery_new->file_check=&file_check_pdf_and_size;
+        return 1;
+      }
+      linearized++;
+    }
+  }
+  file_recovery_new->file_check=&file_check_pdf;
+  return 1;
+}
+
+/*
+ * File check used when header_check_pdf obtained an expected file size.
+ *
+ * The file is accepted only if file_size has reached calculated_file_size
+ * and the bytes '%','E','O','F' occur within the last 20 bytes before that
+ * size.  On acceptance file_size is set to calculated_file_size.  In every
+ * other case (file too short, expected size below 20 bytes, seek failure,
+ * marker not found) file_size is set to 0.
+ */
+static void file_check_pdf_and_size(file_recovery_t *file_recovery)
+{
+  enum { TAIL_SIZE = 20 };
+  unsigned char tail[TAIL_SIZE];
+  size_t got;
+  size_t i;
+  /* An expected size below TAIL_SIZE would make the seek offset wrap around,
+   * so reject it explicitly instead of relying on fseek() to fail. */
+  if(file_recovery->file_size<file_recovery->calculated_file_size ||
+      file_recovery->calculated_file_size<TAIL_SIZE)
+  {
+    file_recovery->file_size=0;
+    return ;
+  }
+  file_recovery->file_size=file_recovery->calculated_file_size;
+  if(fseek(file_recovery->handle,file_recovery->file_size-TAIL_SIZE,SEEK_SET)<0)
+  {
+    file_recovery->file_size=0;
+    return ;
+  }
+  /* A short read is tolerated: only the bytes actually read are scanned. */
+  got=fread(tail,1,TAIL_SIZE,file_recovery->handle);
+  for(i=0;i+4<=got;i++)
+  {
+    if(tail[i]=='%' && tail[i+1]=='E' && tail[i+2]=='O' && tail[i+3]=='F')
+      return ;
+  }
+  file_recovery->file_size=0;
+}
+
+/*
+ * File check that header_check_pdf installs when it did not read a file
+ * size from the header.  Copies calculated_file_size into file_size, passes
+ * the 4-byte "%EOF" marker to file_search_footer(), then calls
+ * file_allow_nl() with the NL_BARENL, NL_CRLF and NL_BARECR flags.
+ */
+static void file_check_pdf(file_recovery_t *file_recovery)
+{
+  static const unsigned char eof_marker[4]={ '%', 'E', 'O', 'F' };
+
+  file_recovery->file_size=file_recovery->calculated_file_size;
+  file_search_footer(file_recovery, eof_marker, sizeof eof_marker);
+  file_allow_nl(file_recovery, NL_BARENL | NL_CRLF | NL_BARECR);
+}