<?php
//
// FPDI - Version 1.2.1
//
// Copyright 2004-2008 Setasign - Jan Slabon
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Numeric identifiers for the PDF value types used by the parser.
// Each constant is created only when it does not exist yet, so a definition
// made before this file is loaded is left untouched.
defined('PDF_TYPE_NULL') || define('PDF_TYPE_NULL', 0);
defined('PDF_TYPE_NUMERIC') || define('PDF_TYPE_NUMERIC', 1);
defined('PDF_TYPE_TOKEN') || define('PDF_TYPE_TOKEN', 2);
defined('PDF_TYPE_HEX') || define('PDF_TYPE_HEX', 3);
defined('PDF_TYPE_STRING') || define('PDF_TYPE_STRING', 4);
defined('PDF_TYPE_DICTIONARY') || define('PDF_TYPE_DICTIONARY', 5);
defined('PDF_TYPE_ARRAY') || define('PDF_TYPE_ARRAY', 6);
defined('PDF_TYPE_OBJDEC') || define('PDF_TYPE_OBJDEC', 7);
defined('PDF_TYPE_OBJREF') || define('PDF_TYPE_OBJREF', 8);
defined('PDF_TYPE_OBJECT') || define('PDF_TYPE_OBJECT', 9);
defined('PDF_TYPE_STREAM') || define('PDF_TYPE_STREAM', 10);
defined('PDF_TYPE_BOOLEAN') || define('PDF_TYPE_BOOLEAN', 11);
defined('PDF_TYPE_REAL') || define('PDF_TYPE_REAL', 12);
require_once("pdf_context.php");
require_once("wrapper_functions.php");
class pdf_parser {
/**
* Filename
* @var string
*/
var $filename;
/**
* File resource
* @var resource
*/
var $f;
/**
* PDF Context
* @var object pdf_context-Instance
*/
var $c;
/**
* xref-Data
* @var array
*/
var $xref;
/**
* root-Object
* @var array
*/
var $root;
/**
* PDF version of the loaded document
* @var string
*/
var $pdfVersion;
/**
* Constructor
*
* @param string $filename Source-Filename
*/
function pdf_parser($filename) {
    // Remember the source path and open it for binary reading. The "@"
    // silences PHP's own warning because a failure is reported via error().
    $this->filename = $filename;
    $this->f = @fopen($this->filename, "rb");
    if (!$this->f)
        $this->error(sprintf("Cannot open %s !", $filename));

    // Read the version number from the file header
    $this->getPDFVersion();

    // Plain assignment on purpose: "=& new" is deprecated since PHP 5.3
    // and is a parse error as of PHP 7.0.
    $this->c = new pdf_context($this->f);

    // Read xref-Data, starting at the offset found near the end of the file
    $this->pdf_read_xref($this->xref, $this->pdf_find_xref());

    // Check for Encryption (reports an error for encrypted files)
    $this->getEncryption();

    // Read root
    $this->pdf_read_root();
}
/**
* Close the opened file
*/
function closeFile() {
    if (isset($this->f)) {
        // isset() is also true for false (left behind by a failed fopen())
        // and for a handle that was closed already; fclose() must only be
        // given a live stream, so check that first.
        if (is_resource($this->f)) {
            fclose($this->f);
        }
        // Drop the property so that a repeated call is a no-op.
        unset($this->f);
    }
}
/**
* Print Error and die
*
* @param string $msg Error-Message
*/
function error($msg) {
    // Prefix the message with the parser label, print it and end the script.
    $output = "<b>PDF-Parser Error:</b> " . $msg;
    exit($output);
}
/**
* Check Trailer for Encryption
*/
function getEncryption() {
    // An /Encrypt entry in the trailer data means the file cannot be used.
    $hasEncryptEntry = isset($this->xref['trailer'][1]['/Encrypt']);
    if (!$hasEncryptEntry) {
        return;
    }
    $this->error("File is encrypted!");
}
/**
* Find/Return /Root
*
* @return array
*/
function pdf_find_root() {
    // Fetch the /Root entry of the trailer data without raising an
    // undefined-index notice when a malformed file does not provide it.
    $root = isset($this->xref['trailer'][1]['/Root'])
        ? $this->xref['trailer'][1]['/Root']
        : null;

    // The entry has to be a value array whose type slot (index 0) marks it
    // as an indirect object reference; anything else is reported.
    if (!is_array($root) || !isset($root[0]) || $root[0] != PDF_TYPE_OBJREF) {
        $this->error("Wrong Type of Root-Element! Must be an indirect reference");
    }

    // Returned unchanged (null when the entry is missing).
    return $root;
}
/**
* Read the /Root
*/
function pdf_read_root() {
    // Locate the (validated) /Root reference in the trailer first ...
    $rootReference = $this->pdf_find_root();
    // ... then resolve it through the parser context and keep the result.
    $this->root = $this->pdf_resolve_object($this->c, $rootReference);
}
/**
* Get PDF-Version
*
* And reset the PDF Version used in FPDI if needed
*/
function getPDFVersion() {
    // Look at the first 16 bytes of the file only.
    fseek($this->f, 0);
    $header = fread($this->f, 16);

    // The first "digit.digit" sequence found there is taken as the version;
    // without a match the stored version is left as it is.
    if (preg_match("/\d\.\d/", $header, $found)) {
        $this->pdfVersion = $found[0];
    }

    return $this->pdfVersion;
}
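/**
* Find the xref-Table
*
* Searches the end of the file for the "startxref" keyword and reads the
* number that follows it.
*
* @return integer Offset of the xref-table as stated after "startxref"
*/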
function pdf_find_xref() {
    // Only the last bytes of the file are searched for the keyword.
    $tailLength = 1500;
    if (fseek($this->f, -$tailLength, SEEK_END) === -1) {
        // Seeking backwards failed (e.g. the file is shorter than the
        // window), so read from the very beginning instead.
        fseek($this->f, 0);
    }
    $tail = fread($this->f, $tailLength);

    // Searching the reversed buffer for the reversed keyword finds the LAST
    // occurrence; subtracting that hit from the length gives the position
    // directly behind the keyword in the original buffer.
    $keyword = 'startxref';
    $afterKeyword = strlen($tail) - strpos(strrev($tail), strrev($keyword));
    $tail = substr($tail, $afterKeyword);

    // The digits that follow are the pointer to the xref table.
    $found = preg_match('/\s*(\d+).*$/s', $tail, $matches);
    if (!$found) {
        $this->error("Unable to find pointer to xref table");
    }

    return (int) $matches[1];
}
/**
* Read xref-table
*
* @param array $result Array of xref-table
* @param integer $offset of xref-table
*/
function pdf_read_xref(&$result, $offset) {
fseek($this->f, $o_pos = $offset-20); // set some bytes backwards to fetch errorious docs
$data = fread($this->f, 100);
$xrefPos = strpos($data, 'xref');
if ($xrefPos === false) {
$this->error('Unable to find xref table.');
}
if (!isset($result['xref_location'])) {
$result['xref_location'] = $o_pos+$xrefPos;
$result['max_object'] = 0;
}
$cylces = -1;
$bytesPerCycle = 100;
fseek($this->f, $o_pos = $o_pos+$xrefPos+4); // set the handle directly after the "xref"-keyword
$data = fread($this->f, $bytesPerCycle);
while (($trailerPos = strpos($data, 'trailer', max($bytesPerCycle*$cylces++, 0))) === false && !feof($this->f)) {
$data .= fread($this->f, $bytesPerCycle);
}
if ($trailerPos === false) {
$this->error('Trailer keyword not found after xref table');
}
$data = substr($data, 0, $trailerPos);
// get Line-Ending
preg_match_all("/(\r\n|\n|\r)/", substr($data, 0, 100), $m); // check the first 100 bytes for linebreaks
$differentLineEndings = count(array_unique($m[0]));
if ($differentLineEndings > 1) {
$lines = preg_split("/(\r\n|\n|\r)/", $data, -1, PREG_SPLIT_NO_EMPTY);
} else {
$lines = explode($m[0][1], $data);
}
$data = $differentLineEndings = $m = null;
unset($data, $differentLineEndings, $m);
$linesCount = count($lines);
$start = 1;
for ($i = 0; $i < $linesCount; $i++) {
$line = trim($lines[$i]);
if ($line) {
$pieces = explode(" ", $line);
$c = count($pieces);
switch($c) {
case 2:
$start = (int)$pieces[0];
$end = $start+(int)$pieces[1];
if ($end > $result['max_object'])
$result['max_object'] = $end;
break;
case 3:
if (!isset($result['xref'][$start]))
$result['xref'][$start] = array();
if (!array_key_exists($gen = (int) $pieces[1], $result['xref'][$start])) {
$result['xref'][$start][$gen] = $pieces[2] == 'n' ? (int) $pieces[0] : null;
}
$start++;
break;
default:
$this->error('Unexpected data in xref table');
}
}
}
$lines = $pieces = $line = $sta