2021-05-25 17:00:45 +05:00

1818 lines
71 KiB
C#
Raw Blame History

#region PDFsharp - A .NET library for processing PDF
//
// Authors:
// Stefan Lange
//
// Copyright (c) 2005-2017 empira Software GmbH, Cologne Area (Germany)
//
// http://www.pdfsharp.com
// http://sourceforge.net/projects/pdfsharp
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
#endregion
using System;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using PdfSharp.Internal;
using PdfSharp.Pdf.Advanced;
using PdfSharp.Pdf.Internal;
namespace PdfSharp.Pdf.IO
{
/*
Direct and indirect objects
* If a simple object (boolean, integer, number, date, string, rectangle etc.) is referenced indirectly,
the parser reads this object immediately and consumes the indirection.
* If a composite object (dictionary, array etc.) is referenced indirectly, a PdfReference object
is returned.
* If a composite object is a direct object, no PdfReference is created and the object is
parsed immediately.
* A reference to a non existing object is specified as legal, therefore null is returned.
*/
/// <summary>
/// Provides the functionality to parse PDF documents.
/// </summary>
internal sealed class Parser
{
/// <summary>
/// Initializes a parser that reads from the specified stream through a newly created lexer.
/// </summary>
/// <param name="document">The document this parser works for.</param>
/// <param name="pdf">The stream handed to the new lexer.</param>
public Parser(PdfDocument document, Stream pdf)
{
    // The three fields are independent of each other.
    _stack = new ShiftStack();
    _lexer = new Lexer(pdf);
    _document = document;
}
/// <summary>
/// Initializes a parser that shares the lexer already owned by the specified document.
/// </summary>
/// <param name="document">The document this parser works for; its lexer is reused.</param>
public Parser(PdfDocument document)
{
    _stack = new ShiftStack();
    _lexer = document._lexer;
    _document = document;
}
/// <summary>
/// Moves the lexer to the position stored in the document's iref table for the given object.
/// </summary>
/// <param name="objectID">The ID of the object to move to.</param>
/// <returns>The position that was assigned to the lexer.</returns>
public int MoveToObject(PdfObjectID objectID)
{
    PdfReference entry = _document._irefTable[objectID];
    int target = entry.Position;
    _lexer.Position = target;
    return target;
}
/// <summary>
/// Gets the symbol currently reported by the lexer.
/// </summary>
public Symbol Symbol
{
    get
    {
        Symbol current = _lexer.Symbol;
        return current;
    }
}
/// <summary>
/// Moves the lexer to the given position and reads the two integers found there as an object ID.
/// </summary>
/// <param name="position">The lexer position at which the object number is expected.</param>
/// <returns>An ID built from the first integer (object number) and the second (generation number).</returns>
public PdfObjectID ReadObjectNumber(int position)
{
    _lexer.Position = position;
    int number = ReadInteger();
    int generation = ReadInteger();
#if DEBUG && CORE
    // Breakpoint anchor for debugging one specific object; the call itself does nothing useful.
    if (number == 1074)
        GetType();
#endif
    PdfObjectID id = new PdfObjectID(number, generation);
    return id;
}
/// <summary>
/// Reads PDF object from input stream.
/// Arrays and dictionaries are parsed recursively and may be followed by a stream;
/// simple objects (null, boolean, numbers, strings, names) are wrapped in their
/// corresponding PdfObject type and returned immediately.
/// </summary>
/// <param name="pdfObject">Either the instance of a derived type or null. If it is null
/// an appropriate object is created.</param>
/// <param name="objectID">The address of the object.</param>
/// <param name="includeReferences">If true, specifies that all indirect objects
/// are included recursively.</param>
/// <param name="fromObjecStream">If true, the object is parsed from an object stream.
/// In that case the lexer is not repositioned and no 'obj'/'endobj' keywords are expected.</param>
/// <returns>The parsed object with its object ID set to <paramref name="objectID"/>.</returns>
public PdfObject ReadObject(PdfObject pdfObject, PdfObjectID objectID, bool includeReferences, bool fromObjecStream)
{
#if DEBUG_
Debug.WriteLine("ReadObject: " + objectID);
if (objectID.ObjectNumber == 20)
GetType();
#endif
int objectNumber = objectID.ObjectNumber;
int generationNumber = objectID.GenerationNumber;
// A regular indirect object starts with "<number> <generation> obj" at the position
// stored in the iref table; read the two numbers found there.
if (!fromObjecStream)
{
MoveToObject(objectID);
objectNumber = ReadInteger();
generationNumber = ReadInteger();
}
#if DEBUG
// The following assertion sometime failed (see below)
//Debug.Assert(objectID == new PdfObjectID(objectNumber, generationNumber));
if (!fromObjecStream && objectID != new PdfObjectID(objectNumber, generationNumber))
{
// A special kind of bug? Or is this an undocumented PDF feature?
// PDF4NET 2.6 provides a sample called 'Unicode', which produces a file 'unicode.pdf'
// The iref table of this file contains the following entries:
// iref
// 0 148
// 0000000000 65535 f
// 0000000015 00000 n
// 0000000346 00000 n
// ....
// 0000083236 00000 n
// 0000083045 00000 n
// 0000083045 00000 n
// 0000083045 00000 n
// 0000083045 00000 n
// 0000080334 00000 n
// ....
// Object 84, 85, 86, and 87 maps to the same dictionary, but all PDF readers I tested
// ignores this mismatch! The following assertion failed about 50 times with this file.
#if true_
string message = String.Format("xref entry {0} {1} maps to object {2} {3}.",
objectID.ObjectNumber, objectID.GenerationNumber, objectNumber, generationNumber);
Debug.Assert(false, message);
#endif
}
#endif
// Always use object ID from iref table (see above).
// The numbers read from the file are deliberately discarded here.
objectNumber = objectID.ObjectNumber;
generationNumber = objectID.GenerationNumber;
#if true_
Debug.WriteLine(String.Format("obj: {0} {1}", objectNumber, generationNumber));
#endif
if (!fromObjecStream)
ReadSymbol(Symbol.Obj);
// Set only for dictionaries; used by the assertion in the stream handling below.
bool checkForStream = false;
Symbol symbol = ScanNextToken();
switch (symbol)
{
case Symbol.BeginArray:
PdfArray array;
if (pdfObject == null)
array = new PdfArray(_document);
else
array = (PdfArray)pdfObject;
//PdfObject.RegisterObject(array, objectID, generation);
pdfObject = ReadArray(array, includeReferences);
pdfObject.SetObjectID(objectNumber, generationNumber);
break;
case Symbol.BeginDictionary:
PdfDictionary dict;
if (pdfObject == null)
dict = new PdfDictionary(_document);
else
dict = (PdfDictionary)pdfObject;
//PdfObject.RegisterObject(dict, objectID, generation);
checkForStream = true;
pdfObject = ReadDictionary(dict, includeReferences);
pdfObject.SetObjectID(objectNumber, generationNumber);
break;
// Acrobat 6 Professional proudly presents: The Null object!
// Even with a one-digit object number an indirect reference "x 0 R" to this object is
// one character larger than the direct use of "null". Probably this is the reason why
// it is true that Acrobat Web Capture 6.0 creates this object, but obviously never
// creates a reference to it!
case Symbol.Null:
pdfObject = new PdfNullObject(_document);
pdfObject.SetObjectID(objectNumber, generationNumber);
if (!fromObjecStream)
ReadSymbol(Symbol.EndObj);
return pdfObject;
// Empty object. Invalid PDF, but we need to handle it. Treat as null object.
// The 'endobj' keyword has already been consumed by the scan above.
case Symbol.EndObj:
pdfObject = new PdfNullObject(_document);
pdfObject.SetObjectID(objectNumber, generationNumber);
return pdfObject;
case Symbol.Boolean:
pdfObject = new PdfBooleanObject(_document, String.Compare(_lexer.Token, Boolean.TrueString, StringComparison.OrdinalIgnoreCase) == 0);
pdfObject.SetObjectID(objectNumber, generationNumber);
if (!fromObjecStream)
ReadSymbol(Symbol.EndObj);
return pdfObject;
case Symbol.Integer:
pdfObject = new PdfIntegerObject(_document, _lexer.TokenToInteger);
pdfObject.SetObjectID(objectNumber, generationNumber);
if (!fromObjecStream)
ReadSymbol(Symbol.EndObj);
return pdfObject;
case Symbol.UInteger:
pdfObject = new PdfUIntegerObject(_document, _lexer.TokenToUInteger);
pdfObject.SetObjectID(objectNumber, generationNumber);
if (!fromObjecStream)
ReadSymbol(Symbol.EndObj);
return pdfObject;
case Symbol.Real:
pdfObject = new PdfRealObject(_document, _lexer.TokenToReal);
pdfObject.SetObjectID(objectNumber, generationNumber);
if (!fromObjecStream)
ReadSymbol(Symbol.EndObj);
return pdfObject;
// All four string flavors are stored as a PdfStringObject built from the raw token.
case Symbol.String:
case Symbol.UnicodeString:
case Symbol.HexString:
case Symbol.UnicodeHexString:
pdfObject = new PdfStringObject(_document, _lexer.Token);
pdfObject.SetObjectID(objectNumber, generationNumber);
if (!fromObjecStream)
ReadSymbol(Symbol.EndObj);
return pdfObject;
case Symbol.Name:
pdfObject = new PdfNameObject(_document, _lexer.Token);
pdfObject.SetObjectID(objectNumber, generationNumber);
if (!fromObjecStream)
ReadSymbol(Symbol.EndObj);
return pdfObject;
case Symbol.Keyword:
// Should not come here anymore.
ParserDiagnostics.HandleUnexpectedToken(_lexer.Token);
break;
default:
// Should not come here anymore.
ParserDiagnostics.HandleUnexpectedToken(_lexer.Token);
break;
}
// Only arrays, dictionaries and the unexpected-token cases get here.
// The next token is either 'stream' (for a dictionary with stream data) or 'endobj'.
symbol = ScanNextToken();
if (symbol == Symbol.BeginStream)
{
PdfDictionary dict = (PdfDictionary)pdfObject;
Debug.Assert(checkForStream, "Unexpected stream...");
// NOTE(review): 'true_' is presumably never defined, so the #else branch is the one compiled.
#if true_
ReadStream(dict);
#else
int length = GetStreamLength(dict);
byte[] bytes = _lexer.ReadStream(length);
// Disabled experiment that decodes FlateDecode streams while reading.
#if true_
if (dict.Elements.GetString("/Filter") == "/FlateDecode")
{
if (dict.Elements["/Subtype"] == null)
{
try
{
byte[] decoded = Filtering.FlateDecode.Decode(bytes);
if (decoded.Length == 0)
goto End;
string pageContent = Filtering.FlateDecode.DecodeToString(bytes);
if (pageContent.Length > 100)
pageContent = pageContent.Substring(pageContent.Length - 100);
pageContent.GetType();
bytes = decoded;
dict.Elements.Remove("/Filter");
dict.Elements.SetInteger("/Length", bytes.Length);
}
catch
{
}
}
End: ;
}
#endif
PdfDictionary.PdfStream stream = new PdfDictionary.PdfStream(bytes, dict);
dict.Stream = stream;
ReadSymbol(Symbol.EndStream);
// Advance to the token after 'endstream'; it is checked against 'endobj' below.
symbol = ScanNextToken();
#endif
}
// Objects inside an object stream are not terminated by 'endobj'.
if (!fromObjecStream && symbol != Symbol.EndObj)
ParserDiagnostics.ThrowParserException(PSSR.UnexpectedToken(_lexer.Token));
return pdfObject;
}
//public PdfObject ReadObject(PdfObject obj, bool includeReferences)
/// <summary>
/// Reads the stream data belonging to a dictionary and attaches it to that dictionary.
/// The lexer is expected to have just scanned the 'stream' keyword.
/// </summary>
/// <param name="dict">The dictionary that receives the stream.</param>
private void ReadStream(PdfDictionary dict)
{
    Symbol current = _lexer.Symbol;
    Debug.Assert(current == Symbol.BeginStream);

    int byteCount = GetStreamLength(dict);
    byte[] data = _lexer.ReadStream(byteCount);
    PdfDictionary.PdfStream pdfStream = new PdfDictionary.PdfStream(data, dict);
    Debug.Assert(dict.Stream == null, "Dictionary already has a stream.");
    dict.Stream = pdfStream;

    // Consume 'endstream' and advance to the token that follows it.
    ReadSymbol(Symbol.EndStream);
    ScanNextToken();
}
// HACK: This problem should be solved in a more general way.
/// <summary>
/// Determines the length of a dictionary's stream from its /Length entry.
/// If /Length is an indirect reference, the referenced integer object is read from the
/// file and the entry is replaced by a direct integer.
/// </summary>
/// <param name="dict">The dictionary whose /Length entry is evaluated.</param>
/// <returns>The stream length taken from /Length.</returns>
private int GetStreamLength(PdfDictionary dict)
{
    if (dict.Elements["/F"] != null)
        throw new NotImplementedException("File streams are not yet implemented.");

    PdfItem lengthItem = dict.Elements["/Length"];
    if (lengthItem is PdfInteger)
        return Convert.ToInt32(lengthItem);

    PdfReference lengthRef = lengthItem as PdfReference;
    if (lengthRef == null)
        throw new InvalidOperationException("Cannot retrieve stream length.");

    // Reading the referenced object repositions the lexer, so the parser state
    // is saved before and restored after.
    ParserState saved = SaveState();
    object lengthObject = ReadObject(null, lengthRef.ObjectID, false, false);
    RestoreState(saved);

    int result = ((PdfIntegerObject)lengthObject).Value;
    dict.Elements["/Length"] = new PdfInteger(result);
    return result;
}
/// <summary>
/// Parses items up to the end-of-array symbol and appends them to the array.
/// The lexer is expected to have just scanned the begin-of-array symbol.
/// </summary>
/// <param name="array">The array to fill, or null to create a new one.</param>
/// <param name="includeReferences">If true, items that are references are passed to ReadReference.</param>
/// <returns>The filled array.</returns>
public PdfArray ReadArray(PdfArray array, bool includeReferences)
{
    Debug.Assert(Symbol == Symbol.BeginArray);
    if (array == null)
        array = new PdfArray(_document);

    // Everything ParseObject shifts above this mark belongs to this array.
    int mark = _stack.SP;
    ParseObject(Symbol.EndArray);
    int itemCount = _stack.SP - mark;
    PdfItem[] parsed = _stack.ToArray(mark, itemCount);
    _stack.Reduce(itemCount);

    for (int i = 0; i < itemCount; i++)
    {
        PdfItem element = parsed[i];
        PdfReference reference = includeReferences ? element as PdfReference : null;
        if (reference != null)
            element = ReadReference(reference, true);
        array.Elements.Add(element);
    }
    return array;
}
#if DEBUG_
static int ReadDictionaryCounter;
#endif
/// <summary>
/// Parses key/value pairs up to the end-of-dictionary symbol and stores them in the dictionary.
/// The lexer is expected to have just scanned the begin-of-dictionary symbol.
/// </summary>
/// <param name="dict">The dictionary to fill, or null to create a new one.</param>
/// <param name="includeReferences">If true, values that are references are passed to ReadReference.</param>
/// <returns>The filled dictionary.</returns>
internal PdfDictionary ReadDictionary(PdfDictionary dict, bool includeReferences)
{
    Debug.Assert(Symbol == Symbol.BeginDictionary);
#if DEBUG_
    ReadDictionaryCounter++;
    Debug.WriteLine(ReadDictionaryCounter.ToString());
    if (ReadDictionaryCounter == 101)
        GetType();
#endif
    if (dict == null)
        dict = new PdfDictionary(_document);
    // NOTE(review): 'meta' is not used below; kept in case the Meta getter has side effects - confirm.
    DictionaryMeta meta = dict.Meta;

    // Everything ParseObject shifts above this mark belongs to this dictionary.
    int sp = _stack.SP;
    ParseObject(Symbol.EndDictionary);
    int count = _stack.SP - sp;
    PdfItem[] items = _stack.ToArray(sp, count);
    _stack.Reduce(count);

    // A malformed dictionary can yield an odd number of items (a key without a value).
    // Report it as a parser error instead of running past the end of 'items' below.
    if (count % 2 != 0)
        ParserDiagnostics.ThrowParserException("Dictionary contains a key without a value."); // TODO L10N using PSSR.

    // 'idx + 1 < count' keeps the value access in bounds in any case.
    for (int idx = 0; idx + 1 < count; idx += 2)
    {
        PdfItem val = items[idx];
        if (!(val is PdfName))
            ParserDiagnostics.ThrowParserException("Name expected."); // TODO L10N using PSSR.
        string key = val.ToString();
        val = items[idx + 1];
        if (includeReferences && val is PdfReference)
            val = ReadReference((PdfReference)val, true);
        dict.Elements[key] = val;
    }
    return dict;
}
#if DEBUG_
static int ParseObjectCounter;
#endif
/// <summary>
/// Parses whatever comes until the specified stop symbol is reached.
/// Each parsed item is shifted onto the parser stack; the caller collects the items
/// from the stack afterwards. Nested arrays and dictionaries are parsed recursively.
/// </summary>
/// <param name="stop">The symbol that ends parsing (it is consumed, but not put on the stack).</param>
private void ParseObject(Symbol stop)
{
#if DEBUG_
ParseObjectCounter++;
Debug.WriteLine(ParseObjectCounter.ToString());
if (ParseObjectCounter == 178)
GetType();
#endif
Symbol symbol;
while ((symbol = ScanNextToken()) != Symbol.Eof)
{
if (symbol == stop)
return;
switch (symbol)
{
case Symbol.Comment:
// ignore comments
break;
case Symbol.Null:
_stack.Shift(PdfNull.Value);
break;
case Symbol.Boolean:
_stack.Shift(new PdfBoolean(_lexer.TokenToBoolean));
break;
case Symbol.Integer:
_stack.Shift(new PdfInteger(_lexer.TokenToInteger));
break;
case Symbol.UInteger:
_stack.Shift(new PdfUInteger(_lexer.TokenToUInteger));
break;
case Symbol.Real:
_stack.Shift(new PdfReal(_lexer.TokenToReal));
break;
case Symbol.String:
//stack.Shift(new PdfString(lexer.Token, PdfStringFlags.PDFDocEncoding));
_stack.Shift(new PdfString(_lexer.Token, PdfStringFlags.RawEncoding));
break;
case Symbol.UnicodeString:
_stack.Shift(new PdfString(_lexer.Token, PdfStringFlags.Unicode));
break;
case Symbol.HexString:
_stack.Shift(new PdfString(_lexer.Token, PdfStringFlags.HexLiteral));
break;
case Symbol.UnicodeHexString:
_stack.Shift(new PdfString(_lexer.Token, PdfStringFlags.Unicode | PdfStringFlags.HexLiteral));
break;
case Symbol.Name:
_stack.Shift(new PdfName(_lexer.Token));
break;
case Symbol.R:
{
// An indirect reference "<number> <generation> R": the two integers are already on
// the stack and are replaced (reduced) by a single item.
Debug.Assert(_stack.GetItem(-1) is PdfInteger && _stack.GetItem(-2) is PdfInteger);
PdfObjectID objectID = new PdfObjectID(_stack.GetInteger(-2), _stack.GetInteger(-1));
PdfReference iref = _document._irefTable[objectID];
if (iref == null)
{
// If a document has more than one PdfXRefTable it is possible that the first trailer has
// indirect references to objects whose iref entry is not yet read in.
if (_document._irefTable.IsUnderConstruction)
{
// XRefTable not complete when trailer is read. Create temporary irefs that are
// removed later in PdfTrailer.FixXRefs.
iref = new PdfReference(objectID, 0);
_stack.Reduce(iref, 2);
break;
}
// PDF Reference section 3.2.9:
// An indirect reference to an undefined object is not an error;
// it is simply treated as a reference to the null object.
_stack.Reduce(PdfNull.Value, 2);
// Let's see what null objects are good for...
//Debug.Assert(false, "Null object detected!");
//stack.Reduce(PdfNull.Value, 2);
}
else
_stack.Reduce(iref, 2);
break;
}
case Symbol.BeginArray:
// Nested array: parsed recursively without resolving references, then shifted as one item.
PdfArray array = new PdfArray(_document);
ReadArray(array, false);
_stack.Shift(array);
break;
case Symbol.BeginDictionary:
// Nested dictionary: parsed recursively without resolving references, then shifted as one item.
PdfDictionary dict = new PdfDictionary(_document);
ReadDictionary(dict, false);
_stack.Shift(dict);
break;
case Symbol.BeginStream:
throw new NotImplementedException();
// Not expected here:
//case Symbol.None:
//case Symbol.Keyword:
//case Symbol.EndStream:
//case Symbol.EndArray:
//case Symbol.EndDictionary:
//case Symbol.Obj:
//case Symbol.EndObj:
//case Symbol.XRef:
//case Symbol.Trailer:
//case Symbol.StartXRef:
//case Symbol.Eof:
default:
// Report the unexpected token, then skip ahead to the stop symbol and give up on this object.
ParserDiagnostics.HandleUnexpectedToken(_lexer.Token);
SkipCharsUntil(stop);
return;
}
}
// The input ended before the stop symbol was found.
ParserDiagnostics.ThrowParserException("Unexpected end of file."); // TODO L10N using PSSR.
}
/// <summary>
/// Lets the lexer scan the next token and returns its symbol.
/// </summary>
private Symbol ScanNextToken()
{
    Symbol scanned = _lexer.ScanNextToken();
    return scanned;
}
/// <summary>
/// Lets the lexer scan the next token and returns its symbol together with the token text.
/// </summary>
/// <param name="token">Receives the lexer's token after the scan.</param>
private Symbol ScanNextToken(out string token)
{
    // The parameterless overload forwards to the lexer's scan.
    Symbol scanned = ScanNextToken();
    token = _lexer.Token;
    return scanned;
}
/// <summary>
/// Skips input until the specified stop symbol or the end of the file is reached.
/// For the end-of-dictionary symbol the input is searched character-wise for ">>";
/// for all other symbols tokens are scanned and discarded.
/// </summary>
/// <param name="stop">The symbol to skip to.</param>
/// <returns>The stop symbol if it was found; otherwise the end-of-file symbol.</returns>
private Symbol SkipCharsUntil(Symbol stop)
{
    if (stop == Symbol.EndDictionary)
        return SkipCharsUntil(">>", stop);

    // At least one token is always scanned.
    Symbol scanned = ScanNextToken();
    while (scanned != stop && scanned != Symbol.Eof)
        scanned = ScanNextToken();
    return scanned;
}
/// <summary>
/// Skips characters until the specified text was read or the end of the file is reached.
/// </summary>
/// <param name="text">The character sequence to search for.</param>
/// <param name="stop">The symbol to return when the text was found.</param>
/// <returns><paramref name="stop"/> if the text was found; otherwise the end-of-file symbol.</returns>
/// <remarks>
/// NOTE(review): after a partial match fails, the failing character is not re-tested against
/// the first character of <paramref name="text"/>. This cannot miss a match for ">>", the only
/// text passed by the caller visible here.
/// </remarks>
private Symbol SkipCharsUntil(string text, Symbol stop)
{
    int length = text.Length;
    int matched = 0;
    for (char ch = _lexer.ScanNextChar(true); ch != Chars.EOF; ch = _lexer.ScanNextChar(true))
    {
        if (ch != text[matched])
        {
            matched = 0;
            continue;
        }
        if (matched + 1 == length)
        {
            // Whole text matched: read one more character before reporting success.
            _lexer.ScanNextChar(true);
            return stop;
        }
        matched++;
    }
    return Symbol.Eof;
}
//protected Symbol ScanNextToken(out string token, bool testReference)
//{
// Symbol symbol = lexer.ScanNextToken(testReference);
// token = lexer.Token;
// return symbol;
//}
// internal object ReadObject(int position)
// {
// lexer.Position = position;
// return ReadObject(false);
// }
//
// internal virtual object ReadObject(bool directObject)
// {
// throw new InvalidOperationException("PdfParser.ReadObject() base class called");
// }
/// <summary>
/// Reads object number, generation number and the 'obj' keyword, and applies the
/// two numbers to the specified object.
/// </summary>
/// <param name="obj">The object that receives the ID, or null to only consume the input.</param>
private void ReadObjectID(PdfObject obj)
{
    int objectNumber = ReadInteger();
    int generationNumber = ReadInteger();
    ReadSymbol(Symbol.Obj);

    if (obj == null)
        return;
    obj.SetObjectID(objectNumber, generationNumber);
}
/// <summary>
/// Placeholder for resolving an indirect reference. Not implemented: it always throws.
/// </summary>
/// <exception cref="NotImplementedException">Always.</exception>
private PdfItem ReadReference(PdfReference iref, bool includeReferences)
{
    NotImplementedException notImplemented = new NotImplementedException("ReadReference");
    throw notImplemented;
}
/// <summary>
/// Reads the next symbol, which must be the specified one; any other symbol is reported
/// as an unexpected token.
/// </summary>
/// <param name="symbol">The expected symbol.</param>
/// <returns>The symbol that was actually scanned.</returns>
private Symbol ReadSymbol(Symbol symbol)
{
    if (symbol == Symbol.EndStream)
    {
        // Before scanning for 'endstream', skip forward until the next
        // non-white-space character is an 'e'.
        while (true)
        {
            char ch = _lexer.MoveToNonWhiteSpace();
            if (ch == Chars.EOF)
                ParserDiagnostics.HandleUnexpectedCharacter(ch);
            if (ch == 'e')
                break;
            _lexer.ScanNextChar(false);
        }
    }

    Symbol scanned = _lexer.ScanNextToken();
    if (scanned != symbol)
        ParserDiagnostics.HandleUnexpectedToken(_lexer.Token);
    return scanned;
}
/// <summary>
/// Reads the next token, whose text must equal the specified one; any other token is
/// reported as unexpected.
/// </summary>
/// <param name="token">The expected token text.</param>
/// <returns>The symbol of the token that was scanned.</returns>
private Symbol ReadToken(string token)
{
    Symbol scanned = _lexer.ScanNextToken();
    if (!String.Equals(token, _lexer.Token))
        ParserDiagnostics.HandleUnexpectedToken(_lexer.Token);
    return scanned;
}
/// <summary>
/// Reads a name from the PDF data stream. The preceding slash is part of the result string.
/// A token that is not a name is reported as unexpected.
/// </summary>
/// <returns>The text of the scanned token.</returns>
private string ReadName()
{
    string token;
    if (ScanNextToken(out token) != Symbol.Name)
        ParserDiagnostics.HandleUnexpectedToken(token);
    return token;
}
/*
/// <summary>
/// Reads a string immediately or (optionally) indirectly from the PDF data stream.
/// </summary>
protected string ReadString(bool canBeIndirect)
{
Symbol symbol = Symbol.None; //lexer.ScanNextToken(canBeIndirect);
if (symbol == Symbol.String || symbol == Symbol.UnicodeString || symbol == Symbol.HexString || symbol == Symbol.UnicodeHexString)
return lexer.Token;
else if (symbol == Symbol.R)
{
int position = lexer.Position;
MoveToObject(lexer.Token);
ReadObjectID(null);
string s = ReadString();
ReadSymbol(Symbol.EndObj);
lexer.Position = position;
return s;
}
thr ow new PdfReaderException(PSSR.UnexpectedToken(lexer.Token));
}
protected string ReadString()
{
return ReadString(false);
}
/// <summary>
/// Reads a string immediately or (optionally) indirectly from the PDF data stream.
/// </summary>
protected bool ReadBoolean(bool canBeIndirect)
{
Symbol symbol = lexer.ScanNextToken(canBeIndirect);
if (symbol == Symbol.Boolean)
return lexer.TokenToBoolean;
else if (symbol == Symbol.R)
{
int position = lexer.Position;
MoveToObject(lexer.Token);
ReadObjectID(null);
bool b = ReadBoolean();
ReadSymbol(Symbol.EndObj);
lexer.Position = position;
return b;
}
thr ow new PdfReaderException(PSSR.UnexpectedToken(lexer.Token));
}
protected bool ReadBoolean()
{
return ReadBoolean(false);
}
*/
/// <summary>
/// Reads an integer value from the PDF data stream.
/// </summary>
/// <param name="canBeIndirect">Not evaluated by this method.</param>
/// <returns>The integer that was read, or 0 after an unexpected token was reported.</returns>
private int ReadInteger(bool canBeIndirect)
{
    switch (_lexer.ScanNextToken())
    {
        case Symbol.Integer:
            return _lexer.TokenToInteger;

        case Symbol.R:
        {
            // Read the integer from an object definition, then return to where we were.
            int resumeAt = _lexer.Position;
            // MoveToObject(lexer.Token);
            ReadObjectID(null);
            int value = ReadInteger();
            ReadSymbol(Symbol.EndObj);
            _lexer.Position = resumeAt;
            return value;
        }

        default:
            ParserDiagnostics.HandleUnexpectedToken(_lexer.Token);
            return 0;
    }
}
/// <summary>
/// Reads an integer value from the PDF data stream; forwards to ReadInteger(false).
/// </summary>
private int ReadInteger()
{
    int value = ReadInteger(false);
    return value;
}
// /// <summary>
// /// Reads a real value directly or (optionally) indirectly from the PDF data stream.
// /// </summary>
// double ReadReal(bool canBeIndirect)
// {
// Symbol symbol = lexer.ScanNextToken(canBeIndirect);
// if (symbol == Symbol.Real || symbol == Symbol.Integer)
// return lexer.TokenToReal;
// else if (symbol == Symbol.R)
// {
// int position = lexer.Position;
//// MoveToObject(lexer.Token);
// ReadObjectID(null);
// double f = ReadReal();
// ReadSymbol(Symbol.EndObj);
// lexer.Position = position;
// return f;
// }
// thr ow new PdfReaderException(PSSR.UnexpectedToken(lexer.Token));
// }
//
// double ReadReal()
// {
// return ReadReal(false);
// }
// /// <summary>
// /// Reads an object from the PDF input stream. If the object has a specialized parser, it it used.
// /// </summary>
// public static PdfObject ReadObject(PdfObject pdfObject, PdfObjectID objectID)
// {
// if (pdfObject == null)
// thr ow new ArgumentNullException("pdfObject");
// if (pdfObject.Document == null)
// th row new ArgumentException(PSSR.OwningDocumentRequired, "pdfObject");
//
// Type type = pdfObject.GetType();
// PdfParser parser = CreateParser(pdfObject.Document, type);
// return parser.ReadObject(pdfObject, objectID, false);
// }
/// <summary>
/// Reads an object from the PDF input stream using a new default parser for the document.
/// </summary>
/// <param name="owner">The document that contains the object.</param>
/// <param name="objectID">The ID of the object to read.</param>
/// <returns>The object that was read.</returns>
/// <exception cref="ArgumentNullException"><paramref name="owner"/> is null.</exception>
public static PdfObject ReadObject(PdfDocument owner, PdfObjectID objectID)
{
    if (owner != null)
        return new Parser(owner).ReadObject(null, objectID, false, false);

    throw new ArgumentNullException("owner");
}
/// <summary>
/// Loads the object stream with the specified object ID (if it was not loaded before)
/// and lets it read its references into the document's iref table.
/// </summary>
/// <param name="objectID">The ID of the object stream.</param>
internal void ReadIRefsFromCompressedObject(PdfObjectID objectID)
{
    Debug.Assert(_document._irefTable.ObjectTable.ContainsKey(objectID));

    PdfReference streamRef;
    bool known = _document._irefTable.ObjectTable.TryGetValue(objectID, out streamRef);
    if (!known)
    {
        // Not expected: the object stream must be a type 1 entry in the xref stream,
        // so its iref was created before.
        throw new NotImplementedException("This case is not coded or something else went wrong");
    }

    // First visit: read the object stream's dictionary from the file.
    if (streamRef.Value == null)
    {
        try
        {
            Debug.Assert(_document._irefTable.Contains(streamRef.ObjectID));
            PdfObject raw = ReadObject(null, streamRef.ObjectID, false, false);
            PdfObjectStream loaded = new PdfObjectStream((PdfDictionary)raw);
            // Assigning loaded.Reference is superfluous; the asserts check it is already set.
            Debug.Assert(loaded.Reference == streamRef);
            Debug.Assert(loaded.Reference.Value != null, "Something went wrong.");
        }
        catch (Exception ex)
        {
            Debug.WriteLine(ex.Message);
            throw;
        }
    }
    Debug.Assert(streamRef.Value != null);

    // The value may still be a plain dictionary; wrap it in an object stream in that case.
    PdfObjectStream objStream = streamRef.Value as PdfObjectStream;
    if (objStream == null)
    {
        Debug.Assert(((PdfDictionary)streamRef.Value).Elements.GetName("/Type") == "/ObjStm");
        objStream = new PdfObjectStream((PdfDictionary)streamRef.Value);
        Debug.Assert(objStream.Reference == streamRef);
        Debug.Assert(objStream.Reference.Value != null, "Something went wrong.");
    }
    Debug.Assert(objStream != null);
    if (objStream == null)
        throw new Exception("Something went wrong here.");

    objStream.ReadReferences(_document._irefTable);
}
/// <summary>
/// Reads the compressed object with the specified index in the object stream
/// of the object with the specified object id.
/// The object stream itself is read from the file the first time it is needed.
/// </summary>
/// <param name="objectID">The ID of the object stream that contains the compressed object.</param>
/// <param name="index">The index of the compressed object within the object stream.</param>
/// <returns>The reference returned by the object stream for the object at that index.</returns>
internal PdfReference ReadCompressedObject(PdfObjectID objectID, int index)
{
PdfReference iref;
// Only the first branch is compiled; the #else branch is an older, disabled variant.
#if true
Debug.Assert(_document._irefTable.ObjectTable.ContainsKey(objectID));
if (!_document._irefTable.ObjectTable.TryGetValue(objectID, out iref))
{
throw new NotImplementedException("This case is not coded or something else went wrong");
}
#else
// We should never come here because the object stream must be a type 1 entry in the xref stream
// and iref was created before.
// Has the specified object already an iref in the object table?
if (!_document._irefTable.ObjectTable.TryGetValue(objectID, out iref))
{
try
{
#if true_
iref = new PdfReference(objectID,);
iref.ObjectID = objectID;
_document._irefTable.Add(os);
#else
PdfDictionary dict = (PdfDictionary)ReadObject(null, objectID, false, false);
PdfObjectStream os = new PdfObjectStream(dict);
iref = new PdfReference(os);
iref.ObjectID = objectID;
_document._irefTable.Add(os);
#endif
}
catch (Exception ex)
{
Debug.WriteLine(ex.Message);
throw;
}
}
#endif
// Read in object stream object when we come here for the very first time.
if (iref.Value == null)
{
try
{
Debug.Assert(_document._irefTable.Contains(iref.ObjectID));
PdfDictionary pdfObject = (PdfDictionary)ReadObject(null, iref.ObjectID, false, false);
PdfObjectStream objectStream = new PdfObjectStream(pdfObject);
Debug.Assert(objectStream.Reference == iref);
// objectStream.Reference = iref; Superfluous, see Assert in line before.
Debug.Assert(objectStream.Reference.Value != null, "Something went wrong.");
}
catch (Exception ex)
{
// Log for debugging only; the exception is passed on unchanged.
Debug.WriteLine(ex.Message);
throw;
}
}
Debug.Assert(iref.Value != null);
// The value may still be a plain dictionary; wrap it in an object stream in that case.
PdfObjectStream objectStreamStream = iref.Value as PdfObjectStream;
if (objectStreamStream == null)
{
Debug.Assert(((PdfDictionary)iref.Value).Elements.GetName("/Type") == "/ObjStm");
objectStreamStream = new PdfObjectStream((PdfDictionary)iref.Value);
Debug.Assert(objectStreamStream.Reference == iref);
// objectStream.Reference = iref; Superfluous, see Assert in line before.
Debug.Assert(objectStreamStream.Reference.Value != null, "Something went wrong.");
}
Debug.Assert(objectStreamStream != null);
//PdfObjectStream objectStreamStream = (PdfObjectStream)iref.Value;
if (objectStreamStream == null)
throw new Exception("Something went wrong here.");
return objectStreamStream.ReadCompressedObject(index);
}
/// <summary>
/// Reads the compressed object with the specified number at the given offset.
/// The parser must have been initialized with the stream of an object stream object.
/// </summary>
/// <param name="objectNumber">The object number of the compressed object.</param>
/// <param name="offset">The lexer position at which the object starts.</param>
/// <returns>The reference of the object that was read.</returns>
internal PdfReference ReadCompressedObject(int objectNumber, int offset)
{
#if DEBUG__
    if (objectNumber == 1034)
        GetType();
#endif
    // Compressed objects always have generation 0, so the number alone forms the ID.
    PdfObjectID id = new PdfObjectID(objectNumber);
    _lexer.Position = offset;

    // 'true' marks the object as coming from an object stream.
    PdfObject parsed = ReadObject(null, id, false, true);
    PdfReference reference = parsed.Reference;
    return reference;
}
/// <summary>
/// Reads the header of an object stream: n pairs of integers, each consisting of an
/// object number and an offset.
/// </summary>
/// <param name="n">The number of pairs to read.</param>
/// <param name="first">The value of the First entry of the object stream object; it is
/// added to every offset read.</param>
/// <returns>An array of n two-element arrays: { object number, offset + first }.</returns>
internal int[][] ReadObjectStreamHeader(int n, int first)
{
    // TODO: Concept for general error handling.
    // A corrupted stream can make many things go wrong here.
    // Does more detailed error checking make sense?
    int[][] pairs = new int[n][];
    int row = 0;
    while (row < n)
    {
        int objectNumber = ReadInteger();
#if DEBUG
        // Breakpoint anchor for debugging one specific object.
        if (objectNumber == 1074)
            GetType();
#endif
        // Offsets in the header are relative; make them absolute.
        int absoluteOffset = first + ReadInteger();
        pairs[row] = new int[] { objectNumber, absoluteOffset };
        row++;
    }
    return pairs;
}
/// <summary>
/// Reads the cross-reference table(s) and their trailer dictionary or
/// cross-reference streams.
/// Locates the last "startxref" keyword, reads the position that follows it, and then
/// follows the chain of /Prev entries until a trailer without /Prev is reached.
/// </summary>
/// <returns>The document's trailer; if none was set before, this is the first trailer read.</returns>
/// <exception cref="Exception">"startxref" was not found anywhere in the file.</exception>
internal PdfTrailer ReadTrailer()
{
    int length = _lexer.PdfLength;

    // Implementation note 18 Appendix H:
    // Acrobat viewers require only that the %%EOF marker appear somewhere within the last 1024 bytes of the file.
    // Reading the final 30 bytes should work for all files, but often it does not.
    // For larger files we read 1 KiB - in most cases we find "startxref" in that range.
    int window = length < 1030 ? 30 : 1030;

    // Clamp the window to the file so that very short (truncated) files never produce a
    // negative read position. For files of 31 bytes or more this is the original range.
    int start = Math.Max(0, length - window - 1);
    int count = Math.Min(window, length - start);
    string trail = _lexer.ReadRawString(start, count);
    int idx = trail.LastIndexOf("startxref", StringComparison.Ordinal);

    // SAP sometimes creates files with a size of several MByte and places "startxref" somewhere in the middle...
    if (idx == -1)
    {
        // If "startxref" was still not found yet, read the file completely.
        start = 0;
        trail = _lexer.ReadRawString(0, length);
        idx = trail.LastIndexOf("startxref", StringComparison.Ordinal);
    }
    if (idx == -1)
        throw new Exception("The StartXRef table could not be found, the file cannot be opened.");

    // Position the lexer only after a match was found; idx == -1 is never used as a position.
    _lexer.Position = start + idx;
    ReadSymbol(Symbol.StartXRef);
    _lexer.Position = ReadInteger();

    // Read all trailers.
    // NOTE(review): a file whose /Prev entries form a cycle would keep this loop running - confirm
    // whether that needs a guard.
    while (true)
    {
        PdfTrailer trailer = ReadXRefTableAndTrailer(_document._irefTable);
        // 1st trailer seems to be the best.
        if (_document._trailer == null)
            _document._trailer = trailer;
        int prev = trailer != null ? trailer.Elements.GetInteger(PdfTrailer.Keys.Prev) : 0;
        if (prev == 0)
            break;
        //if (prev > lexer.PdfLength)
        // break;
        _lexer.Position = prev;
    }
    return _document._trailer;
}
/// <summary>
/// Reads cross reference table(s) and trailer(s).
/// At the current lexer position either a classic cross-reference table ("xref" keyword)
/// or a cross-reference stream (starting with an integer) is expected.
/// </summary>
/// <param name="xrefTable">The table that receives the references that are read.</param>
/// <returns>The trailer that was read, or null if neither "xref" nor an integer was found.</returns>
private PdfTrailer ReadXRefTableAndTrailer(PdfCrossReferenceTable xrefTable)
{
Debug.Assert(xrefTable != null);
Symbol symbol = ScanNextToken();
if (symbol == Symbol.XRef) // Is it a cross-reference table?
{
// Reference: 3.4.3 Cross-Reference Table / Page 93
// Each pass reads either one subsection ("<start> <count>" followed by its entries)
// or the trailer, which ends the loop.
while (true)
{
symbol = ScanNextToken();
if (symbol == Symbol.Integer)
{
int start = _lexer.TokenToInteger;
int length = ReadInteger();
for (int id = start; id < start + length; id++)
{
// One entry: "<position> <generation> <keyword>".
int position = ReadInteger();
int generation = ReadInteger();
ReadSymbol(Symbol.Keyword);
string token = _lexer.Token;
// Skip start entry.
if (id == 0)
continue;
// Skip unused entries.
if (token != "n")
continue;
#if true
//!!!new 2018-03-14 begin
// Check if the object at the address has the correct ID and generation.
int idToUse = id;
int idChecked, generationChecked;
if (!CheckXRefTableEntry(position, id, generation, out idChecked, out generationChecked))
{
// Found the keyword "obj", but ID or generation did not match.
// There is a tool where ID is off by one. In this case we use the ID from the object, not the ID from the XRef table.
if (generation == generationChecked && id == idChecked + 1)
idToUse = idChecked;
else
ParserDiagnostics.ThrowParserException("Invalid entry in XRef table, ID=" + id + ", Generation=" + generation + ", Position=" + position + ", ID of referenced object=" + idChecked + ", Generation of referenced object=" + generationChecked); // TODO L10N using PSSR.
}
//!!!new 2018-03-14 end
#endif
// Even if it is restricted, an object can exist in more than one subsection.
// (PDF Reference Implementation Notes 15).
PdfObjectID objectID = new PdfObjectID(idToUse, generation);
// Ignore the latter one.
if (xrefTable.Contains(objectID))
continue;
xrefTable.Add(new PdfReference(objectID, position));
}
}
else if (symbol == Symbol.Trailer)
{
// The trailer dictionary follows the "trailer" keyword and ends this xref section.
ReadSymbol(Symbol.BeginDictionary);
PdfTrailer trailer = new PdfTrailer(_document);
ReadDictionary(trailer, false);
return trailer;
}
else
ParserDiagnostics.HandleUnexpectedToken(_lexer.Token);
}
}
// ReSharper disable once RedundantIfElseBlock because of code readability.
else if (symbol == Symbol.Integer) // Is it a cross-reference stream?
{
// Reference: 3.4.7 Cross-Reference Streams / Page 93
// TODO: Handle PDF files larger than 2 GiB, see implementation note 21 in Appendix H.
// The parsed integer is the object id of the cross-reference stream.
return ReadXRefStream(xrefTable);
}
return null;
}
/// <summary>
/// Verifies one entry of the cross-reference table against the object found in the file.
/// Returns true if the object at the given position has the expected ID and generation.
/// Returns false if the keyword "obj" was found, but ID or generation differ.
/// Throws an exception otherwise. The lexer position is restored in every case.
/// </summary>
/// <param name="position">The position where the object is supposed to be.</param>
/// <param name="id">The ID from the XRef table.</param>
/// <param name="generation">The generation from the XRef table.</param>
/// <param name="idChecked">The identifier found in the PDF file, or -1 if none was read.</param>
/// <param name="generationChecked">The generation found in the PDF file, or -1 if none was read.</param>
/// <returns>True if the entry matches the object; otherwise false.</returns>
private bool CheckXRefTableEntry(int position, int id, int generation, out int idChecked, out int generationChecked)
{
    int resumeAt = _lexer.Position;
    idChecked = -1;
    generationChecked = -1;
    bool matches = true;
    try
    {
        _lexer.Position = position;
        idChecked = ReadInteger();
        generationChecked = ReadInteger();
        //// TODO Should we use ScanKeyword here?
        //ReadKSymbol(Symbol.Keyword);
        //string token = _lexer.Token;
        Symbol keyword = _lexer.ScanNextToken();
        if (keyword != Symbol.Obj)
            ParserDiagnostics.ThrowParserException("Invalid entry in XRef table, ID=" + id + ", Generation=" + generation + ", Position=" + position); // TODO L10N using PSSR.
        matches = id == idChecked && generation == generationChecked;
    }
    catch (PdfReaderException)
    {
        // Parser errors are passed on unchanged.
        throw;
    }
    catch (Exception ex)
    {
        // Any other failure is reported as an invalid xref entry, keeping the cause.
        ParserDiagnostics.ThrowParserException("Invalid entry in XRef table, ID=" + id + ", Generation=" + generation + ", Position=" + position, ex); // TODO L10N using PSSR.
    }
    finally
    {
        _lexer.Position = resumeAt;
    }
    return matches;
}
/// <summary>
/// Reads the cross-reference stream object whose object number is the current lexer token and
/// adds the uncompressed objects it describes to the specified cross-reference table.
/// </summary>
/// <param name="xrefTable">Receives a reference to the stream object itself and one reference for
/// every type 1 (uncompressed) entry whose object ID the table does not contain yet.</param>
/// <returns>The cross-reference stream that was read.</returns>
private PdfTrailer ReadXRefStream(PdfCrossReferenceTable xrefTable)
{
    // The object number was already scanned by the caller and is still the current token.
    //Debug.Assert(_lexer.Symbol == Symbol.Integer);
    int number = _lexer.TokenToInteger;
    int generation = ReadInteger();
    Debug.Assert(generation == 0);
    ReadSymbol(Symbol.Obj);
    ReadSymbol(Symbol.BeginDictionary);
    PdfObjectID objectID = new PdfObjectID(number, generation);

    PdfCrossReferenceStream xrefStream = new PdfCrossReferenceStream(_document);
    ReadDictionary(xrefStream, false);
    ReadSymbol(Symbol.BeginStream);
    ReadStream(xrefStream);

    // The stream is an object of its own and gets its own entry in the table.
    //xrefTable.Add(new PdfReference(objectID, position));
    PdfReference iref = new PdfReference(xrefStream);
    iref.ObjectID = objectID;
    iref.Value = xrefStream;
    xrefTable.Add(iref);

    Debug.Assert(xrefStream.Stream != null);
    byte[] bytesRaw = xrefStream.Stream.UnfilteredValue;
    byte[] bytes = bytesRaw;
    // HACK: Should be done in UnfilteredValue.
    if (xrefStream.Stream.HasDecodeParams)
    {
        int predictor = xrefStream.Stream.DecodePredictor;
        int columns = xrefStream.Stream.DecodeColumns;
        bytes = DecodeCrossReferenceStream(bytesRaw, columns, predictor);
    }
#if DEBUG_
    for (int idx = 0; idx < bytes.Length; idx++)
    {
        if (idx % 4 == 0)
            Console.WriteLine();
        Console.Write("{0:000} ", (int)bytes[idx]);
    }
    Console.WriteLine();
#endif

    int size = xrefStream.Elements.GetInteger(PdfCrossReferenceStream.Keys.Size);
    PdfArray index = xrefStream.Elements.GetValue(PdfCrossReferenceStream.Keys.Index) as PdfArray;
    // NOTE(review): the value of Prev is not used in this method.
    int prev = xrefStream.Elements.GetInteger(PdfCrossReferenceStream.Keys.Prev);
    PdfArray w = (PdfArray)xrefStream.Elements.GetValue(PdfCrossReferenceStream.Keys.W);

    // E.g.: W[1 2 1], Index[7 12], Size 19
    // Setup subsections. Each subsection is a pair { first object number, number of entries }.
    int subsectionCount;
    int[][] subsections = null;
    int subsectionEntryCount = 0;
    if (index == null)
    {
        // Setup with default values: one subsection covering [0, Size).
        subsectionCount = 1;
        subsections = new int[subsectionCount][];
        subsections[0] = new int[] { 0, size }; // HACK: What is size? Contradiction in PDF reference.
        subsectionEntryCount = size;
    }
    else
    {
        // Read subsections from array.
        Debug.Assert(index.Elements.Count % 2 == 0);
        subsectionCount = index.Elements.Count / 2;
        subsections = new int[subsectionCount][];
        for (int idx = 0; idx < subsectionCount; idx++)
        {
            subsections[idx] = new int[] { index.Elements.GetInteger(2 * idx), index.Elements.GetInteger(2 * idx + 1) };
            subsectionEntryCount += subsections[idx][1];
        }
    }

    // W key: the byte widths of the three fields of every entry.
    // Without it the entries cannot be decoded, so report that as a parser error instead of
    // failing later with a NullReferenceException.
    if (w == null || w.Elements.Count < 3)
        ParserDiagnostics.ThrowParserException("Invalid cross-reference stream, ID=" + number + ", Generation=" + generation + ": entry W is missing or incomplete."); // TODO L10N using PSSR.
    Debug.Assert(w.Elements.Count == 3);
    int[] wsize = { w.Elements.GetInteger(0), w.Elements.GetInteger(1), w.Elements.GetInteger(2) };
    int wsum = StreamHelper.WSize(wsize);
    Debug.Assert(wsum * subsectionEntryCount == bytes.Length, "Check implementation here.");
    // Reading beyond the end of the data would end in an IndexOutOfRangeException half way
    // through the table; fail up front with a meaningful message instead.
    if ((long)wsum * subsectionEntryCount > bytes.Length)
        ParserDiagnostics.ThrowParserException("Invalid cross-reference stream, ID=" + number + ", Generation=" + generation + ": stream contains less data than its entries require."); // TODO L10N using PSSR.

#if DEBUG && CORE
    if (PdfDiagnostics.TraceXrefStreams)
    {
        // Trace the first subsection only. An empty Index array has no first subsection.
        int testcount = subsectionCount > 0 ? subsections[0][1] : 0;
        for (int idx = 0; idx < testcount; idx++)
        {
            uint field1 = StreamHelper.ReadBytes(bytes, idx * wsum, wsize[0]);
            uint field2 = StreamHelper.ReadBytes(bytes, idx * wsum + wsize[0], wsize[1]);
            uint field3 = StreamHelper.ReadBytes(bytes, idx * wsum + wsize[0] + wsize[1], wsize[2]);
            string res = String.Format("{0,2:00}: {1} {2,5} {3}  // ", idx, field1, field2, field3);
            switch (field1)
            {
                case 0:
                    res += "Fee list: object number, generation number";
                    break;
                case 1:
                    res += "Not compresed: offset, generation number";
                    break;
                case 2:
                    res += "Compressed: object stream object number, index in stream";
                    break;
                default:
                    res += "??? Type undefined";
                    break;
            }
            Debug.WriteLine(res);
        }
    }
#endif

    // Entries of all subsections are stored back to back, so one running index addresses them.
    int index2 = -1;
    for (int ssc = 0; ssc < subsectionCount; ssc++)
    {
        int entryCount = subsections[ssc][1];
        for (int idx = 0; idx < entryCount; idx++)
        {
            index2++;
            int entryOffset = index2 * wsum;
            PdfCrossReferenceStream.CrossReferenceStreamEntry item =
                new PdfCrossReferenceStream.CrossReferenceStreamEntry();
            // A width of 0 for the first field means the type field is not present and the
            // entry type defaults to 1 (PDF Reference 3.4.7). Reading zero bytes would yield
            // type 0 and silently drop every object of the stream.
            item.Type = wsize[0] == 0 ? 1u : StreamHelper.ReadBytes(bytes, entryOffset, wsize[0]);
            item.Field2 = StreamHelper.ReadBytes(bytes, entryOffset + wsize[0], wsize[1]);
            item.Field3 = StreamHelper.ReadBytes(bytes, entryOffset + wsize[0] + wsize[1], wsize[2]);
            xrefStream.Entries.Add(item);
            switch (item.Type)
            {
                case 0:
                    // Free entry. Nothing to do, not needed.
                    break;

                case 1: // offset / generation number
                    //// Even it is restricted, an object can exists in more than one subsection.
                    //// (PDF Reference Implementation Notes 15).
                    int position = (int)item.Field2;
                    // The object number is taken from the object found at the offset.
                    objectID = ReadObjectNumber(position);
                    Debug.Assert(objectID.GenerationNumber == item.Field3);
                    //// Ignore the latter one.
                    if (!xrefTable.Contains(objectID))
                    {
                        // Add iref for all uncompressed objects.
                        xrefTable.Add(new PdfReference(objectID, position));
                    }
                    break;

                case 2:
                    // Object inside an object stream. Nothing to do yet.
                    break;
            }
        }
    }
    return xrefStream;
}
/// <summary>
/// Parses a PDF date string.
/// Strings starting with "D:" are read as D:YYYYMMDDHHmmSSOHH'mm' where everything after the day
/// is optional; the result is converted to UTC using the offset (if any) and marked as UTC.
/// All other strings are handed to DateTime.Parse with the invariant culture.
/// </summary>
/// <param name="date">The date string to parse. May be null or empty.</param>
/// <param name="errorValue">The value to return if the string cannot be parsed.</param>
/// <returns>The parsed date, or errorValue if parsing failed.</returns>
internal static DateTime ParseDateTime(string date, DateTime errorValue)  // TODO: TryParseDateTime
{
    // Nothing to parse. Handle this expected case without going through an exception.
    if (String.IsNullOrEmpty(date))
        return errorValue;

    DateTime datetime = errorValue;
    try
    {
        if (date.StartsWith("D:", StringComparison.Ordinal))
        {
            // Format is
            // D:YYYYMMDDHHmmSSOHH'mm'
            //   ^2      ^10   ^16 ^20
            int length = date.Length;
            int year = 0, month = 0, day = 0, hour = 0, minute = 0, second = 0, hh = 0, mm = 0;
            char o = 'Z';
            if (length >= 10)
            {
                year = Int32.Parse(date.Substring(2, 4), CultureInfo.InvariantCulture);
                month = Int32.Parse(date.Substring(6, 2), CultureInfo.InvariantCulture);
                day = Int32.Parse(date.Substring(8, 2), CultureInfo.InvariantCulture);
                if (length >= 16)
                {
                    hour = Int32.Parse(date.Substring(10, 2), CultureInfo.InvariantCulture);
                    minute = Int32.Parse(date.Substring(12, 2), CultureInfo.InvariantCulture);
                    second = Int32.Parse(date.Substring(14, 2), CultureInfo.InvariantCulture);
                    // The offset is complete as soon as sign and hours are present. The minutes
                    // and the trailing apostrophe are optional, so do not insist on 23 characters.
                    if (length >= 19 && (date[16] == '+' || date[16] == '-'))
                    {
                        o = date[16];
                        hh = Int32.Parse(date.Substring(17, 2), CultureInfo.InvariantCulture);
                        if (length >= 22)
                            mm = Int32.Parse(date.Substring(20, 2), CultureInfo.InvariantCulture);
                    }
                }
            }
            // There are miserable PDF tools around the world.
            month = Math.Min(Math.Max(month, 1), 12);

            // Work on a local so that a failure below cannot leak a half converted value.
            DateTime parsed = new DateTime(year, month, day, hour, minute, second);
            TimeSpan ts = new TimeSpan(hh, mm, 0);
            // Local time = UTC + offset, hence UTC = local time - offset.
            if (o == '-')
                parsed = parsed.Add(ts);
            else if (o == '+')
                parsed = parsed.Subtract(ts);

            // Now that we converted datetime to UTC, mark it as UTC.
            // DateTime is immutable: SpecifyKind returns a new value that must be assigned.
            datetime = DateTime.SpecifyKind(parsed, DateTimeKind.Utc);
        }
        else
        {
            // Some libraries use plain English format.
            datetime = DateTime.Parse(date, CultureInfo.InvariantCulture);
        }
    }
    // ReSharper disable once EmptyGeneralCatchClause
    catch (Exception ex)
    {
        // If we cannot parse datetime, just eat it, but give a hint in DEBUG build.
        Debug.Assert(false, ex.Message);
    }
    return datetime;
}
// /// <summary>
// /// Creates a parser for the specified PDF object type. A PDF object can define a specialized
// /// parser in the optional PdfObjectInfoAttribute. If no parser is specified, the default
// /// Parser object is returned.
// /// </summary>
// public static Parser CreateParser(PdfDocument document, Type pdfObjectType)
// {
// // TODO: ParserFactory
// object[] attribs = null; //pdfObjectType.GetCustomAttributes(typeof(PdfObjectInfoAttribute), false);
// if (attribs.Length == 1)
// {
// PdfObjectInfoAttribute attrib = null; //(PdfObjectInfoAttribute)attribs[0];
// Type parserType = attrib.Parser;
// if (parserType != null)
// {
// ConstructorInfo ctorInfo = parserType.GetConstructor(
// BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance, null,
// new Type[]{typeof(PdfDocument)}, null);
// Parser parser = (Parser)ctorInfo.Invoke(new object[]{document});
// Debug.Assert(parser != null, "Creation of parser failed.");
// return parser;
// }
// }
// return new Parser(document);
// }
/*
/// <summary>
/// Reads a date value directly or (optionally) indirectly from the PDF data stream.
/// </summary>
protected DateTime ReadDate(bool canBeIndirect)
{
Symbol symbol = lexer.ScanNextToken(canBeIndirect);
if (symbol == Symbol.String || symbol == Symbol.UnicodeString || symbol == Symbol.HexString || symbol == Symbol.UnicodeHexString)
{
// D:YYYYMMDDHHmmSSOHH'mm'
// ^2 ^10 ^16 ^20
string date = lexer.Token;
int length = date.Length;
int year = 0, month = 0, day = 0, hour = 0, minute = 0, second = 0, hh = 0, mm = 0;
char o = 'Z';
if (length >= 10)
{
year = Int32.Parse(date.Substring(2, 4));
month = Int32.Parse(date.Substring(6, 2));
day = Int32.Parse(date.Substring(8, 2));
if (length >= 16)
{
hour = Int32.Parse(date.Substring(10, 2));
minute = Int32.Parse(date.Substring(12, 2));
second = Int32.Parse(date.Substring(14, 2));
if (length >= 23)
{
if ((o = date[16]) != 'Z')
{
hh = Int32.Parse(date.Substring(17, 2));
mm = Int32.Parse(date.Substring(20, 2));
}
}
}
}
DateTime datetime = new DateTime(year, month, day, hour, minute, second);
if (o != 'Z')
{
TimeSpan ts = new TimeSpan(hh, mm, 0);
if (o == '+')
datetime.Add(ts);
else
datetime.Subtract(ts);
}
return datetime;
}
else if (symbol == Symbol.R)
{
int position = lexer.Position;
MoveToObject(lexer.Token);
ReadObjectID(null);
DateTime d = ReadDate();
ReadSymbol(Symbol.EndObj);
lexer.Position = position;
return d;
}
thr ow new PdfReaderException(PSSR.UnexpectedToken(lexer.Token));
}
protected DateTime ReadDate()
{
return ReadDate(false);
}
/// <summary>
/// Reads a PdfRectangle value directly or (optionally) indirectly from the PDF data stream.
/// </summary>
protected PdfRectangle ReadRectangle(bool canBeIndirect)
{
Symbol symbol = lexer.ScanNextToken(canBeIndirect);
if (symbol == Symbol.BeginArray)
{
PdfRectangle rect = new PdfRectangle();
rect.X1 = ReadReal();
rect.Y1 = ReadReal();
rect.X2 = ReadReal();
rect.Y2 = ReadReal();
ReadSymbol(Symbol.EndArray);
return rect;
}
else if (symbol == Symbol.R)
{
int position = lexer.Position;
MoveToObject(lexer.Token);
ReadObjectID(null);
PdfRectangle rect = ReadRectangle();
ReadSymbol(Symbol.EndObj);
lexer.Position = position;
return rect;
}
thr ow new PdfReaderException(PSSR.UnexpectedToken(lexer.Token));
}
/// <summary>
/// Short cut for ReadRectangle(false).
/// </summary>
protected PdfRectangle ReadRectangle()
{
return ReadRectangle(false);
}
/// <summary>
/// Reads a generic dictionary.
/// </summary>
protected PdfDictionary ReadDictionary(bool canBeIndirect)
{
// Just read over dictionary
PdfDictionary dictionary = new PdfDictionary();
Symbol symbol = lexer.ScanNextToken(canBeIndirect);
if (symbol == Symbol.BeginDictionary)
{
int nestingLevel = 0;
symbol = ScanNextToken();
while (symbol != Symbol.Eof)
{
switch (symbol)
{
case Symbol.BeginDictionary:
nestingLevel++;
break;
case Symbol.EndDictionary:
if (nestingLevel == 0)
return dictionary;
else
nestingLevel--;
break;
}
symbol = ScanNextToken();
}
Debug.Assert(false, "Must not come here");
return dictionary;
}
else if (symbol == Symbol.R)
{
return dictionary;
}
thr ow new PdfReaderException(PSSR.UnexpectedToken(lexer.Token));
}
/// <summary>
/// Short cut for ReadDictionary(false).
/// </summary>
protected PdfDictionary ReadDictionary()
{
return ReadDictionary(false);
}
/// <summary>
/// Reads a generic array.
/// </summary>
protected PdfArray ReadArray(bool canBeIndirect)
{
// Just read over array
PdfArray array = new PdfArray();
Symbol symbol = lexer.ScanNextToken(canBeIndirect);
if (symbol == Symbol.BeginArray)
{
int nestingLevel = 0;
symbol = ScanNextToken();
while (symbol != Symbol.Eof)
{
switch (symbol)
{
case Symbol.BeginArray:
nestingLevel++;
break;
case Symbol.EndArray:
if (nestingLevel == 0)
return array;
else
nestingLevel--;
break;
}
symbol = ScanNextToken();
}
Debug.Assert(false, "Must not come here");
return array;
}
else if (symbol == Symbol.R)
{
return array;
}
th row new PdfReaderException(PSSR.UnexpectedToken(lexer.Token));
}
protected PdfArray ReadArray()
{
return ReadArray(false);
}
protected object ReadGeneric(KeysMeta meta, string token)
{
KeyDescriptor descriptor = meta[token];
Debug.Assert(descriptor != null);
object result = null;
switch (descriptor.KeyType & KeyType.TypeMask)
{
case KeyType.Name:
result = ReadName();
break;
case KeyType.String:
result = ReadString(descriptor.CanBeIndirect);
break;
case KeyType.Boolean:
result = ReadBoolean(descriptor.CanBeIndirect);
break;
case KeyType.Integer:
result = ReadInteger(descriptor.CanBeIndirect);
break;
case KeyType.Real:
result = ReadReal(descriptor.CanBeIndirect);
break;
case KeyType.Date:
result = ReadDate(descriptor.CanBeIndirect);
break;
case KeyType.Rectangle:
result = ReadRectangle(descriptor.CanBeIndirect);
break;
case KeyType.Array:
result = ReadArray(descriptor.CanBeIndirect);
break;
case KeyType.Dictionary:
result = ReadDictionary(descriptor.CanBeIndirect);
break;
case KeyType.Stream:
break;
case KeyType.NumberTree:
thr ow new NotImplementedException("KeyType.NumberTree");
case KeyType.NameOrArray:
char ch = lexer.MoveToNonWhiteSpace();
if (ch == '/')
result = ReadName();
else if (ch == '[')
result = ReadArray();
else
th row new NotImplementedException("KeyType.NameOrArray");
break;
case KeyType.ArrayOrDictionary:
thr ow new NotImplementedException("KeyType.ArrayOrDictionary");
}
//Debug.Assert(false, "ReadGeneric");
return result;
}
// /// <summary>
// /// Gets the current symbol from the lexer.
// /// </summary>
// protected Symbol Symbol
// {
// get {return lexer.Symbol;}
// }
//
// /// <summary>
// /// Gets the current token from the lexer.
// /// </summary>
// protected string Token
// {
// get {return lexer.Token.ToString();}
// }
public static object Read(PdfObject o, string key)
{
return null;
}
*/
/// <summary>
/// Captures the current position and symbol of the lexer so that they can be reinstated later
/// with RestoreState.
/// </summary>
private ParserState SaveState()
{
    return new ParserState
    {
        Position = _lexer.Position,
        Symbol = _lexer.Symbol
    };
}
/// <summary>
/// Sets the position and the symbol of the lexer to the values stored in the specified state.
/// </summary>
/// <param name="state">The state to reinstate, e.g. one returned by SaveState.</param>
private void RestoreState(ParserState state)
{
// Position is assigned before Symbol; keep this order.
_lexer.Position = state.Position;
_lexer.Symbol = state.Symbol;
}
/// <summary>
/// Holds the lexer position and symbol captured by SaveState and applied by RestoreState.
/// </summary>
private class ParserState
{
// The value of the lexer's Position property.
public int Position;
// The value of the lexer's Symbol property.
public Symbol Symbol;
}
/// <summary>
/// Reverses the predictor that was applied to the data of a cross-reference stream.
/// For the PNG predictors (10 to 15) every row of the input starts with a tag byte that selects
/// the row filter, followed by <paramref name="columns"/> data bytes. The row filters 0 (None)
/// and 2 (Up) are supported.
/// </summary>
/// <param name="bytes">The data as it comes out of the stream filter.</param>
/// <param name="columns">The number of data bytes per row, not counting the tag byte.</param>
/// <param name="predictor">The predictor: 1 for no prediction or 10 to 15 for PNG prediction.</param>
/// <returns>The data without tag bytes and with the prediction undone. For predictor 1 the
/// specified array itself is returned.</returns>
/// <exception cref="ArgumentException">The predictor or a row filter is not supported,
/// columns is negative, or the length of the data is not a multiple of the row size.</exception>
byte[] DecodeCrossReferenceStream(byte[] bytes, int columns, int predictor)
{
    int size = bytes.Length;

    // Predictor 1 means "no prediction": there are no tag bytes and nothing to undo.
    if (predictor == 1)
        return bytes;

    if (predictor < 10 || predictor > 15)
        throw new ArgumentException("Invalid predictor.", "predictor");

    // A negative value would lead to a division by zero or a meaningless result below.
    if (columns < 0)
        throw new ArgumentException("Columns must not be negative.", "columns");

    int rowSizeRaw = columns + 1;
    if (size % rowSizeRaw != 0)
        throw new ArgumentException("Columns and size of array do not match.");

    int rows = size / rowSizeRaw;
    byte[] result = new byte[rows * columns];

    for (int row = 0; row < rows; ++row)
    {
        int tagIndex = row * rowSizeRaw;
        int target = row * columns;
        byte rowFilter = bytes[tagIndex];
        // 0 = None (row is stored as is), 2 = Up (row is stored as difference to the row above).
        if (rowFilter != 0 && rowFilter != 2)
            throw new ArgumentException("Invalid predictor in array.");

        // The row above the first row counts as all zeros, so the first row is a plain copy.
        bool addRowAbove = rowFilter == 2 && row > 0;
        for (int col = 0; col < columns; ++col)
        {
            int value = bytes[tagIndex + 1 + col];
            if (addRowAbove)
                value += result[target - columns + col];
            // The sum is defined modulo 256; make the wrap-around independent of build settings.
            result[target + col] = unchecked((byte)value);
        }
    }
    return result;
}
// The document that is handed to the PdfTrailer and PdfCrossReferenceStream objects created while parsing.
private readonly PdfDocument _document;
// The lexer that delivers the tokens and holds the current read position.
private readonly Lexer _lexer;
// NOTE(review): not referenced in the part of the file reviewed here - presumably the stack used while parsing objects; confirm.
private readonly ShiftStack _stack;
}
/// <summary>
/// Helpers for decoding the fixed width fields of cross-reference stream entries.
/// </summary>
static class StreamHelper
{
    /// <summary>
    /// Gets the number of bytes of one entry, which is the sum of the three field widths.
    /// </summary>
    /// <param name="w">The three field widths.</param>
    public static int WSize(int[] w)
    {
        Debug.Assert(w.Length == 3);
        int total = w[0];
        total += w[1];
        total += w[2];
        return total;
    }

    /// <summary>
    /// Reads an unsigned integer that is stored with the most significant byte first.
    /// </summary>
    /// <param name="bytes">The array to read from.</param>
    /// <param name="index">The index of the first byte to read.</param>
    /// <param name="byteCount">The number of bytes to read. Zero or less yields 0.</param>
    public static uint ReadBytes(byte[] bytes, int index, int byteCount)
    {
        uint result = 0;
        int cursor = index;
        for (int remaining = byteCount; remaining > 0; remaining--)
        {
            // Make room for the next byte, then append it.
            result = result * 256 + bytes[cursor];
            cursor++;
        }
        return result;
    }
}
}