#region PDFsharp - A .NET library for processing PDF
//
// Authors:
//   Stefan Lange
//
// Copyright (c) 2005-2017 empira Software GmbH, Cologne Area (Germany)
//
// http://www.pdfsharp.com
// http://sourceforge.net/projects/pdfsharp
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
#endregion

using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using PdfSharp.Pdf.IO;

namespace PdfSharp.Pdf.Advanced
{
    /// <summary>
    /// Represents the cross-reference table of a PDF document.
    /// It contains all indirect objects of a document.
    /// </summary>
    internal sealed class PdfCrossReferenceTable  // Must not be derived from PdfObject.
    {
        /// <summary>
        /// Initializes a new, empty cross-reference table that belongs to the specified document.
        /// </summary>
        /// <param name="document">The document that owns this table.</param>
        public PdfCrossReferenceTable(PdfDocument document)
        {
            _document = document;
        }
        readonly PdfDocument _document;

        /// <summary>
        /// Represents the relation between PdfObjectID and PdfReference for a PdfDocument.
        /// </summary>
        public Dictionary<PdfObjectID, PdfReference> ObjectTable = new Dictionary<PdfObjectID, PdfReference>();

        /// <summary>
        /// Gets or sets a flag that marks the table as being under construction.
        /// The flag is only stored here; this class does not evaluate it.
        /// </summary>
        internal bool IsUnderConstruction
        {
            get { return _isUnderConstruction; }
            set { _isUnderConstruction = value; }
        }
        bool _isUnderConstruction;

        /// <summary>
        /// Adds a cross reference entry to the table. Used when parsing the trailer.
        /// If the reference has no object ID yet, the next free object number is assigned.
        /// </summary>
        /// <param name="iref">The reference to add.</param>
        /// <exception cref="ArgumentNullException"><paramref name="iref"/> is null.</exception>
        /// <exception cref="InvalidOperationException">An entry with the same object ID already exists.</exception>
        public void Add(PdfReference iref)
        {
            if (iref == null)
                throw new ArgumentNullException("iref");

            if (iref.ObjectID.IsEmpty)
                iref.ObjectID = new PdfObjectID(GetNewObjectNumber());

            if (ObjectTable.ContainsKey(iref.ObjectID))
                throw new InvalidOperationException("Object already in table.");

            ObjectTable.Add(iref.ObjectID, iref);
        }

        /// <summary>
        /// Adds a PdfObject to the table.
        /// An object without owner is attached to the document of this table; an object without
        /// object ID gets the next free object number with generation 0.
        /// </summary>
        /// <param name="value">The object to add.</param>
        /// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
        /// <exception cref="InvalidOperationException">An entry with the same object ID already exists.</exception>
        public void Add(PdfObject value)
        {
            if (value == null)
                throw new ArgumentNullException("value");

            if (value.Owner == null)
                value.Document = _document;
            else
                Debug.Assert(value.Owner == _document);

            if (value.ObjectID.IsEmpty)
                value.SetObjectID(GetNewObjectNumber(), 0);

            if (ObjectTable.ContainsKey(value.ObjectID))
                throw new InvalidOperationException("Object already in table.");

            ObjectTable.Add(value.ObjectID, value.Reference);
        }

        /// <summary>
        /// Removes the entry with the object ID of the specified reference.
        /// Does nothing if no such entry exists.
        /// </summary>
        /// <param name="iref">The reference whose object ID identifies the entry to remove.</param>
        /// <exception cref="ArgumentNullException"><paramref name="iref"/> is null.</exception>
        public void Remove(PdfReference iref)
        {
            if (iref == null)
                throw new ArgumentNullException("iref");

            ObjectTable.Remove(iref.ObjectID);
        }

        /// <summary>
        /// Gets a cross reference entry from an object identifier.
        /// Returns null if no object with the specified ID exists in the object table.
        /// </summary>
        public PdfReference this[PdfObjectID objectID]
        {
            get
            {
                PdfReference iref;
                ObjectTable.TryGetValue(objectID, out iref);
                return iref;
            }
        }

        /// <summary>
        /// Indicates whether the specified object identifier is in the table.
        /// </summary>
        public bool Contains(PdfObjectID objectID)
        {
            return ObjectTable.ContainsKey(objectID);
        }

        //public PdfObject GetObject(PdfObjectID objectID)
        //{
        //    return this[objectID].Value;
        //}

        //        /// <summary>
        //        /// Gets the entry for the specified object, or null, if the object is not in
        //        /// this XRef table.
        //        /// </summary>
        //        internal PdfReference GetEntry(PdfObjectID objectID)
        //        {
        //            return this[objectID];
        //        }

        /// <summary>
        /// Returns the next free object number.
        /// </summary>
        public int GetNewObjectNumber()
        {
            // New objects are numbered consecutively. If a document is imported, _maxObjectNumber is
            // set to the highest object number used in the document.
            return ++_maxObjectNumber;
        }
        internal int _maxObjectNumber;

        /// <summary>
        /// Writes the xref section in pdf stream.
        /// A single subsection starting at object number 0 is written: the free entry for object 0
        /// followed by one in-use entry per reference, in the order returned by AllReferences.
        /// </summary>
        /// <param name="writer">The writer that receives the raw xref text.</param>
        /// <remarks>
        /// Entries are emitted consecutively without gaps, so this presumably expects contiguous
        /// object numbers 1..count (e.g. after Renumber) - TODO confirm against the caller.
        /// </remarks>
        internal void WriteObject(PdfWriter writer)
        {
            writer.WriteRaw("xref\n");

            PdfReference[] irefs = AllReferences;
            int count = irefs.Length;

            // The file format is machine readable; always format numbers culture independently.
            writer.WriteRaw(String.Format(CultureInfo.InvariantCulture, "0 {0}\n", count + 1));
            writer.WriteRaw(String.Format(CultureInfo.InvariantCulture, "{0:0000000000} {1:00000} {2} \n", 0, 65535, "f"));
            //PdfEncoders.WriteAnsi(stream, text);

            for (int idx = 0; idx < count; idx++)
            {
                PdfReference iref = irefs[idx];

                // Acrobat is very pedantic; it must be exactly 20 bytes per line.
                writer.WriteRaw(String.Format(CultureInfo.InvariantCulture, "{0:0000000000} {1:00000} {2} \n",
                    iref.Position, iref.GenerationNumber, "n"));
            }
        }

        /// <summary>
        /// Gets an array of all object identifiers. For debugging purposes only.
        /// </summary>
        internal PdfObjectID[] AllObjectIDs
        {
            get
            {
                ICollection collection = ObjectTable.Keys;
                PdfObjectID[] objectIDs = new PdfObjectID[collection.Count];
                collection.CopyTo(objectIDs, 0);
                return objectIDs;
            }
        }

        /// <summary>
        /// Gets an array of all cross references in ascending order by their object identifier.
        /// The order is established by sorting with PdfReference.Comparer.
        /// </summary>
        internal PdfReference[] AllReferences
        {
            get
            {
                List<PdfReference> list = new List<PdfReference>(ObjectTable.Values);
                list.Sort(PdfReference.Comparer);
                return list.ToArray();
            }
        }

        /// <summary>
        /// Placeholder; currently does nothing.
        /// </summary>
        internal void HandleOrphanedReferences()
        { }

        /// <summary>
        /// Removes all objects that cannot be reached from the trailer.
        /// Returns the number of removed objects.
        /// </summary>
        /// <remarks>
        /// The table is rebuilt from the transitive closure of the document trailer and
        /// _maxObjectNumber is recalculated from the surviving entries.
        /// </remarks>
        internal int Compact()
        {
            // TODO: remove PdfBooleanObject, PdfIntegerObject etc.
            int removed = ObjectTable.Count;
            //CheckConsistence();
            // TODO: Is this really so easy?
            PdfReference[] irefs = TransitiveClosure(_document._trailer);

#if DEBUG
            // Have any two objects the same ID? (Dictionary.Add throws on a duplicate object number.)
            Dictionary<int, int> ids = new Dictionary<int, int>();
            foreach (PdfObjectID objID in ObjectTable.Keys)
            {
                ids.Add(objID.ObjectNumber, 0);
            }

            // Have any two irefs the same value?
            ids.Clear();
            foreach (PdfReference iref in ObjectTable.Values)
            {
                ids.Add(iref.ObjectNumber, 0);
            }

            // Build a lookup of all reachable references. Table entries that are missing from this
            // lookup are the ones that get removed below.
            Dictionary<PdfReference, int> refs = new Dictionary<PdfReference, int>();
            foreach (PdfReference iref in irefs)
            {
                refs.Add(iref, 0);
            }

            foreach (PdfReference iref in ObjectTable.Values)
            {
                Debug.Assert(iref.Value != null);
            }

            foreach (PdfReference iref in irefs)
            {
                Debug.Assert(ObjectTable.ContainsKey(iref.ObjectID));
                Debug.Assert(iref.Value != null);
            }
#endif

            _maxObjectNumber = 0;
            ObjectTable.Clear();
            foreach (PdfReference iref in irefs)
            {
                // This if is needed for corrupt PDF files from the wild.
                // Without the if, an exception will be thrown if the file contains duplicate IDs
                // ("An item with the same key has already been added to the dictionary.").
                // With the if, the first object with the ID will be used and later objects with
                // the same ID will be ignored.
                if (!ObjectTable.ContainsKey(iref.ObjectID))
                {
                    ObjectTable.Add(iref.ObjectID, iref);
                    _maxObjectNumber = Math.Max(_maxObjectNumber, iref.ObjectNumber);
                }
            }
            //CheckConsistence();
            removed -= ObjectTable.Count;
            return removed;
        }

        /// <summary>
        /// Renumbers the objects starting at 1.
        /// The objects keep their relative order as returned by AllReferences; afterwards
        /// _maxObjectNumber equals the number of objects.
        /// </summary>
        internal void Renumber()
        {
            //CheckConsistence();
            PdfReference[] irefs = AllReferences;
            ObjectTable.Clear();

            // Give all objects a new number.
            int count = irefs.Length;
            for (int idx = 0; idx < count; idx++)
            {
                PdfReference iref = irefs[idx];
                iref.ObjectID = new PdfObjectID(idx + 1);
                // Rehash with new number.
                ObjectTable.Add(iref.ObjectID, iref);
            }
            _maxObjectNumber = count;
            //CheckConsistence();
        }

        /// <summary>
        /// Checks the logical consistence for debugging purposes (useful after reconstruction work).
        /// Calls are only compiled if the symbol DEBUG_ is defined.
        /// </summary>
        [Conditional("DEBUG_")]
        public void CheckConsistence()
        {
            Dictionary<PdfReference, object> ht1 = new Dictionary<PdfReference, object>();
            foreach (PdfReference iref in ObjectTable.Values)
            {
                Debug.Assert(!ht1.ContainsKey(iref), "Duplicate iref.");
                Debug.Assert(iref.Value != null);
                ht1.Add(iref, null);
            }

            Dictionary<PdfObjectID, object> ht2 = new Dictionary<PdfObjectID, object>();
            foreach (PdfReference iref in ObjectTable.Values)
            {
                Debug.Assert(!ht2.ContainsKey(iref.ObjectID), "Duplicate iref.");
                ht2.Add(iref.ObjectID, null);
            }

            ICollection<PdfReference> collection = ObjectTable.Values;
            int count = collection.Count;
            PdfReference[] irefs = new PdfReference[count];
            collection.CopyTo(irefs, 0);

            // Pairwise check: all entries must be distinct and belong to the same document.
            for (int i = 0; i < count; i++)
            {
                for (int j = 0; j < count; j++)
                {
                    if (i != j)
                    {
                        Debug.Assert(ReferenceEquals(irefs[i].Document, _document));
                        Debug.Assert(irefs[i] != irefs[j]);
                        Debug.Assert(!ReferenceEquals(irefs[i], irefs[j]));
                        Debug.Assert(!ReferenceEquals(irefs[i].Value, irefs[j].Value));
                        Debug.Assert(!Equals(irefs[i].ObjectID, irefs[j].Value.ObjectID));
                        Debug.Assert(irefs[i].ObjectNumber != irefs[j].Value.ObjectNumber);
                        Debug.Assert(ReferenceEquals(irefs[i].Document, irefs[j].Document));
                    }
                }
            }
        }

        ///// <summary>
        ///// The garbage collector for PDF objects.
        ///// </summary>
        //public sealed class GC
        //{
        //    PdfXRefTable xrefTable;
        //
        //    internal GC(PdfXRefTable xrefTable)
        //    {
        //        _xrefTable = xrefTable;
        //    }
        //
        //    public void Collect()
        //    { }
        //
        //    public PdfReference[] ReachableObjects()
        //    {
        //        Hash_table objects = new Hash_table();
        //        TransitiveClosure(objects, _xrefTable.document.trailer);
        //    }

        /// <summary>
        /// Calculates the transitive closure of the specified PdfObject, i.e. all indirect objects
        /// recursively reachable from the specified object.
        /// </summary>
        public PdfReference[] TransitiveClosure(PdfObject pdfObject)
        {
            return TransitiveClosure(pdfObject, short.MaxValue);
        }

        /// <summary>
        /// Calculates the transitive closure of the specified PdfObject with the specified depth, i.e. all indirect objects
        /// recursively reachable from the specified object in up to maximally depth steps.
        /// </summary>
        /// <param name="pdfObject">The dictionary or array the search starts from.</param>
        /// <param name="depth">Currently ignored by the implementation; the full closure is always calculated.</param>
        /// <returns>All references found, in no particular order.</returns>
        public PdfReference[] TransitiveClosure(PdfObject pdfObject, int depth)
        {
            CheckConsistence();
            Dictionary<PdfReference, object> objects = new Dictionary<PdfReference, object>();
            _overflow = new Dictionary<PdfObject, object>();
            TransitiveClosureImplementation(objects, pdfObject);

            // Objects that were postponed because the recursion became too deep are processed
            // again from the top level until no postponed objects remain.
            while (_overflow.Count > 0)
            {
                PdfObject[] pending = new PdfObject[_overflow.Count];
                _overflow.Keys.CopyTo(pending, 0);
                _overflow = new Dictionary<PdfObject, object>();
                for (int idx = 0; idx < pending.Length; idx++)
                    TransitiveClosureImplementation(objects, pending[idx]);
            }

            CheckConsistence();

            PdfReference[] irefs = new PdfReference[objects.Count];
            objects.Keys.CopyTo(irefs, 0);
            return irefs;
        }

        // Current recursion depth of TransitiveClosureImplementation. Kept per table instance so that
        // independent documents do not disturb each other's depth limit.
        int _nestingLevel;

        // Objects whose traversal was postponed because the recursion limit was reached.
        Dictionary<PdfObject, object> _overflow = new Dictionary<PdfObject, object>();

        /// <summary>
        /// Collects all references reachable from the specified dictionary or array into <paramref name="objects"/>.
        /// If the recursion reaches a depth of 1000, the object is recorded in _overflow instead of
        /// being traversed; the caller processes these objects afterwards.
        /// </summary>
        /// <param name="objects">Receives the references found; also used to skip references already visited.</param>
        /// <param name="pdfObject">The dictionary or array whose elements are examined.</param>
        void TransitiveClosureImplementation(Dictionary<PdfReference, object> objects, PdfObject pdfObject/*, ref int depth*/)
        {
            try
            {
                _nestingLevel++;
                if (_nestingLevel >= 1000)
                {
                    // Too deep: postpone this object to avoid a stack overflow.
                    if (!_overflow.ContainsKey(pdfObject))
                        _overflow.Add(pdfObject, null);
                    return;
                }

                IEnumerable enumerable = null;
                PdfDictionary dict;
                PdfArray array;
                if ((dict = pdfObject as PdfDictionary) != null)
                    enumerable = dict.Elements.Values;
                else if ((array = pdfObject as PdfArray) != null)
                    enumerable = array.Elements;
                else
                    Debug.Assert(false, "Should not come here.");

                if (enumerable == null)
                    return;

                foreach (PdfItem item in enumerable)
                {
                    PdfReference iref = item as PdfReference;
                    if (iref != null)
                    {
                        // Is this an indirect reference to an object that does not exist?
                        //if (iref.Document == null)
                        //{
                        //    Debug.WriteLine("Dead object detected: " + iref.ObjectID.ToString());
                        //    PdfReference dead = DeadObject;
                        //    iref.ObjectID = dead.ObjectID;
                        //    iref.Document = _document;
                        //    iref.SetObject(dead.Value);
                        //    PdfDictionary dict = (PdfDictionary)dead.Value;
                        //    dict.Elements["/DeadObjectCount"] =
                        //        new PdfInteger(dict.Elements.GetInteger("/DeadObjectCount") + 1);
                        //    iref = dead;
                        //}

                        if (!ReferenceEquals(iref.Document, _document))
                            Debug.WriteLine(String.Format("Bad iref: {0}", iref.ObjectID.ToString()));
                        Debug.Assert(ReferenceEquals(iref.Document, _document) || iref.Document == null, "External object detected!");

                        if (objects.ContainsKey(iref))
                            continue;

                        PdfObject value = iref.Value;

                        // Ignore unreachable objects.
                        if (iref.Document == null)
                            continue;

                        // ... from trailer hack
                        if (value == null)
                        {
                            // The reference carries no object; continue with the entry of the table.
                            iref = ObjectTable[iref.ObjectID];
                            Debug.Assert(iref.Value != null);
                            value = iref.Value;

                            // The table entry may already have been collected through another
                            // reference; adding it a second time would throw.
                            if (objects.ContainsKey(iref))
                                continue;
                        }
                        Debug.Assert(ReferenceEquals(iref.Document, _document));
                        objects.Add(iref, null);
                        //Debug.WriteLine(String.Format("objects.Add('{0}', null);", iref.ObjectID.ToString()));
                        if (value is PdfArray || value is PdfDictionary)
                            TransitiveClosureImplementation(objects, value /*, ref depth*/);
                    }
                    else
                    {
                        // Direct (inline) containers are traversed too, but are not part of the result.
                        PdfObject directObject = item as PdfObject;
                        //if (directObject != null)
                        //    Debug.Assert(Object.ReferenceEquals(directObject.Document, _document));
                        if (directObject != null && (directObject is PdfDictionary || directObject is PdfArray))
                            TransitiveClosureImplementation(objects, directObject /*, ref depth*/);
                    }
                }
            }
            finally
            {
                _nestingLevel--;
            }
        }

        /// <summary>
        /// Gets the cross reference to an object used for undefined indirect references.
        /// The object is a dictionary with a /DeadObjectCount entry; it is created and added to
        /// this table on first access.
        /// </summary>
        public PdfReference DeadObject
        {
            get
            {
                if (_deadObject == null)
                {
                    _deadObject = new PdfDictionary(_document);
                    Add(_deadObject);
                    _deadObject.Elements.Add("/DeadObjectCount", new PdfInteger());
                }
                return _deadObject.Reference;
            }
        }
        PdfDictionary _deadObject;
    }

    ///// <summary>
    ///// Represents the cross-reference table of a PDF document.
    ///// It contains all indirect objects of a document.
    ///// </summary>
    //internal sealed class PdfCrossReferenceStreamTable  // Must not be derived from PdfObject.
    //{
    //    public PdfCrossReferenceStreamTable(PdfDocument document)
    //    {
    //        _document = document;
    //    }
    //    readonly PdfDocument _document;

    //    public class Item
    //    {
    //        public PdfReference Reference;

    //        public readonly List<CrossReferenceStreamEntry> Entries = new List<CrossReferenceStreamEntry>();
    //    }
    //}

    //struct CrossReferenceStreamEntry
    //{
    //    public int Type;

    //    public int Field2;

    //    public int Field3;
    //}
}