520 lines
20 KiB
C#
520 lines
20 KiB
C#
|
#region PDFsharp - A .NET library for processing PDF
|
|||
|
//
|
|||
|
// Authors:
|
|||
|
// Stefan Lange
|
|||
|
//
|
|||
|
// Copyright (c) 2005-2017 empira Software GmbH, Cologne Area (Germany)
|
|||
|
//
|
|||
|
// http://www.pdfsharp.com
|
|||
|
// http://sourceforge.net/projects/pdfsharp
|
|||
|
//
|
|||
|
// Permission is hereby granted, free of charge, to any person obtaining a
|
|||
|
// copy of this software and associated documentation files (the "Software"),
|
|||
|
// to deal in the Software without restriction, including without limitation
|
|||
|
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|||
|
// and/or sell copies of the Software, and to permit persons to whom the
|
|||
|
// Software is furnished to do so, subject to the following conditions:
|
|||
|
//
|
|||
|
// The above copyright notice and this permission notice shall be included
|
|||
|
// in all copies or substantial portions of the Software.
|
|||
|
//
|
|||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|||
|
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|||
|
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|||
|
// DEALINGS IN THE SOFTWARE.
|
|||
|
#endregion
|
|||
|
|
|||
|
using System;
|
|||
|
using System.Collections.Generic;
|
|||
|
using System.Diagnostics;
|
|||
|
using System.IO;
|
|||
|
using PdfSharp.Internal;
|
|||
|
using PdfSharp.Pdf.Advanced;
|
|||
|
using PdfSharp.Pdf.Security;
|
|||
|
using PdfSharp.Pdf.Internal;
|
|||
|
|
|||
|
namespace PdfSharp.Pdf.IO
|
|||
|
{
|
|||
|
/// <summary>
/// Carries the data exchanged between PdfReader.Open and a PdfPasswordProvider callback.
/// </summary>
public class PdfPasswordProviderArgs
{
    /// <summary>
    /// The password the document shall be opened with. The callback assigns it;
    /// PdfReader.Open reads it after the callback returns.
    /// </summary>
    public string Password;

    /// <summary>
    /// Set to true by the callback to cancel opening. PdfReader.Open then returns null,
    /// indicating that no PdfDocument was created.
    /// </summary>
    public bool Abort;
}
|
|||
|
|
|||
|
/// <summary>
/// A delegate used by the PdfReader.Open function to retrieve a password if the document is protected.
/// </summary>
/// <param name="args">
/// Receives the password in <see cref="PdfPasswordProviderArgs.Password"/>, or the request to
/// cancel opening in <see cref="PdfPasswordProviderArgs.Abort"/>.
/// </param>
public delegate void PdfPasswordProvider(PdfPasswordProviderArgs args);
|
|||
|
|
|||
|
/// <summary>
|
|||
|
/// Represents the functionality for reading PDF documents.
|
|||
|
/// </summary>
|
|||
|
public static class PdfReader
|
|||
|
{
|
|||
|
/// <summary>
/// Determines whether the file specified by its path is a PDF file by inspecting the
/// beginning of the file (up to 1024 bytes are read). If a header of the form "%PDF-x.y"
/// is found, the function returns the version number as integer (e.g. 14 for PDF 1.4).
/// If the file header is invalid or inaccessible for any reason, 0 is returned.
/// The function never throws an exception.
/// </summary>
/// <param name="path">
/// Path of the file to test. It is passed through XPdfForm.ExtractPageNumber to obtain
/// the actual file path before the file is opened.
/// </param>
/// <returns>The PDF version as integer, or 0 if no valid header was found.</returns>
public static int TestPdfFile(string path)
{
#if !NETFX_CORE
    FileStream file = null;
    try
    {
        int pageNumber;
        string realPath = Drawing.XPdfForm.ExtractPageNumber(path, out pageNumber);

        // Test for existence up front to prevent unwanted exceptions during debugging.
        if (!File.Exists(realPath))
            return 0;

        file = new FileStream(realPath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
        byte[] buffer = new byte[1024];
        // The number of bytes actually read is not needed: unread bytes stay zero.
        file.Read(buffer, 0, buffer.Length);
        return GetPdfFileVersion(buffer);
    }
    // ReSharper disable once EmptyGeneralCatchClause
    catch
    {
        // Swallowed on purpose: any failure is reported as "not a PDF file" (0).
    }
    finally
    {
        try
        {
            if (file != null)
            {
#if UWP
                file.Dispose();
#else
                file.Close();
#endif
            }
        }
        // ReSharper disable once EmptyGeneralCatchClause
        catch
        {
            // A failure while closing must not break the never-throws contract.
        }
    }
#endif
    return 0;
}
|
|||
|
|
|||
|
/// <summary>
/// Determines whether the specified stream contains a PDF file by inspecting the data at
/// the current stream position (up to 1024 bytes are read). If the data contains a header
/// of the form "%PDF-x.y", the function returns the version number as integer
/// (e.g. 14 for PDF 1.4). If the data is invalid or inaccessible for any reason, 0 is
/// returned. The function never throws an exception.
/// </summary>
/// <param name="stream">The stream to test. Its position is restored afterwards when possible.</param>
/// <returns>The PDF version as integer, or 0 if no valid header was found.</returns>
public static int TestPdfFile(Stream stream)
{
    int version = 0;
    long savedPosition = -1;
    try
    {
        savedPosition = stream.Position;
        byte[] buffer = new byte[1024];
        // The number of bytes actually read is not needed: unread bytes stay zero.
        stream.Read(buffer, 0, buffer.Length);
        version = GetPdfFileVersion(buffer);
    }
    // ReSharper disable once EmptyGeneralCatchClause
    catch
    {
        // Swallowed on purpose: any failure is reported as "not a PDF file" (0).
    }
    finally
    {
        try
        {
            // -1 means the position could not even be queried, so there is nothing to restore.
            if (savedPosition != -1)
                stream.Position = savedPosition;
        }
        // ReSharper disable once EmptyGeneralCatchClause
        catch
        {
            // A failure while restoring the position must not break the never-throws contract.
        }
    }
    return version;
}
|
|||
|
|
|||
|
/// <summary>
/// Determines whether the specified data is a PDF file by inspecting the beginning of the
/// data. If it contains a header of the form "%PDF-x.y", the function returns the version
/// number as integer (e.g. 14 for PDF 1.4). If the data is invalid or inaccessible for any
/// reason, 0 is returned. The function never throws an exception.
/// </summary>
/// <param name="data">The bytes to test.</param>
/// <returns>The PDF version as integer, or 0 if no valid header was found.</returns>
public static int TestPdfFile(byte[] data)
{
    // Public entry point for the internal header scanner.
    int version = GetPdfFileVersion(data);
    return version;
}
|
|||
|
|
|||
|
/// <summary>
/// Implements scanning the PDF file version.
/// Accepts only versions 1.0 through 1.9 and returns them as integer (10 to 19).
/// </summary>
/// <param name="bytes">The leading bytes of the file or stream. May be null or empty.</param>
/// <returns>
/// The version as integer (e.g. 14 for PDF 1.4), or 0 if <paramref name="bytes"/> is null,
/// empty, too short, or does not contain a recognized header. The function never throws.
/// </returns>
internal static int GetPdfFileVersion(byte[] bytes)
{
    // Reject unusable input explicitly instead of relying on a swallowed exception.
    if (bytes == null || bytes.Length == 0)
        return 0;

    try
    {
        // Acrobat accepts headers like "%!PS-Adobe-N.n PDF-M.m"...
        string header = PdfEncoders.RawEncoding.GetString(bytes, 0, bytes.Length); // Encoding.ASCII.GetString(bytes);
        if (header.Length == 0)
            return 0;

        if (header[0] == '%' || header.IndexOf("%PDF", StringComparison.Ordinal) >= 0)
        {
            int ich = header.IndexOf("PDF-", StringComparison.Ordinal);
            // "PDF-" must be preceded by at least one character and followed by "M.m";
            // the bounds check guards against a header truncated right after "PDF-".
            if (ich > 0 && ich + 6 < header.Length && header[ich + 5] == '.')
            {
                char major = header[ich + 4];
                char minor = header[ich + 6];
                if (major >= '1' && major < '2' && minor >= '0' && minor <= '9')
                    return (major - '0') * 10 + (minor - '0');
            }
        }
    }
    // ReSharper disable once EmptyGeneralCatchClause
    catch
    {
        // Safety net only: keeps the documented never-throws behavior of the callers.
    }
    return 0;
}
|
|||
|
|
|||
|
/// <summary>
/// Opens an existing PDF document from a file, without a password and without a password provider.
/// </summary>
/// <param name="path">Path of the PDF file.</param>
/// <param name="openmode">The mode the document is opened in.</param>
/// <returns>The opened document.</returns>
public static PdfDocument Open(string path, PdfDocumentOpenMode openmode)
{
    string noPassword = null;
    PdfPasswordProvider noProvider = null;
    return Open(path, noPassword, openmode, noProvider);
}
|
|||
|
|
|||
|
/// <summary>
/// Opens an existing PDF document from a file, using a callback to obtain the password if needed.
/// </summary>
/// <param name="path">Path of the PDF file.</param>
/// <param name="openmode">The mode the document is opened in.</param>
/// <param name="provider">Callback that supplies a password or aborts opening. May be null.</param>
/// <returns>The opened document, or null if the provider aborted.</returns>
public static PdfDocument Open(string path, PdfDocumentOpenMode openmode, PdfPasswordProvider provider)
{
    string noPassword = null;
    return Open(path, noPassword, openmode, provider);
}
|
|||
|
|
|||
|
/// <summary>
/// Opens an existing PDF document from a file, using the specified password.
/// </summary>
/// <param name="path">Path of the PDF file.</param>
/// <param name="password">The password to open the document with. May be null.</param>
/// <param name="openmode">The mode the document is opened in.</param>
/// <returns>The opened document.</returns>
public static PdfDocument Open(string path, string password, PdfDocumentOpenMode openmode)
{
    PdfPasswordProvider noProvider = null;
    return Open(path, password, openmode, noProvider);
}
|
|||
|
|
|||
|
/// <summary>
/// Opens an existing PDF document from a file. The file is opened for reading, handed to
/// the stream-based Open overload, and closed again before this function returns.
/// </summary>
/// <param name="path">Path of the PDF file.</param>
/// <param name="password">The password to open the document with. May be null.</param>
/// <param name="openmode">The mode the document is opened in.</param>
/// <param name="provider">Callback that supplies a password or aborts opening. May be null.</param>
/// <returns>
/// The opened document, or null if the stream-based overload returned null.
/// If NETFX_CORE is defined, always null.
/// </returns>
public static PdfDocument Open(string path, string password, PdfDocumentOpenMode openmode, PdfPasswordProvider provider)
{
#if !NETFX_CORE
    // The using statement closes the file on every exit path, including exceptions.
    using (Stream stream = new FileStream(path, FileMode.Open, FileAccess.Read))
    {
        PdfDocument document = Open(stream, password, openmode, provider);
        if (document != null)
        {
            // Remember where the document came from.
            document._fullPath = Path.GetFullPath(path);
        }
        return document;
    }
#else
    return null;
#endif
}
|
|||
|
|
|||
|
/// <summary>
/// Opens an existing PDF document from a file in Modify mode, without a password and
/// without a password provider.
/// </summary>
/// <param name="path">Path of the PDF file.</param>
/// <returns>The opened document.</returns>
public static PdfDocument Open(string path)
{
    string noPassword = null;
    PdfPasswordProvider noProvider = null;
    return Open(path, noPassword, PdfDocumentOpenMode.Modify, noProvider);
}
|
|||
|
|
|||
|
/// <summary>
/// Opens an existing PDF document from a file in Modify mode, using the specified password.
/// </summary>
/// <param name="path">Path of the PDF file.</param>
/// <param name="password">The password to open the document with. May be null.</param>
/// <returns>The opened document.</returns>
public static PdfDocument Open(string path, string password)
{
    PdfPasswordProvider noProvider = null;
    return Open(path, password, PdfDocumentOpenMode.Modify, noProvider);
}
|
|||
|
|
|||
|
/// <summary>
/// Opens an existing PDF document from a stream, without a password.
/// </summary>
/// <param name="stream">The stream containing the PDF data.</param>
/// <param name="openmode">The mode the document is opened in.</param>
/// <returns>The opened document.</returns>
public static PdfDocument Open(Stream stream, PdfDocumentOpenMode openmode)
{
    string noPassword = null;
    return Open(stream, noPassword, openmode);
}
|
|||
|
|
|||
|
/// <summary>
/// Opens an existing PDF document from a stream, using a callback to obtain the password if needed.
/// </summary>
/// <param name="stream">The stream containing the PDF data.</param>
/// <param name="openmode">The mode the document is opened in.</param>
/// <param name="passwordProvider">Callback that supplies a password or aborts opening. May be null.</param>
/// <returns>The opened document, or null if the provider aborted.</returns>
public static PdfDocument Open(Stream stream, PdfDocumentOpenMode openmode, PdfPasswordProvider passwordProvider)
{
    string noPassword = null;
    return Open(stream, noPassword, openmode, passwordProvider);
}
|
|||
|
/// <summary>
/// Opens an existing PDF document from a stream, using the specified password.
/// </summary>
/// <param name="stream">The stream containing the PDF data.</param>
/// <param name="password">The password to open the document with. May be null.</param>
/// <param name="openmode">The mode the document is opened in.</param>
/// <returns>The opened document.</returns>
public static PdfDocument Open(Stream stream, string password, PdfDocumentOpenMode openmode)
{
    PdfPasswordProvider noProvider = null;
    return Open(stream, password, openmode, noProvider);
}
|
|||
|
|
|||
|
/// <summary>
/// Opens an existing PDF document from a stream. This is the overload all other Open
/// functions end up in. It reads the file version, the trailer(s) or cross-reference
/// streams, validates the password if the trailer has an Encrypt entry, reads all
/// compressed and indirect objects and, in Modify mode, prepares the document for changes.
/// </summary>
/// <param name="stream">
/// The stream containing the PDF data. Its Length is queried and its Position is set to 0,
/// so it must support both.
/// </param>
/// <param name="password">
/// The password passed to the security handler if the document is encrypted.
/// Ignored if the document is not encrypted. May be null.
/// </param>
/// <param name="openmode">The mode the document is opened in.</param>
/// <param name="passwordProvider">
/// Optional callback. It is invoked each time the current password is invalid, or is only
/// the user password while <paramref name="openmode"/> is Modify. May be null.
/// </param>
/// <returns>The opened document, or null if the password provider set Abort.</returns>
/// <exception cref="InvalidOperationException">No valid PDF version was found in the first 1024 bytes.</exception>
/// <exception cref="PdfReaderException">
/// A password is missing, invalid, or not the owner password, and no password provider was specified.
/// </exception>
public static PdfDocument Open(Stream stream, string password, PdfDocumentOpenMode openmode, PdfPasswordProvider passwordProvider)
{
    PdfDocument document;
    try
    {
        document = new PdfDocument(new Lexer(stream));
        document._state |= DocumentState.Imported;
        document._openMode = openmode;
        document._fileSize = stream.Length;

        // Get file version from the beginning of the stream.
        byte[] headerBytes = new byte[1024];
        stream.Position = 0;
        stream.Read(headerBytes, 0, headerBytes.Length);
        document._version = GetPdfFileVersion(headerBytes);
        if (document._version == 0)
            throw new InvalidOperationException(PSSR.InvalidPdf);

        // 1st: Read all trailers or cross-reference streams, but no objects.
        document._irefTable.IsUnderConstruction = true;
        Parser parser = new Parser(document);
        document._trailer = parser.ReadTrailer();
        if (document._trailer == null)
            ParserDiagnostics.ThrowParserException("Invalid PDF file: no trailer found."); // TODO L10N using PSSR.

        Debug.Assert(document._irefTable.IsUnderConstruction);
        document._irefTable.IsUnderConstruction = false;

        // 2nd: Is document encrypted? If so, read the encryption object and validate the password.
        // A password specified for a document that is not encrypted is simply ignored.
        PdfReference encryptRef = document._trailer.Elements[PdfTrailer.Keys.Encrypt] as PdfReference;
        bool isEncrypted = encryptRef != null;
        if (isEncrypted)
        {
            PdfObject encryptObject = parser.ReadObject(null, encryptRef.ObjectID, false, false);
            encryptObject.Reference = encryptRef;
            encryptRef.Value = encryptObject;
            PdfStandardSecurityHandler securityHandler = document.SecurityHandler;

            // Ask for a password until it is acceptable, the provider aborts, or no provider is left to ask.
            while (true)
            {
                PasswordValidity validity = securityHandler.ValidatePassword(password);
                bool isInvalid = validity == PasswordValidity.Invalid;
                bool needsOwnerPassword = validity == PasswordValidity.UserPassword
                                          && openmode == PdfDocumentOpenMode.Modify;
                if (!isInvalid && !needsOwnerPassword)
                    break;

                if (passwordProvider == null)
                {
                    if (!isInvalid)
                        throw new PdfReaderException(PSSR.OwnerPasswordRequired);
                    if (password == null)
                        throw new PdfReaderException(PSSR.PasswordRequired);
                    throw new PdfReaderException(PSSR.InvalidPassword);
                }

                PdfPasswordProviderArgs args = new PdfPasswordProviderArgs();
                passwordProvider(args);
                if (args.Abort)
                    return null;
                password = args.Password;
            }
        }

        PdfReference[] tableRefs = document._irefTable.AllReferences;

        // 3rd: Create iRefs for all compressed objects.
        // The dictionary is used as a set: each object stream is processed only once.
        Dictionary<int, object> seenObjectStreams = new Dictionary<int, object>();
        foreach (PdfReference tableRef in tableRefs)
        {
            PdfCrossReferenceStream xrefStream = tableRef.Value as PdfCrossReferenceStream;
            if (xrefStream == null)
                continue;

            for (int entryIdx = 0; entryIdx < xrefStream.Entries.Count; entryIdx++)
            {
                PdfCrossReferenceStream.CrossReferenceStreamEntry entry = xrefStream.Entries[entryIdx];
                // Is type xref to compressed object?
                if (entry.Type != 2)
                    continue;

                int streamNumber = (int)entry.Field2;
                if (seenObjectStreams.ContainsKey(streamNumber))
                    continue;

                seenObjectStreams.Add(streamNumber, null);
                parser.ReadIRefsFromCompressedObject(new PdfObjectID(streamNumber));
            }
        }

        // 4th: Read compressed objects.
        foreach (PdfReference tableRef in tableRefs)
        {
            PdfCrossReferenceStream xrefStream = tableRef.Value as PdfCrossReferenceStream;
            if (xrefStream == null)
                continue;

            for (int entryIdx = 0; entryIdx < xrefStream.Entries.Count; entryIdx++)
            {
                PdfCrossReferenceStream.CrossReferenceStreamEntry entry = xrefStream.Entries[entryIdx];
                // Is type xref to compressed object?
                if (entry.Type != 2)
                    continue;

                // The returned reference is not used here.
                parser.ReadCompressedObject(new PdfObjectID((int)entry.Field2), (int)entry.Field3);
                // NOTE(review): this checks the cross-reference stream's own reference, not the
                // reference returned by ReadCompressedObject - possibly unintended; confirm.
                Debug.Assert(document._irefTable.Contains(tableRef.ObjectID));
            }
        }

        // 5th: Read all indirect objects that have no value yet.
        // The table is queried again because the previous steps may have added references.
        PdfReference[] allRefs = document._irefTable.AllReferences;
        foreach (PdfReference iref in allRefs)
        {
            if (iref.Value == null)
            {
#if DEBUG_
                if (iref.ObjectNumber == 1074)
                    iref.GetType();
#endif
                try
                {
                    Debug.Assert(document._irefTable.Contains(iref.ObjectID));
                    PdfObject pdfObject = parser.ReadObject(null, iref.ObjectID, false, false);
                    Debug.Assert(pdfObject.Reference == iref);
                    pdfObject.Reference = iref;
                    Debug.Assert(pdfObject.Reference.Value != null, "Something went wrong.");
                }
                catch (Exception ex)
                {
                    Debug.WriteLine(ex.Message);
                    // 4STLA rethrow exception to notify caller.
                    throw;
                }
            }
            else
            {
                Debug.Assert(document._irefTable.Contains(iref.ObjectID));
            }

            // Set maximum object number.
            document._irefTable._maxObjectNumber = Math.Max(document._irefTable._maxObjectNumber,
                iref.ObjectNumber);
        }

        // Let the security handler process all objects of an encrypted document.
        if (isEncrypted)
            document.SecurityHandler.EncryptDocument();

        // Fix references of trailer values and then objects and irefs are consistent.
        document._trailer.Finish();

#if DEBUG_
        // Some tests...
        PdfReference[] reachables = document.xrefTable.TransitiveClosure(document.trailer);
        reachables.GetType();
        reachables = document.xrefTable.AllXRefs;
        document.xrefTable.CheckConsistence();
#endif

        if (openmode == PdfDocumentOpenMode.Modify)
        {
            // Create new or change existing document IDs.
            if (document.Internals.SecondDocumentID == "")
            {
                document._trailer.CreateNewDocumentIDs();
            }
            else
            {
                byte[] guidBytes = Guid.NewGuid().ToByteArray();
                document.Internals.SecondDocumentID = PdfEncoders.RawEncoding.GetString(guidBytes, 0, guidBytes.Length);
            }

            // Change modification date.
            document.Info.ModificationDate = DateTime.Now;

            // Remove all unreachable objects.
            int removedCount = document._irefTable.Compact();
            if (removedCount != 0)
                Debug.WriteLine("Number of deleted unreachable objects: " + removedCount);

            // Force flattening of page tree by accessing the Pages property.
            PdfPages pages = document.Pages;
            Debug.Assert(pages != null);

            document._irefTable.CheckConsistence();
            document._irefTable.Renumber();
            document._irefTable.CheckConsistence();
        }
    }
    catch (Exception ex)
    {
        // Trace the failure, then let the caller handle it.
        Debug.WriteLine(ex.Message);
        throw;
    }
    return document;
}
|
|||
|
|
|||
|
/// <summary>
/// Opens an existing PDF document from a stream in Modify mode, without a password.
/// </summary>
/// <param name="stream">The stream containing the PDF data.</param>
/// <returns>The opened document.</returns>
public static PdfDocument Open(Stream stream)
{
    PdfDocumentOpenMode defaultMode = PdfDocumentOpenMode.Modify;
    return Open(stream, defaultMode);
}
|
|||
|
}
|
|||
|
}
|