#region PDFsharp - A .NET library for processing PDF
//
// Authors:
//   Stefan Lange
//
// Copyright (c) 2005-2017 empira Software GmbH, Cologne Area (Germany)
//
// http://www.pdfsharp.com
// http://sourceforge.net/projects/pdfsharp
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
#endregion

using System;
using System.Diagnostics;
using System.Text;
using PdfSharp.Pdf.IO;
using PdfSharp.Pdf.Internal;

namespace PdfSharp.Pdf
{
    /// <summary>
    /// Determines the encoding of a PdfString or PdfStringObject.
    /// </summary>
    // NOTE(review): the members are sequential values (0..6), not independent bits, so
    // [Flags] does not describe them accurately. The attribute is kept because removing
    // it from a public enum would change Enum.ToString()/Parse() results for callers.
    [Flags]
    public enum PdfStringEncoding
    {
        /// <summary>
        /// The characters of the string are actually bytes with an unknown or context specific meaning or encoding.
        /// With this encoding the 8 high bits of each character are zero.
        /// </summary>
        RawEncoding = PdfStringFlags.RawEncoding,

        /// <summary>
        /// Not yet used by PDFsharp.
        /// </summary>
        StandardEncoding = PdfStringFlags.StandardEncoding,

        /// <summary>
        /// The characters of the string are actually bytes with PDF document encoding.
        /// With this encoding the 8 high bits of each character are zero.
        /// </summary>
        // ReSharper disable InconsistentNaming because the name is spelled as in the Adobe reference.
        PDFDocEncoding = PdfStringFlags.PDFDocEncoding,
        // ReSharper restore InconsistentNaming

        /// <summary>
        /// The characters of the string are actually bytes with Windows ANSI encoding.
        /// With this encoding the 8 high bits of each character are zero.
        /// </summary>
        WinAnsiEncoding = PdfStringFlags.WinAnsiEncoding,

        /// <summary>
        /// Not yet used by PDFsharp.
        /// </summary>
        // Previously initialized from PdfStringFlags.MacExpertEncoding, which made this member
        // an alias of MacExpertEncoding and left the MacRomanEncoding flag unreachable.
        MacRomanEncoding = PdfStringFlags.MacRomanEncoding,

        /// <summary>
        /// Not yet used by PDFsharp.
        /// </summary>
        MacExpertEncoding = PdfStringFlags.MacExpertEncoding,

        /// <summary>
        /// The characters of the string are Unicode characters.
        /// </summary>
        Unicode = PdfStringFlags.Unicode,
    }

    /// <summary>
    /// Internal wrapper for PdfStringEncoding.
    /// The encoding is stored in the bits selected by <see cref="EncodingMask"/>;
    /// <see cref="HexLiteral"/> is a separate bit that can be combined with any encoding.
    /// </summary>
    [Flags]
    enum PdfStringFlags
    {
        // ReSharper disable InconsistentNaming
        RawEncoding = 0x00,
        StandardEncoding = 0x01,  // not used by PDFsharp
        PDFDocEncoding = 0x02,
        WinAnsiEncoding = 0x03,
        MacRomanEncoding = 0x04,  // not used by PDFsharp
        MacExpertEncoding = 0x05,  // not used by PDFsharp
        Unicode = 0x06,
        EncodingMask = 0x0F,

        HexLiteral = 0x80,
        // ReSharper restore InconsistentNaming
    }

    /// <summary>
    /// Represents a direct text string value.
    /// </summary>
    [DebuggerDisplay("({Value})")]
    public sealed class PdfString : PdfItem
    {
        /// <summary>
        /// Initializes a new instance of the <see cref="PdfString"/> class
        /// with no value and raw encoding (the default value of the flags field).
        /// </summary>
        public PdfString()
        {
            // _flags is left at its default, which equals PdfStringFlags.RawEncoding.
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="PdfString"/> class.
        /// The string is flagged as Unicode if it contains at least one character
        /// with a code of 256 or above; otherwise it keeps raw encoding.
        /// </summary>
        /// <param name="value">The value.</param>
        public PdfString(string value)
        {
            if (!IsRawEncoding(value))
                _flags = PdfStringFlags.Unicode;
            _value = value;
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="PdfString"/> class.
        /// </summary>
        /// <param name="value">The value.</param>
        /// <param name="encoding">The encoding.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="encoding"/> is not one of the named <see cref="PdfStringEncoding"/> values.
        /// </exception>
        public PdfString(string value, PdfStringEncoding encoding)
        {
            switch (encoding)
            {
                case PdfStringEncoding.RawEncoding:
                case PdfStringEncoding.WinAnsiEncoding:
                    // Both encodings are byte based: debug builds assert that every char fits in 8 bits.
                    CheckRawEncoding(value);
                    break;

                case PdfStringEncoding.StandardEncoding:
                case PdfStringEncoding.PDFDocEncoding:
                case PdfStringEncoding.MacRomanEncoding:
                case PdfStringEncoding.MacExpertEncoding:
                case PdfStringEncoding.Unicode:
                    // Accepted without validation.
                    break;

                default:
                    throw new ArgumentOutOfRangeException("encoding");
            }
            _value = value;
            _flags = (PdfStringFlags)encoding;
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="PdfString"/> class from a value
        /// and a complete set of internal flags (encoding plus optional hex literal bit).
        /// No validation is performed.
        /// </summary>
        internal PdfString(string value, PdfStringFlags flags)
        {
            _value = value;
            _flags = flags;
        }

        /// <summary>
        /// Gets the number of characters in this string.
        /// Returns 0 if no value was set.
        /// </summary>
        public int Length
        {
            get { return _value == null ? 0 : _value.Length; }
        }

        /// <summary>
        /// Gets the encoding.
        /// </summary>
        public PdfStringEncoding Encoding
        {
            get { return (PdfStringEncoding)(_flags & PdfStringFlags.EncodingMask); }
        }

        /// <summary>
        /// Gets a value indicating whether the string is a hexadecimal literal.
        /// </summary>
        public bool HexLiteral
        {
            get { return (_flags & PdfStringFlags.HexLiteral) != 0; }
        }

        /// <summary>
        /// Gets the raw internal flags (encoding and hex literal bit).
        /// </summary>
        internal PdfStringFlags Flags
        {
            get { return _flags; }
        }
        readonly PdfStringFlags _flags;

        /// <summary>
        /// Gets the string value.
        /// Never returns null; an unset value is reported as the empty string.
        /// </summary>
        public string Value
        {
            // This class must behave like a value type. Therefore it cannot be changed (like System.String).
            get { return _value ?? ""; }
        }
        string _value;

        /// <summary>
        /// Gets or sets the string value for encryption purposes.
        /// The value is converted to and from bytes with PdfEncoders.RawEncoding.
        /// </summary>
        /// <exception cref="ArgumentNullException">The value being set is null.</exception>
        internal byte[] EncryptionValue
        {
            // TODO: Unicode case is not handled!
            get { return _value == null ? new byte[0] : PdfEncoders.RawEncoding.GetBytes(_value); }
            // BUG: May lead to trouble with the value semantics of PdfString
            set
            {
                // Fail with a descriptive exception instead of a NullReferenceException on value.Length.
                if (value == null)
                    throw new ArgumentNullException("value");
                _value = PdfEncoders.RawEncoding.GetString(value, 0, value.Length);
            }
        }

        /// <summary>
        /// Returns the string as a PDF literal: a hexadecimal string literal if the
        /// hex literal flag is set, otherwise a regular string literal.
        /// </summary>
        public override string ToString()
        {
            PdfStringEncoding encoding = (PdfStringEncoding)(_flags & PdfStringFlags.EncodingMask);
            if ((_flags & PdfStringFlags.HexLiteral) != 0)
                return PdfEncoders.ToHexStringLiteral(_value, encoding, null);
            return PdfEncoders.ToStringLiteral(_value, encoding, null);
        }

        /// <summary>
        /// Hack for document encoded bookmarks.
        /// Interprets every character of the value as a byte and maps it
        /// to a Unicode character through the <see cref="Encode"/> table.
        /// </summary>
        /// <returns>The mapped string, or the empty string if no value was set.</returns>
        /// <exception cref="InvalidOperationException">
        /// The value contains a character with a code greater than 255.
        /// </exception>
        public string ToStringFromPdfDocEncoded()
        {
            // A default-constructed instance has no value; treat it like the empty string,
            // consistent with Length and Value, instead of failing on _value.Length.
            if (String.IsNullOrEmpty(_value))
                return "";

            int length = _value.Length;
            char[] decoded = new char[length];
            for (int idx = 0; idx < length; idx++)
            {
                char ch = _value[idx];
                if (ch > 255)
                    throw new InvalidOperationException("DocEncoded string contains char greater 255.");
                decoded[idx] = Encode[ch];
            }
            return new string(decoded);
        }

        // Maps a byte value (0..255) to the Unicode character used by ToStringFromPdfDocEncoded.
        // Entries 0x00-0x7F and 0xA1-0xFF are the identity mapping; 0x80-0xA0 are remapped.
        // NOTE(review): the PDF reference assigns diacritic glyphs (breve, caron, ...) to
        // 0x18-0x1F in PDFDocEncoding, while this table maps them to themselves - confirm
        // whether that is intentional before relying on those code points.
        static readonly char[] Encode =
        {
            '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', '\x08', '\x09', '\x0A', '\x0B', '\x0C', '\x0D', '\x0E', '\x0F',
            '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', '\x18', '\x19', '\x1A', '\x1B', '\x1C', '\x1D', '\x1E', '\x1F',
            '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27', '\x28', '\x29', '\x2A', '\x2B', '\x2C', '\x2D', '\x2E', '\x2F',
            '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37', '\x38', '\x39', '\x3A', '\x3B', '\x3C', '\x3D', '\x3E', '\x3F',
            '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', '\x48', '\x49', '\x4A', '\x4B', '\x4C', '\x4D', '\x4E', '\x4F',
            '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', '\x58', '\x59', '\x5A', '\x5B', '\x5C', '\x5D', '\x5E', '\x5F',
            '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', '\x68', '\x69', '\x6A', '\x6B', '\x6C', '\x6D', '\x6E', '\x6F',
            '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', '\x78', '\x79', '\x7A', '\x7B', '\x7C', '\x7D', '\x7E', '\x7F',
            '\x2022', '\x2020', '\x2021', '\x2026', '\x2014', '\x2013', '\x0192', '\x2044', '\x2039', '\x203A', '\x2212', '\x2030', '\x201E', '\x201C', '\x201D', '\x2018',
            '\x2019', '\x201A', '\x2122', '\xFB01', '\xFB02', '\x0141', '\x0152', '\x0160', '\x0178', '\x017D', '\x0131', '\x0142', '\x0153', '\x0161', '\x017E', '\xFFFD',
            '\x20AC', '\xA1', '\xA2', '\xA3', '\xA4', '\xA5', '\xA6', '\xA7', '\xA8', '\xA9', '\xAA', '\xAB', '\xAC', '\xAD', '\xAE', '\xAF',
            '\xB0', '\xB1', '\xB2', '\xB3', '\xB4', '\xB5', '\xB6', '\xB7', '\xB8', '\xB9', '\xBA', '\xBB', '\xBC', '\xBD', '\xBE', '\xBF',
            '\xC0', '\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7', '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE', '\xCF',
            '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5', '\xD6', '\xD7', '\xD8', '\xD9', '\xDA', '\xDB', '\xDC', '\xDD', '\xDE', '\xDF',
            '\xE0', '\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7', '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE', '\xEF',
            '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5', '\xF6', '\xF7', '\xF8', '\xF9', '\xFA', '\xFB', '\xFC', '\xFD', '\xFE', '\xFF',
        };

        /// <summary>
        /// Asserts (debug builds only) that every character of the string has a code below 256.
        /// </summary>
        // The only effect of this method is Debug.Assert, so calls are compiled out of
        // release builds to avoid scanning the string for nothing.
        [Conditional("DEBUG")]
        static void CheckRawEncoding(string s)
        {
            if (String.IsNullOrEmpty(s))
                return;

            int length = s.Length;
            for (int idx = 0; idx < length; idx++)
            {
                Debug.Assert(s[idx] < 256, "RawString contains invalid character.");
            }
        }

        /// <summary>
        /// Determines whether every character of the string has a code below 256.
        /// A null or empty string counts as raw encoded.
        /// </summary>
        static bool IsRawEncoding(string s)
        {
            if (String.IsNullOrEmpty(s))
                return true;

            int length = s.Length;
            for (int idx = 0; idx < length; idx++)
            {
                if (s[idx] >= 256)
                    return false;
            }
            return true;
        }

        /// <summary>
        /// Writes the string DocEncoded.
        /// </summary>
        internal override void WriteObject(PdfWriter writer)
        {
            writer.Write(this);
        }
    }
}