903 lines
30 KiB
C#
903 lines
30 KiB
C#
#region PDFsharp - A .NET library for processing PDF
|
||
//
|
||
// Authors:
|
||
// Stefan Lange
|
||
//
|
||
// Copyright (c) 2005-2017 empira Software GmbH, Cologne Area (Germany)
|
||
//
|
||
// http://www.pdfsharp.com
|
||
// http://sourceforge.net/projects/pdfsharp
|
||
//
|
||
// Permission is hereby granted, free of charge, to any person obtaining a
|
||
// copy of this software and associated documentation files (the "Software"),
|
||
// to deal in the Software without restriction, including without limitation
|
||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||
// and/or sell copies of the Software, and to permit persons to whom the
|
||
// Software is furnished to do so, subject to the following conditions:
|
||
//
|
||
// The above copyright notice and this permission notice shall be included
|
||
// in all copies or substantial portions of the Software.
|
||
//
|
||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||
// DEALINGS IN THE SOFTWARE.
|
||
#endregion
|
||
|
||
using System;
|
||
using System.Globalization;
|
||
using System.Diagnostics;
|
||
using System.Text;
|
||
using System.IO;
|
||
using PdfSharp.Internal;
|
||
using PdfSharp.Pdf.Internal;
|
||
|
||
#pragma warning disable 1591
|
||
|
||
namespace PdfSharp.Pdf.IO
|
||
{
|
||
/// <summary>
|
||
/// Lexical analyzer for PDF files. Technically a PDF file is a stream of bytes. Some chunks
|
||
/// of bytes represent strings in several encodings. The actual encoding depends on the
|
||
/// context where the string is used. Therefore the bytes are 'raw encoded' into characters,
|
||
/// i.e. a character or token read by the lexer has always character values in the range from
|
||
/// 0 to 255.
|
||
/// </summary>
|
||
public class Lexer
|
||
{
|
||
/// <summary>
/// Initializes a new instance of the Lexer class that reads from the specified stream
/// and positions the lexer at the first byte.
/// </summary>
/// <param name="pdfInputStream">The stream containing the PDF data. Its Length and Position
/// members are used, so it has to support seeking.</param>
/// <exception cref="ArgumentNullException"><paramref name="pdfInputStream"/> is null.</exception>
public Lexer(Stream pdfInputStream)
{
    if (pdfInputStream == null)
        throw new ArgumentNullException("pdfInputStream");

    _pdfSteam = pdfInputStream;
    // Positions are handled as int throughout the lexer, so the stream length is narrowed here.
    _pdfLength = (int)_pdfSteam.Length;
    // Assigning Position initializes _idxChar, both look-ahead characters and the token buffer.
    Position = 0;
}
|
||
|
||
/// <summary>
/// Gets or sets the index of the current character within the PDF stream.
/// Setting the position seeks the underlying stream, reloads the current and the
/// look-ahead character and discards the token collected so far.
/// </summary>
public int Position
{
    get
    {
        return _idxChar;
    }
    set
    {
        _idxChar = value;
        _pdfSteam.Position = value;

        // ReadByte returns -1 at the end of the stream; the cast turns that into char 0xFFFF
        // (presumably the value of Chars.EOF - verify against the Chars class).
        int first = _pdfSteam.ReadByte();
        _currChar = (char)first;

        int second = _pdfSteam.ReadByte();
        _nextChar = (char)second;

        _token = new StringBuilder();
    }
}
|
||
|
||
/// <summary>
/// Skips white space and comments, scans the next token and returns its symbol type.
/// The text of the token is available through <see cref="Token"/> afterwards.
/// A token that starts with a digit is always scanned as a plain number; the trailing
/// parts of an indirect reference ("gen R") are delivered as separate tokens.
/// </summary>
/// <returns>The symbol of the scanned token, Symbol.Eof at the end of the stream, or
/// Symbol.None if an unexpected character was reported and the diagnostics handler returned.</returns>
public Symbol ScanNextToken()
{
    while (true)
    {
        _token = new StringBuilder();
        char first = MoveToNonWhiteSpace();

        if (first == '%')
        {
            // The parser does not handle comments, so they are consumed here and scanning restarts.
            ScanComment();
            continue;
        }

        switch (first)
        {
            case '/':
                return _symbol = ScanName();

            case '+': //TODO is it so easy?
            case '-':
            case '.':
                return _symbol = ScanNumber();

            case '(':
                return _symbol = ScanLiteralString();

            case '[':
                ScanNextChar(true);
                return _symbol = Symbol.BeginArray;

            case ']':
                ScanNextChar(true);
                return _symbol = Symbol.EndArray;

            case '<':
                if (_nextChar != '<')
                    return _symbol = ScanHexadecimalString();

                // "<<" opens a dictionary: consume both characters.
                ScanNextChar(true);
                ScanNextChar(true);
                return _symbol = Symbol.BeginDictionary;

            case '>':
                if (_nextChar == '>')
                {
                    // ">>" closes a dictionary: consume both characters.
                    ScanNextChar(true);
                    ScanNextChar(true);
                    return _symbol = Symbol.EndDictionary;
                }
                // A single '>' is not a valid token start; if the handler returns,
                // control falls through to the generic checks below.
                ParserDiagnostics.HandleUnexpectedCharacter(_nextChar);
                break;
        }

        if (char.IsDigit(first))
        {
#if true_
            return ScanNumberOrReference();
#else
            // The result of the look-ahead is irrelevant because a number is scanned either way.
            // The call is kept because it rewinds the lexer through Position, which also
            // re-seeks the underlying stream.
            PeekReference();
            return _symbol = ScanNumber();
#endif
        }

        if (char.IsLetter(first))
            return _symbol = ScanKeyword();

        if (first == Chars.EOF)
            return _symbol = Symbol.Eof;

        // #???

        ParserDiagnostics.HandleUnexpectedCharacter(first);
        return _symbol = Symbol.None;
    }
}
|
||
|
||
/// <summary>
/// Reads the raw content of a stream object. The lexer is expected to stand directly
/// behind the "stream" keyword; trailing blanks and the end-of-line marker are skipped first.
/// </summary>
/// <param name="length">The number of bytes to read.</param>
/// <returns>The bytes read. The array is shorter than <paramref name="length"/> if the
/// underlying stream ends prematurely (e.g. in a corrupted file).</returns>
public byte[] ReadStream(int length)
{
    // Skip illegal blanks behind "stream".
    while (_currChar == Chars.SP)
        ScanNextChar(true);

    // Skip the new line behind "stream": two bytes for CR LF, one byte otherwise.
    int pos = _idxChar + 1;
    if (_currChar == Chars.CR && _nextChar == Chars.LF)
        pos = _idxChar + 2;

    _pdfSteam.Position = pos;
    byte[] bytes = new byte[length];

    // Stream.Read may deliver fewer bytes than requested even before the end of the stream,
    // so keep reading until the buffer is full or the stream reports its end.
    int read = 0;
    while (read < length)
    {
        int chunk = _pdfSteam.Read(bytes, read, length - read);
        if (chunk <= 0)
            break;
        read += chunk;
    }
    Debug.Assert(read == length);

    // With corrupted files, read could be different from length.
    if (bytes.Length != read)
        Array.Resize(ref bytes, read);

    // Synchronize _idxChar and the look-ahead characters with the new stream position.
    Position = pos + read;
    return bytes;
}
|
||
|
||
/// <summary>
/// Reads a string in raw encoding directly from the underlying stream.
/// Only the stream position is changed; the lexer's current and look-ahead characters
/// are not refreshed by this method (NOTE(review): callers presumably reset
/// <see cref="Position"/> before scanning again - confirm).
/// </summary>
/// <param name="position">The absolute stream position of the first byte.</param>
/// <param name="length">The number of bytes to decode.</param>
/// <returns>A string of <paramref name="length"/> characters; bytes that could not be read
/// because the stream ended early remain zero.</returns>
public String ReadRawString(int position, int length)
{
    _pdfSteam.Position = position;
    byte[] bytes = new byte[length];

    // Stream.Read may deliver fewer bytes than requested, so read until the buffer is
    // full or the end of the stream is reached.
    int read = 0;
    while (read < length)
    {
        int chunk = _pdfSteam.Read(bytes, read, length - read);
        if (chunk <= 0)
            break;
        read += chunk;
    }
    return PdfEncoders.RawEncoding.GetString(bytes, 0, bytes.Length);
}
|
||
|
||
/// <summary>
/// Scans a comment line. The lexer must stand on the introducing '%'. All characters up to,
/// but not including, the terminating line feed (or the end of the stream) are collected
/// in the token.
/// </summary>
/// <returns>Symbol.Eof if the comment starts with "%%EOF"; otherwise Symbol.Comment.</returns>
public Symbol ScanComment()
{
    Debug.Assert(_currChar == Chars.Percent);

    _token = new StringBuilder();
    while (true)
    {
        char ch = AppendAndScanNextChar();
        if (ch == Chars.LF || ch == Chars.EOF)
            break;
    }
    // TODO: not correct
    // The token holds raw byte values, so the marker is compared ordinally. A culture-sensitive
    // comparison would ignore characters such as the soft hyphen (0xAD).
    if (_token.ToString().StartsWith("%%EOF", StringComparison.Ordinal))
        return Symbol.Eof;
    return _symbol = Symbol.Comment;
}
|
||
|
||
/// <summary>
/// Scans a name object. The lexer must stand on the introducing '/'. The token receives the
/// slash and all following characters up to the next white space, delimiter or end of stream;
/// "#xx" sequences are replaced by the character with that hexadecimal code.
/// </summary>
/// <returns>Always Symbol.Name.</returns>
public Symbol ScanName()
{
    Debug.Assert(_currChar == Chars.Slash);

    _token = new StringBuilder();
    for (; ; )
    {
        // Appends the current character and looks at the one that follows it.
        char upcoming = AppendAndScanNextChar();

        if (upcoming != '#')
        {
            if (upcoming == Chars.EOF || IsWhiteSpace(upcoming) || IsDelimiter(upcoming))
                return _symbol = Symbol.Name;
            continue;
        }

        // Step over '#'; the two hex digits are now the current and the look-ahead character.
        ScanNextChar(true);
        string hexDigits = new string(new[] { _currChar, _nextChar });
        ScanNextChar(true);

        // TODO Check syntax
        // The decoded character replaces the second hex digit as the current character,
        // so the next loop iteration appends it to the token.
        _currChar = (char)(ushort)int.Parse(hexDigits, NumberStyles.AllowHexSpecifier);
    }
}
|
||
|
||
/// <summary>
/// Scans a number, i.e. an optional sign followed by digits with at most one period.
/// The text of the number is stored in the token.
/// </summary>
/// <returns>
/// Symbol.Real if the number contains a period or does not fit into 32 bits (signed or unsigned),
/// Symbol.Integer if it fits into an Int32, Symbol.UInteger if it is positive and fits into a UInt32.
/// </returns>
/// <exception cref="FormatException">The token contains no digit at all (e.g. a lone sign).</exception>
public Symbol ScanNumber()
{
    // I found a PDF file created with Acrobat 7 with this entry
    //   /Checksum 2996984786
    // What is this? It is neither an integer nor a real.
    // I introduced an UInteger...
    bool period = false;

    _token = new StringBuilder();
    char ch = _currChar;
    if (ch == '+' || ch == '-')
    {
        _token.Append(ch);
        ch = ScanNextChar(true);
    }
    while (true)
    {
        if (char.IsDigit(ch))
        {
            _token.Append(ch);
        }
        else if (ch == '.')
        {
            if (period)
                ParserDiagnostics.ThrowParserException("More than one period in number.");

            period = true;
            _token.Append(ch);
        }
        else
            break;
        ch = ScanNextChar(true);
    }

    if (period)
        return Symbol.Real;

    string text = _token.ToString();
    long l;
    if (!Int64.TryParse(text, NumberStyles.Integer, CultureInfo.InvariantCulture, out l))
    {
        // The token consists of an optional sign and digits only. If it ends with a digit,
        // parsing can only have failed because the value exceeds the 64 bit range.
        // Treat it like the other oversized integers below instead of throwing OverflowException.
        if (text.Length > 0 && char.IsDigit(text[text.Length - 1]))
            return Symbol.Real;

        throw new FormatException("Number expected, but no digits found.");
    }

    if (l >= Int32.MinValue && l <= Int32.MaxValue)
        return Symbol.Integer;
    if (l > 0 && l <= UInt32.MaxValue)
        return Symbol.UInteger;

    // Got an AutoCAD PDF file that contains this: /C 264584027963392
    // Best we can do is to convert it to real value.
    return Symbol.Real;
}
|
||
|
||
/// <summary>
/// Scans a number. Despite its name this method currently does not look for a reference;
/// it simply forwards to <see cref="ScanNumber"/>.
/// </summary>
/// <returns>The symbol returned by <see cref="ScanNumber"/>.</returns>
public Symbol ScanNumberOrReference()
{
    // The reference detection was never implemented here; the former body only captured
    // the position and the token in locals that were never used.
    return ScanNumber();
}
|
||
|
||
/// <summary>
/// Scans a keyword, i.e. a run of letters starting at the current character, and maps it
/// to its symbol. Letter runs that are not one of the known PDF keywords are reported as
/// Symbol.Keyword.
/// </summary>
/// <returns>The symbol that corresponds to the scanned keyword.</returns>
public Symbol ScanKeyword()
{
    _token = new StringBuilder();

    // Collect all consecutive letters; CR is deliberately not translated here.
    for (char ch = _currChar; char.IsLetter(ch); ch = ScanNextChar(false))
        _token.Append(ch);

    Symbol kind;
    switch (_token.ToString())
    {
        case "obj":
            kind = Symbol.Obj;
            break;

        case "endobj":
            kind = Symbol.EndObj;
            break;

        case "null":
            kind = Symbol.Null;
            break;

        case "true":
        case "false":
            kind = Symbol.Boolean;
            break;

        case "R":
            kind = Symbol.R;
            break;

        case "stream":
            kind = Symbol.BeginStream;
            break;

        case "endstream":
            kind = Symbol.EndStream;
            break;

        case "xref":
            kind = Symbol.XRef;
            break;

        case "trailer":
            kind = Symbol.Trailer;
            break;

        case "startxref":
            kind = Symbol.StartXRef;
            break;

        default:
            // Anything else is treated as a keyword. Samples are f or n in iref.
            kind = Symbol.Keyword;
            break;
    }

    _symbol = kind;
    return kind;
}
|
||
|
||
/// <summary>
/// Scans a literal string, contained between "(" and ")". Escape sequences are resolved and
/// balanced nested parentheses are kept. If the resulting bytes start with a UTF-16 byte order
/// mark (FE FF or FF FE), byte pairs are combined into Unicode characters.
/// </summary>
/// <returns>Symbol.UnicodeString if a byte order mark was found; otherwise Symbol.String.</returns>
public Symbol ScanLiteralString()
{
    // Reference: 3.2.3  String Objects / Page 53
    // Reference: TABLE 3.32  String Types / Page 157

    Debug.Assert(_currChar == Chars.ParenLeft);
    _token = new StringBuilder();
    int parenLevel = 0;
    char ch = ScanNextChar(false);

    // Phase 1: deal with escape characters.
    while (ch != Chars.EOF)
    {
        switch (ch)
        {
            case '(':
                parenLevel++;
                break;

            case ')':
                if (parenLevel == 0)
                {
                    // Closing parenthesis of the string itself: consume it and finish phase 1.
                    ScanNextChar(false);
                    goto Phase2;
                }
                parenLevel--;
                break;

            case '\\':
                ch = ScanNextChar(false);
                switch (ch)
                {
                    case 'n':
                        ch = Chars.LF;
                        break;

                    case 'r':
                        ch = Chars.CR;
                        break;

                    case 't':
                        ch = Chars.HT;
                        break;

                    case 'b':
                        ch = Chars.BS;
                        break;

                    case 'f':
                        ch = Chars.FF;
                        break;

                    case '(':
                        ch = Chars.ParenLeft;
                        break;

                    case ')':
                        ch = Chars.ParenRight;
                        break;

                    case '\\':
                        ch = Chars.BackSlash;
                        break;

                    // AutoCAD PDFs may contain such strings: (\ )
                    case ' ':
                        ch = ' ';
                        break;

                    case Chars.CR:
                        // Line continuation: the backslash and the end-of-line marker are dropped.
                        // For CR LF both characters belong to the marker, so skip the LF as well.
                        if (_nextChar == Chars.LF)
                            ScanNextChar(false);
                        ch = ScanNextChar(false);
                        continue;

                    case Chars.LF:
                        // Line continuation with a single LF.
                        ch = ScanNextChar(false);
                        continue;

                    default:
                        if (IsOctalDigit(ch))
                        {
                            // Octal character code with one to three digits.
                            int n = ch - '0';
                            if (IsOctalDigit(_nextChar))
                            {
                                ch = ScanNextChar(false);
                                n = n * 8 + ch - '0';
                                if (IsOctalDigit(_nextChar))
                                {
                                    ch = ScanNextChar(false);
                                    n = n * 8 + ch - '0';
                                }
                            }
                            // Three octal digits can exceed 255; high-order overflow is ignored
                            // so that the result stays a single raw byte.
                            ch = (char)(n & 0xFF);
                        }
                        // else:
                        // PDF 32000: "If the character following the REVERSE SOLIDUS is not one of those
                        // shown in Table 3, the REVERSE SOLIDUS shall be ignored."
                        // The character itself is appended unchanged below.
                        break;
                }
                break;

            default:
                break;
        }

        _token.Append(ch);
        ch = ScanNextChar(false);
    }

    // Phase 2: deal with UTF-16 if necessary.
    // UTF-16BE Unicode strings start with U+FEFF (the bytes FE FF). There can be empty strings
    // with UTF-16BE prefix. Adobe Reader also supports UTF-16LE (the bytes FF FE).
Phase2:
    if (_token.Length >= 2)
    {
        bool bigEndian = _token[0] == '\xFE' && _token[1] == '\xFF';
        bool littleEndian = _token[0] == '\xFF' && _token[1] == '\xFE';
        if (bigEndian || littleEndian)
        {
            CombineTokenBytesToUtf16(bigEndian);
            return _symbol = Symbol.UnicodeString;
        }
    }
    return _symbol = Symbol.String;
}

/// <summary>
/// Indicates whether the specified character is one of the octal digits '0' to '7'.
/// </summary>
static bool IsOctalDigit(char ch)
{
    return ch >= '0' && ch <= '7';
}

/// <summary>
/// Replaces the token, which holds one raw byte per character and starts with a two byte
/// byte order mark, by the string obtained from combining each following byte pair into
/// one UTF-16 code unit.
/// </summary>
void CombineTokenBytesToUtf16(bool bigEndian)
{
    StringBuilder raw = _token;
    int length = raw.Length;
    if ((length & 1) == 1)
    {
        // TODO What does the PDF Reference say about this case? Assume (char)0 or treat the file as corrupted?
        // Pad with a NUL byte. Note that Append(0) would append the digit character '0' instead.
        raw.Append('\0');
        ++length;
        DebugBreak.Break();
    }

    _token = new StringBuilder();
    for (int i = 2; i < length; i += 2)
    {
        int high = bigEndian ? raw[i] : raw[i + 1];
        int low = bigEndian ? raw[i + 1] : raw[i];
        _token.Append((char)(256 * high + low));
    }
}
|
||
|
||
/// <summary>
/// Scans a hexadecimal string, contained between "&lt;" and "&gt;". Each pair of hex digits
/// yields one raw byte in the token; a missing final digit is assumed to be 0. If the bytes
/// start with the UTF-16BE byte order mark (FE FF) and more data follows, byte pairs are
/// combined into Unicode characters.
/// </summary>
/// <returns>Symbol.UnicodeHexString if a byte order mark followed by data was found;
/// otherwise Symbol.HexString.</returns>
public Symbol ScanHexadecimalString()
{
    Debug.Assert(_currChar == Chars.Less);

    _token = new StringBuilder();
    char[] hex = new char[2];
    ScanNextChar(true);
    while (true)
    {
        MoveToNonWhiteSpace();
        if (_currChar == '>')
        {
            ScanNextChar(true);
            break;
        }
        if (char.IsLetterOrDigit(_currChar))
        {
            hex[0] = char.ToUpper(_currChar);
            // Second char is optional in PDF spec.
            if (char.IsLetterOrDigit(_nextChar))
            {
                hex[1] = char.ToUpper(_nextChar);
                ScanNextChar(true);
            }
            else
            {
                // We could check for ">" here and throw if we find anything else. The throw comes after the next iteration anyway.
                hex[1] = '0';
            }
            ScanNextChar(true);

            // NOTE(review): letters outside A-F pass the check above and make int.Parse throw a FormatException.
            int ch = int.Parse(new string(hex), NumberStyles.AllowHexSpecifier);
            _token.Append(Convert.ToChar(ch));
        }
        else
            ParserDiagnostics.HandleUnexpectedCharacter(_currChar);
    }

    string chars = _token.ToString();
    int count = chars.Length;
    if (count > 2 && chars[0] == (char)0xFE && chars[1] == (char)0xFF)
    {
        Debug.Assert(count % 2 == 0);
        _token.Length = 0;
        for (int idx = 2; idx < count; idx += 2)
        {
            // An odd number of bytes leaves the last code unit without its low byte;
            // assume 0 instead of reading beyond the end of the string.
            int low = idx + 1 < count ? chars[idx + 1] : 0;
            _token.Append((char)(chars[idx] * 256 + low));
        }
        return _symbol = Symbol.UnicodeHexString;
    }
    return _symbol = Symbol.HexString;
}
|
||
|
||
/// <summary>
/// Moves the current position one character further in the PDF stream.
/// </summary>
/// <param name="handleCRLF">If true, a CR LF pair as well as a single CR is delivered
/// as one LF; if false, characters are delivered unchanged.</param>
/// <returns>The new current character, or Chars.EOF if the end of the stream was reached.</returns>
internal char ScanNextChar(bool handleCRLF)
{
    // Nothing left to read: both characters become EOF and the index stays where it is.
    if (_idxChar >= _pdfLength)
    {
        _currChar = Chars.EOF;
        _nextChar = Chars.EOF;
        return _currChar;
    }

    // Shift the look-ahead character into the current one and fetch a new look-ahead.
    _currChar = _nextChar;
    _nextChar = (char)_pdfSteam.ReadByte();
    _idxChar++;

    if (!handleCRLF || _currChar != Chars.CR)
        return _currChar;

    if (_nextChar != Chars.LF)
    {
        // Treat single CR as LF.
        _currChar = Chars.LF;
        return _currChar;
    }

    // Treat CR LF as LF: step over the CR so that the LF becomes the current character.
    _currChar = _nextChar;
    _nextChar = (char)_pdfSteam.ReadByte();
    _idxChar++;
    return _currChar;
}
|
||
|
||
///// <summary>
|
||
///// Resets the current token to the empty string.
|
||
///// </summary>
|
||
//void ClearToken()
|
||
//{
|
||
// _token.Length = 0;
|
||
//}
|
||
|
||
/// <summary>
/// Checks, without consuming any input, whether the characters at the current position have
/// the form "nnn mmm R" of an indirect reference. The lexer is rewound to the position it had
/// on entry in either case (which also resets the token buffer via <see cref="Position"/>).
/// </summary>
/// <returns>true if digits, spaces, digits, spaces and an 'R' follow; otherwise false.</returns>
bool PeekReference()
{
    // A Reference has the form "nnn mmm R". The implementation of the parser used a
    // reduce/shift algorithm in the first place. But this case is the only one we need to
    // look ahead 3 tokens.
    int startPosition = Position;
    bool isReference = false;

    // Object number.
    while (char.IsDigit(_currChar))
        ScanNextChar(true);

    if (_currChar == Chars.SP)
    {
        while (_currChar == Chars.SP)
            ScanNextChar(true);

        if (char.IsDigit(_currChar))
        {
            // Generation number.
            while (char.IsDigit(_currChar))
                ScanNextChar(true);

            if (_currChar == Chars.SP)
            {
                while (_currChar == Chars.SP)
                    ScanNextChar(true);

                // We can ignore _nextChar because there is no other valid token that starts with an 'R'.
                isReference = _currChar == 'R';
            }
        }
    }

    Position = startPosition;
    return isReference;
}
|
||
|
||
/// <summary>
/// Adds the current character to the token and advances to the next character
/// (with CR/CR LF translated to LF).
/// </summary>
/// <returns>The character that is current after advancing.</returns>
internal char AppendAndScanNextChar()
{
    bool atEnd = _currChar == Chars.EOF;
    if (atEnd)
    {
        ParserDiagnostics.ThrowParserException("Undetected EOF reached.");
    }

    _token.Append(_currChar);
    char following = ScanNextChar(true);
    return following;
}
|
||
|
||
/// <summary>
/// Returns the current character if it is not skippable; otherwise moves the PDF cursor
/// forward to the first character that is not skippable, or to EOF.
/// Skippable are the PDF white spaces NUL, HT, LF, FF, CR and SP and, in addition,
/// the characters with the codes 11 and 173.
/// </summary>
/// <returns>The current character after skipping.</returns>
public char MoveToNonWhiteSpace()
{
    while (_currChar != Chars.EOF
        && (IsWhiteSpace(_currChar) || _currChar == (char)11 || _currChar == (char)173))
    {
        ScanNextChar(true);
    }
    return _currChar;
}
|
||
|
||
#if DEBUG
|
||
/// <summary>
/// Debugging aid: returns up to 40 bytes of the PDF stream, beginning at most 20 bytes
/// before the current position. The position of the underlying stream is restored afterwards.
/// </summary>
/// <param name="hex">If true, each byte is rendered as two lower-case hex digits;
/// otherwise each byte is rendered as the character with that code.</param>
public string SurroundingsOfCurrentPosition(bool hex)
{
    const int range = 20;
    int start = Math.Max(Position - range, 0);
    int length = Math.Min(2 * range, PdfLength - start);
    byte[] bytes = new byte[length];

    long posOld = _pdfSteam.Position;
    try
    {
        _pdfSteam.Position = start;
        // Stream.Read may deliver fewer bytes than requested, so read until done.
        int read = 0;
        while (read < length)
        {
            int chunk = _pdfSteam.Read(bytes, read, length - read);
            if (chunk <= 0)
                break;
            read += chunk;
        }
    }
    finally
    {
        // Never leave the stream at a position the lexer does not expect.
        _pdfSteam.Position = posOld;
    }

    StringBuilder result = new StringBuilder(hex ? 2 * length : length);
    for (int idx = 0; idx < length; idx++)
    {
        if (hex)
            result.Append(bytes[idx].ToString("x2", CultureInfo.InvariantCulture));
        else
            result.Append((char)bytes[idx]);
    }
    return result.ToString();
}
|
||
#endif
|
||
|
||
/// <summary>
/// Gets or sets the symbol of the most recently scanned token.
/// </summary>
public Symbol Symbol
{
    get
    {
        return _symbol;
    }
    set
    {
        _symbol = value;
    }
}
|
||
|
||
/// <summary>
/// Gets the text of the current token as a string.
/// </summary>
public string Token
{
    get
    {
        string text = _token.ToString();
        return text;
    }
}
|
||
|
||
/// <summary>
/// Interprets the current token as a boolean literal: the result is true if the token
/// starts with 't'. The token is expected to be either "true" or "false".
/// </summary>
public bool TokenToBoolean
{
    get
    {
        string literal = _token.ToString();
        Debug.Assert(literal == "true" || literal == "false");
        return literal[0] == 't';
    }
}
|
||
|
||
/// <summary>
/// Parses the current token as a 32 bit signed integer using the invariant culture.
/// </summary>
public int TokenToInteger
{
    get
    {
        string literal = _token.ToString();
        return Int32.Parse(literal, CultureInfo.InvariantCulture);
    }
}
|
||
|
||
/// <summary>
/// Parses the current token as a 32 bit unsigned integer using the invariant culture.
/// </summary>
public uint TokenToUInteger
{
    get
    {
        string literal = _token.ToString();
        return UInt32.Parse(literal, CultureInfo.InvariantCulture);
    }
}
|
||
|
||
/// <summary>
/// Parses the current token, which may be a real or an integer literal, as a double
/// using the invariant culture.
/// </summary>
public double TokenToReal
{
    get
    {
        string literal = _token.ToString();
        return Double.Parse(literal, CultureInfo.InvariantCulture);
    }
}
|
||
|
||
/// <summary>
/// Interprets the current token as an object ID. The token is split at '|' into the object
/// number and the generation number.
/// NOTE(review): no scan method visible in this class produces a token containing '|' -
/// confirm where such tokens come from.
/// </summary>
public PdfObjectID TokenToObjectID
{
    get
    {
        string[] numbers = Token.Split('|');
        // PDF numbers are culture independent; parse them like the other TokenTo... members do.
        int objectNumber = Int32.Parse(numbers[0], CultureInfo.InvariantCulture);
        int generationNumber = Int32.Parse(numbers[1], CultureInfo.InvariantCulture);
        return new PdfObjectID(objectNumber, generationNumber);
    }
}
|
||
|
||
/// <summary>
/// Indicates whether the specified character is a PDF white-space character, i.e. one of
/// Null (0), Horizontal Tab (9), Line Feed (10), Form Feed (12), Carriage Return (13)
/// or Space (32).
/// </summary>
internal static bool IsWhiteSpace(char ch)
{
    return ch == Chars.NUL
        || ch == Chars.HT
        || ch == Chars.LF
        || ch == Chars.FF
        || ch == Chars.CR
        || ch == Chars.SP;
}
|
||
|
||
/// <summary>
/// Indicates whether the specified character is a PDF delimiter character, i.e. one of
/// ( ) &lt; &gt; [ ] { } / %.
/// </summary>
internal static bool IsDelimiter(char ch)
{
    const string delimiters = "()<>[]{}/%";
    return delimiters.IndexOf(ch) >= 0;
}
|
||
|
||
/// <summary>
/// Gets the length of the PDF stream in bytes, as determined when the lexer was created.
/// </summary>
public int PdfLength
{
    get
    {
        return _pdfLength;
    }
}
|
||
|
||
// The stream the PDF is read from and its length, captured in the constructor.
private readonly Stream _pdfSteam;
private readonly int _pdfLength;

// Index of the current character, the current character itself and one character of look-ahead.
private int _idxChar;
private char _currChar;
private char _nextChar;

// Text and symbol of the token scanned most recently.
private StringBuilder _token;
private Symbol _symbol = Symbol.None;
|
||
}
|
||
}
|