849 lines
28 KiB
C#
849 lines
28 KiB
C#
|
|
|||
|
#region PDFsharp - A .NET library for processing PDF
|
|||
|
//
|
|||
|
// Authors:
|
|||
|
// Stefan Lange
|
|||
|
//
|
|||
|
// Copyright (c) 2005-2017 empira Software GmbH, Cologne Area (Germany)
|
|||
|
//
|
|||
|
// http://www.pdfsharp.com
|
|||
|
// http://sourceforge.net/projects/pdfsharp
|
|||
|
//
|
|||
|
// Permission is hereby granted, free of charge, to any person obtaining a
|
|||
|
// copy of this software and associated documentation files (the "Software"),
|
|||
|
// to deal in the Software without restriction, including without limitation
|
|||
|
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|||
|
// and/or sell copies of the Software, and to permit persons to whom the
|
|||
|
// Software is furnished to do so, subject to the following conditions:
|
|||
|
//
|
|||
|
// The above copyright notice and this permission notice shall be included
|
|||
|
// in all copies or substantial portions of the Software.
|
|||
|
//
|
|||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|||
|
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|||
|
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|||
|
// DEALINGS IN THE SOFTWARE.
|
|||
|
#endregion
|
|||
|
|
|||
|
using System;
|
|||
|
using System.Globalization;
|
|||
|
using System.Diagnostics;
|
|||
|
using System.Text;
|
|||
|
using System.IO;
|
|||
|
using PdfSharp.Internal;
|
|||
|
|
|||
|
#pragma warning disable 1591
|
|||
|
|
|||
|
namespace PdfSharp.Pdf.Content
|
|||
|
{
|
|||
|
/// <summary>
|
|||
|
/// Lexical analyzer for PDF content files. Adobe specifies no grammar, but it seems that it
|
|||
|
/// is a simple post-fix notation.
|
|||
|
/// </summary>
|
|||
|
public class CLexer
|
|||
|
{
|
|||
|
/// <summary>
/// Initializes a new instance of the CLexer class that scans the specified bytes.
/// </summary>
/// <param name="content">The bytes of the content stream to scan. The array is used as is, it is not copied.</param>
/// <exception cref="ArgumentNullException"><paramref name="content"/> is null.</exception>
public CLexer(byte[] content)
{
    // Fail at the call site instead of with a NullReferenceException on the first scan.
    if (content == null)
        throw new ArgumentNullException("content");

    _content = content;
    _charIndex = 0;
}
|
|||
|
|
|||
|
/// <summary>
/// Initializes a new instance of the CLexer class that scans a copy of the bytes
/// held by the specified memory stream.
/// </summary>
/// <param name="content">The stream whose contents are copied with <c>ToArray</c> and then scanned.</param>
/// <exception cref="ArgumentNullException"><paramref name="content"/> is null.</exception>
public CLexer(MemoryStream content)
{
    // Report a missing stream as an argument error rather than a NullReferenceException.
    if (content == null)
        throw new ArgumentNullException("content");

    _content = content.ToArray();
    _charIndex = 0;
}
|
|||
|
|
|||
|
/// <summary>
/// Skips white space and comments, scans the next token, and returns its symbol.
/// The returned symbol is also stored as the current symbol.
/// </summary>
public CSymbol ScanNextToken()
{
    while (true)
    {
        ClearToken();
        char first = MoveToNonWhiteSpace();

        // Comments are not handed to the parser: swallow them and start over.
        if (first == '%')
        {
            ScanComment();
            continue;
        }

        if (first == '/')
            return _symbol = ScanName();

        if (first == '+' || first == '-')
            return _symbol = ScanNumber();

        if (first == '[')
        {
            ScanNextChar();
            return _symbol = CSymbol.BeginArray;
        }

        if (first == ']')
        {
            ScanNextChar();
            return _symbol = CSymbol.EndArray;
        }

        if (first == '(')
            return _symbol = ScanLiteralString();

        // Two consecutive '<' open a dictionary, a single '<' opens a hexadecimal string.
        if (first == '<')
            return _symbol = _nextChar == '<' ? ScanDictionary() : ScanHexadecimalString();

        if (first == '.')
            return _symbol = ScanNumber();

        // The quote characters are operators of their own.
        if (first == '"' || first == '\'')
            return _symbol = ScanOperator();

        if (char.IsDigit(first))
            return _symbol = ScanNumber();

        if (char.IsLetter(first))
            return _symbol = ScanOperator();

        if (first == Chars.EOF)
            return _symbol = CSymbol.Eof;

        // Anything else cannot start a token.
        ContentReaderDiagnostics.HandleUnexpectedCharacter(first);
        return _symbol = CSymbol.None;
    }
}
|
|||
|
|
|||
|
/// <summary>
/// Scans a comment. Starting with the current '%', characters are appended to the token
/// until a line feed or the end of the content becomes the current character.
/// (ScanNextToken calls this only to skip the comment and discards the result.)
/// </summary>
/// <returns>Always CSymbol.Comment, which is also stored as the current symbol.</returns>
public CSymbol ScanComment()
{
    Debug.Assert(_currChar == Chars.Percent);

    ClearToken();

    char upcoming;
    do
    {
        upcoming = AppendAndScanNextChar();
    }
    while (upcoming != Chars.LF && upcoming != Chars.EOF);

    _symbol = CSymbol.Comment;
    return _symbol;
}
|
|||
|
|
|||
|
/// <summary>
/// Scans over the bytes of an inline image without interpreting them.
/// The skipped sequence has the form: BI, key-value pairs, ID, image data, EI.
/// </summary>
/// <remarks>
/// Inline images are not implemented; the image is only skipped.
/// Missing 'ID' or 'EI' is reported through ContentReaderDiagnostics.HandleUnexpectedCharacter.
/// </remarks>
/// <returns>Always CSymbol.None.</returns>
public CSymbol ScanInlineImage()
{
    // Read the image dictionary token by token until the ID operator is found.
    bool ascii85 = false;
    do
    {
        ScanNextToken();

        // The content ended before 'ID': ScanNextToken would return Eof forever,
        // so report the problem and stop instead of looping endlessly.
        if (_symbol == CSymbol.Eof)
        {
            ContentReaderDiagnostics.HandleUnexpectedCharacter(_currChar);
            return CSymbol.None;
        }

        // HACK: Is image ASCII85 decoded?
        if (!ascii85 && _symbol == CSymbol.Name && (Token == "/ASCII85Decode" || Token == "/A85"))
            ascii85 = true;
    } while (_symbol != CSymbol.Operator || Token != "ID");

    if (ascii85)
    {
        // Look for '~>' because 'EI' may be part of the encoded image.
        while (_currChar != Chars.EOF && (_currChar != '~' || _nextChar != '>'))
            ScanNextChar();
        if (_currChar == Chars.EOF)
            ContentReaderDiagnostics.HandleUnexpectedCharacter(_currChar);
    }

    // Look for '<ws>EI<ws>', as 'EI' may be part of the binary image data here too.
    // 'EI' directly followed by the end of the content is accepted as well.
    bool endFound = false;
    while (_currChar != Chars.EOF)
    {
        if (!IsWhiteSpace(_currChar))
        {
            ScanNextChar();
            continue;
        }

        // Each ScanNextChar call consumes one character, exactly as in a nested-if chain.
        if (ScanNextChar() == 'E' && ScanNextChar() == 'I')
        {
            char after = ScanNextChar();
            if (after == Chars.EOF || IsWhiteSpace(after))
            {
                endFound = true;
                break;
            }
        }
    }
    if (!endFound)
        ContentReaderDiagnostics.HandleUnexpectedCharacter(_currChar);

    // We currently do nothing with inline images.
    return CSymbol.None;
}
|
|||
|
|
|||
|
/// <summary>
/// Scans a name. The token starts with the leading '/' and ends before the first
/// white-space character, delimiter, or the end of the content.
/// </summary>
/// <remarks>
/// A '#' followed by two hexadecimal digits is replaced by the character with that code.
/// A '#' that is not followed by two hexadecimal digits is kept literally.
/// </remarks>
/// <returns>Always CSymbol.Name, which is also stored as the current symbol.</returns>
public CSymbol ScanName()
{
    Debug.Assert(_currChar == Chars.Slash);

    ClearToken();
    _token.Append(_currChar); // '/'
    char ch = ScanNextChar();

    // End of content terminates the name too; otherwise the loop would never end.
    while (ch != Chars.EOF && !IsWhiteSpace(ch) && !IsDelimiter(ch))
    {
        if (ch == '#' && Uri.IsHexDigit(_nextChar))
        {
            char hi = ScanNextChar();
            if (Uri.IsHexDigit(_nextChar))
            {
                // Well-formed '#xx': the decoded character becomes part of the name,
                // even if it is a white-space or delimiter character.
                char lo = ScanNextChar();
                ch = (char)(Uri.FromHex(hi) * 16 + Uri.FromHex(lo));
            }
            else
            {
                // Only one hexadecimal digit follows: keep '#' and the digit as they are.
                _token.Append('#');
                ch = hi;
            }
        }
        _token.Append(ch);
        ch = ScanNextChar();
    }
    return _symbol = CSymbol.Name;
}
|
|||
|
|
|||
|
/// <summary>
/// Scans a dictionary. The current character must be the first of the two opening
/// angle brackets. The complete text, including the brackets, is collected in the token.
/// </summary>
/// <remarks>
/// This is a simple scan, not a recursive parse: it only tracks nested dictionaries,
/// literal strings (with nested parentheses and backslash escapes), and hexadecimal strings
/// in order to find the matching closing brackets.
/// The current symbol is not assigned here; the caller does that.
/// </remarks>
/// <returns>
/// CSymbol.Dictionary, or CSymbol.Error if the content ends inside the dictionary and
/// ContentReaderDiagnostics.HandleUnexpectedCharacter returns.
/// </returns>
protected CSymbol ScanDictionary()
{
    // TODO Do an actual recursive parse instead of this simple scan.

    ClearToken();
    _token.Append(_currChar);      // '<'
    _token.Append(ScanNextChar()); // '<'

    bool inString = false, inHexString = false;
    int nestedDict = 0, nestedStringParen = 0;
    while (true)
    {
        char ch = ScanNextChar();
        _token.Append(ch);

        if (ch == Chars.EOF)
        {
            // Unterminated dictionary: report it and leave, scanning on cannot succeed.
            ContentReaderDiagnostics.HandleUnexpectedCharacter(ch);
            return CSymbol.Error;
        }

        if (inString)
        {
            // Inside a literal string only parentheses and escapes matter;
            // angle brackets are ordinary string content.
            if (ch == '\\')
                _token.Append(ScanNextChar());
            else if (ch == '(')
                ++nestedStringParen;
            else if (ch == ')')
            {
                if (nestedStringParen > 0)
                    --nestedStringParen;
                else
                    inString = false;
            }
        }
        else if (inHexString)
        {
            // A hexadecimal string ends at the next '>'.
            if (ch == '>')
                inHexString = false;
        }
        else if (ch == '(')
        {
            inString = true;
            nestedStringParen = 0;
        }
        else if (ch == '<')
        {
            if (_nextChar == '<')
            {
                _token.Append(ScanNextChar());
                ++nestedDict;
            }
            else
                inHexString = true;
        }
        else if (ch == '>' && _nextChar == '>')
        {
            _token.Append(ScanNextChar());
            if (nestedDict > 0)
                --nestedDict;
            else
            {
                // Step over the final '>' of the outermost dictionary.
                ScanNextChar();
                return CSymbol.Dictionary;
            }
        }
    }
}
|
|||
|
|
|||
|
/// <summary>
/// Scans an integer or real number, starting at the current character, which may be
/// a sign, a digit, or a period.
/// </summary>
/// <remarks>
/// At most 10 decimal digits contribute to the value; further decimal digits are only
/// appended to the token. The current symbol is not assigned here; the caller does that.
/// </remarks>
/// <returns>
/// CSymbol.Real if the number contains a period; CSymbol.Integer if it fits into an Int32;
/// otherwise ContentReaderDiagnostics.ThrowNumberOutOfIntegerRange is called and
/// CSymbol.Error is returned.
/// </returns>
public CSymbol ScanNumber()
{
    long value = 0;
    int decimalDigits = 0;
    bool period = false;
    bool negative = false;
    bool overflow = false;

    ClearToken();
    char ch = _currChar;
    if (ch == '+' || ch == '-')
    {
        negative = ch == '-';
        _token.Append(ch);
        ch = ScanNextChar();
    }
    while (true)
    {
        if (ch >= '0' && ch <= '9')
        {
            _token.Append(ch);
            if (decimalDigits < 10)
            {
                int digit = ch - '0';
                // Detect digit runs that do not fit into a long instead of wrapping around silently.
                if (value > (long.MaxValue - digit) / 10)
                    overflow = true;
                else
                    value = 10 * value + digit;
                if (period)
                    decimalDigits++;
            }
        }
        else if (ch == '.')
        {
            if (period)
                ContentReaderDiagnostics.ThrowContentReaderException("More than one period in number.");

            period = true;
            _token.Append(ch);
        }
        else
            break;
        ch = ScanNextChar();
    }

    if (negative)
        value = -value;

    if (period)
    {
        if (overflow)
        {
            // Too many digits for the integer accumulator: let the framework parse the text.
            _tokenAsReal = double.Parse(_token.ToString(), CultureInfo.InvariantCulture);
        }
        else if (decimalDigits > 0)
        {
            _tokenAsReal = value / PowersOf10[decimalDigits];
        }
        else
        {
            _tokenAsReal = value;
            _tokenAsLong = value;
        }
        return CSymbol.Real;
    }

    // An overflowed integer is saturated so that it is reliably reported as out of range.
    if (overflow)
        value = negative ? long.MinValue : long.MaxValue;

    _tokenAsLong = value;
    _tokenAsReal = Convert.ToDouble(value);

    Debug.Assert(overflow || Int64.Parse(_token.ToString(), CultureInfo.InvariantCulture) == value);

    // Int32.MaxValue itself is a valid integer, hence the inclusive upper bound.
    if (value >= Int32.MinValue && value <= Int32.MaxValue)
        return CSymbol.Integer;

    ContentReaderDiagnostics.ThrowNumberOutOfIntegerRange(value);
    return CSymbol.Error;
}
|
|||
|
// Divisors used by ScanNumber, indexed by the number of decimal digits scanned.
// ScanNumber counts at most 10 decimal digits, so the indices 0 to 10 are sufficient.
static readonly double[] PowersOf10 = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, 10000000000 };
|
|||
|
|
|||
|
/// <summary>
/// Scans an operator: starting with the current character, all consecutive operator
/// characters (see IsOperatorChar) are appended to the token.
/// </summary>
/// <returns>Always CSymbol.Operator, which is also stored as the current symbol.</returns>
public CSymbol ScanOperator()
{
    ClearToken();

    // Collect characters as long as they can be part of an operator.
    for (char candidate = _currChar; IsOperatorChar(candidate); candidate = AppendAndScanNextChar())
    {
    }

    _symbol = CSymbol.Operator;
    return _symbol;
}
|
|||
|
|
|||
|
/// <summary>
/// Scans a literal string, i.e. a string enclosed in parentheses. The current character
/// must be the opening parenthesis. The decoded text, without the enclosing parentheses,
/// is collected in the token.
/// </summary>
/// <remarks>
/// Balanced unescaped parentheses are part of the string. Backslash escape sequences are
/// decoded by ScanEscapeSequence. If the string starts with the bytes 0xFE 0xFF, the
/// following bytes are combined pairwise (high byte first) into 16-bit characters.
/// NOTE(review): the handling of parentheses and escape sequences in such 16-bit strings
/// is taken over unchanged from the previous implementation, which itself marked it as
/// possibly incorrect.
/// </remarks>
/// <returns>
/// CSymbol.String, or CSymbol.Error if the content ends inside the string and
/// ContentReaderDiagnostics.HandleUnexpectedCharacter returns. The result is also stored
/// as the current symbol.
/// </returns>
public CSymbol ScanLiteralString()
{
    Debug.Assert(_currChar == Chars.ParenLeft);

    ClearToken();
    int parenLevel = 0;
    char ch = ScanNextChar();

    // Test UNICODE string
    if (ch == '\xFE' && _nextChar == '\xFF')
    {
        // Step onto the second byte of the byte order mark.
        ScanNextChar();
        char chHi = ScanNextChar();
        if (chHi == ')')
        {
            // The empty unicode string...
            ScanNextChar();
            return _symbol = CSymbol.String;
        }
        char chLo = ScanNextChar();
        // EOF must be tested on the single bytes; the combined value cannot be told apart from it.
        if (chHi == Chars.EOF || chLo == Chars.EOF)
            return FailUnterminatedString();
        ch = (char)(chHi * 256 + chLo);
        while (true)
        {
            switch (ch)
            {
                case '(':
                    parenLevel++;
                    break;

                case ')':
                    if (parenLevel == 0)
                    {
                        ScanNextChar();
                        return _symbol = CSymbol.String;
                    }
                    parenLevel--;
                    break;

                case '\\':
                    // A line continuation yields no character; examine the following one instead.
                    if (!ScanEscapeSequence(out ch))
                        continue;
                    break;
            }

            _token.Append(ch);
            chHi = ScanNextChar();
            if (chHi == ')')
            {
                ScanNextChar();
                return _symbol = CSymbol.String;
            }
            chLo = ScanNextChar();
            if (chHi == Chars.EOF || chLo == Chars.EOF)
                return FailUnterminatedString();
            ch = (char)(chHi * 256 + chLo);
        }
    }

    // 8-bit characters
    while (true)
    {
        // Without this test an unterminated string would append EOF characters forever.
        if (ch == Chars.EOF)
            return FailUnterminatedString();

        switch (ch)
        {
            case '(':
                parenLevel++;
                break;

            case ')':
                if (parenLevel == 0)
                {
                    ScanNextChar();
                    return _symbol = CSymbol.String;
                }
                parenLevel--;
                break;

            case '\\':
                // A line continuation yields no character; examine the following one instead.
                if (!ScanEscapeSequence(out ch))
                    continue;
                break;
        }

        // Every character that reaches this point is part of the string.
        _token.Append(ch);
        ch = ScanNextChar();
    }
}

// Decodes the escape sequence that follows the backslash at the current position.
// Returns true and the decoded character, or false for a line continuation (backslash
// followed by a line feed); in that case 'decoded' is the first character after the line feed.
bool ScanEscapeSequence(out char decoded)
{
    char ch = ScanNextChar();
    switch (ch)
    {
        case 'n':
            decoded = Chars.LF;
            return true;

        case 'r':
            decoded = Chars.CR;
            return true;

        case 't':
            decoded = Chars.HT;
            return true;

        case 'b':
            decoded = Chars.BS;
            return true;

        case 'f':
            decoded = Chars.FF;
            return true;

        case '(':
            decoded = Chars.ParenLeft;
            return true;

        case ')':
            decoded = Chars.ParenRight;
            return true;

        case '\\':
            decoded = Chars.BackSlash;
            return true;

        case Chars.LF:
            decoded = ScanNextChar();
            return false;
    }

    if (ch >= '0' && ch <= '7')
    {
        // Octal character code with one to three digits. Only 0 to 7 are octal digits;
        // 8 and 9 must not be consumed as part of the code.
        int code = ch - '0';
        if (_nextChar >= '0' && _nextChar <= '7')
        {
            code = code * 8 + (ScanNextChar() - '0');
            if (_nextChar >= '0' && _nextChar <= '7')
                code = code * 8 + (ScanNextChar() - '0');
        }
        decoded = (char)code;
        return true;
    }

    // Unknown escape: the backslash is dropped and the character is taken as is.
    decoded = ch;
    return true;
}

// Reports a literal string that is not closed before the end of the content.
CSymbol FailUnterminatedString()
{
    ContentReaderDiagnostics.HandleUnexpectedCharacter(Chars.EOF);
    return _symbol = CSymbol.Error;
}
|
|||
|
|
|||
|
/// <summary>
/// Scans a hexadecimal string, i.e. hexadecimal digits enclosed in angle brackets.
/// The current character must be the opening bracket. Each pair of digits is appended
/// to the token as one character.
/// </summary>
/// <remarks>
/// White space between digits is skipped. If the number of digits is odd, the last digit
/// is treated as if it were followed by 0. If the decoded bytes start with 0xFE 0xFF, the
/// remaining bytes are combined pairwise (high byte first) into 16-bit characters; a
/// trailing unpaired byte is ignored. Any other character, including the end of the
/// content, is reported through ContentReaderDiagnostics.HandleUnexpectedCharacter.
/// </remarks>
/// <returns>
/// CSymbol.HexString, or CSymbol.Error if the content ends inside the string and the
/// diagnostics handler returns. The result is also stored as the current symbol.
/// </returns>
public CSymbol ScanHexadecimalString()
{
    Debug.Assert(_currChar == Chars.Less);

    ClearToken();
    ScanNextChar(); // step over '<'

    // High nibble of the pair in progress, or -1 if no digit is pending.
    int highNibble = -1;
    while (true)
    {
        char ch = MoveToNonWhiteSpace();
        if (ch == '>')
        {
            ScanNextChar();
            break;
        }

        if (!Uri.IsHexDigit(ch))
        {
            ContentReaderDiagnostics.HandleUnexpectedCharacter(ch);
            if (ch == Chars.EOF)
                return _symbol = CSymbol.Error;

            // If the handler returns, skip the offending character so that scanning makes progress.
            ScanNextChar();
            continue;
        }

        int digit = Uri.FromHex(ch);
        if (highNibble < 0)
            highNibble = digit;
        else
        {
            _token.Append((char)(highNibble * 16 + digit));
            highNibble = -1;
        }
        ScanNextChar();
    }

    // Odd number of digits: the missing last digit counts as 0.
    if (highNibble >= 0)
        _token.Append((char)(highNibble * 16));

    string chars = _token.ToString();
    int count = chars.Length;
    if (count >= 2 && chars[0] == (char)0xFE && chars[1] == (char)0xFF)
    {
        // Byte order mark found: rebuild the token from 16-bit characters.
        _token.Length = 0;
        for (int idx = 2; idx + 1 < count; idx += 2)
            _token.Append((char)(chars[idx] * 256 + chars[idx + 1]));
    }
    return _symbol = CSymbol.HexString;
}
|
|||
|
|
|||
|
/// <summary>
/// Moves the current position one character further in the content stream.
/// The look-ahead character becomes the current character and the next byte of the
/// content, or Chars.EOF if there is none, becomes the new look-ahead character.
/// </summary>
/// <remarks>
/// A CR LF pair and a single CR are both delivered as a single LF.
/// The last byte of the content is delivered as current character like any other byte
/// before Chars.EOF is returned.
/// </remarks>
/// <returns>The new current character.</returns>
internal char ScanNextChar()
{
    _currChar = _nextChar;
    _nextChar = _charIndex < _content.Length ? (char)_content[_charIndex++] : Chars.EOF;

    if (_currChar == Chars.CR)
    {
        if (_nextChar == Chars.LF)
        {
            // Treat CR LF as LF: drop the CR and refill the look-ahead character.
            _nextChar = _charIndex < _content.Length ? (char)_content[_charIndex++] : Chars.EOF;
        }
        // Treat single CR as LF, too.
        _currChar = Chars.LF;
    }
    return _currChar;
}
|
|||
|
|
|||
|
/// <summary>
/// Empties the token text and sets both numeric token values to zero.
/// </summary>
void ClearToken()
{
    _tokenAsReal = 0;
    _tokenAsLong = 0;
    _token.Length = 0;
}
|
|||
|
|
|||
|
/// <summary>
/// Adds the current character to the token and then advances to the next character.
/// </summary>
/// <returns>The new current character.</returns>
internal char AppendAndScanNextChar()
{
    _token.Append(_currChar);
    char following = ScanNextChar();
    return following;
}
|
|||
|
|
|||
|
/// <summary>
/// Advances over white space until the current character is a non-white-space character
/// or the end of the content. If the current character already is such a character,
/// nothing is consumed.
/// White spaces are NUL, HT, LF, FF, CR, and SP.
/// </summary>
/// <returns>The current character after skipping, possibly Chars.EOF.</returns>
public char MoveToNonWhiteSpace()
{
    while (_currChar != Chars.EOF && IsWhiteSpace(_currChar))
        ScanNextChar();

    return _currChar;
}
|
|||
|
|
|||
|
/// <summary>
/// Gets or sets the symbol of the current token.
/// </summary>
public CSymbol Symbol
{
    get
    {
        return _symbol;
    }
    set
    {
        _symbol = value;
    }
}
|
|||
|
|
|||
|
/// <summary>
/// Gets the text of the current token as a new string.
/// </summary>
public string Token
{
    get
    {
        string text = _token.ToString();
        return text;
    }
}
|
|||
|
|
|||
|
/// <summary>
/// Gets the value of the current token as a 32-bit integer. The value is the one stored
/// by the scanner, cast to int; the token text is parsed in debug builds only, to verify it.
/// </summary>
internal int TokenToInteger
{
    get
    {
        Debug.Assert(_tokenAsLong == int.Parse(_token.ToString(), CultureInfo.InvariantCulture));

        int result = (int)_tokenAsLong;
        return result;
    }
}
|
|||
|
|
|||
|
/// <summary>
/// Gets the value of the current token, a real or integer literal, as a double. The value
/// is the one stored by the scanner; the token text is parsed in debug builds only, to verify it.
/// </summary>
internal double TokenToReal
{
    get
    {
        // ReSharper disable once CompareOfFloatsByEqualityOperator
        Debug.Assert(_tokenAsReal == double.Parse(_token.ToString(), CultureInfo.InvariantCulture));

        double result = _tokenAsReal;
        return result;
    }
}
|
|||
|
|
|||
|
/// <summary>
/// Determines whether the specified character is one of the content stream white-space
/// characters NUL (0), HT (9), LF (10), FF (12), CR (13), or SP (32).
/// </summary>
internal static bool IsWhiteSpace(char ch)
{
    return ch == Chars.NUL
        || ch == Chars.HT
        || ch == Chars.LF
        || ch == Chars.FF
        || ch == Chars.CR
        || ch == Chars.SP;
}
|
|||
|
|
|||
|
/// <summary>
/// Determines whether the specified character can be part of a content operator:
/// any letter, the asterisk, the single quote, or the double quote.
/// </summary>
internal static bool IsOperatorChar(char ch)
{
    return char.IsLetter(ch)
        || ch == Chars.Asterisk     // *
        || ch == Chars.QuoteSingle  // '
        || ch == Chars.QuoteDbl;    // "
}
|
|||
|
|
|||
|
/// <summary>
/// Determines whether the specified character is one of the delimiter characters
/// ( ) &lt; &gt; [ ] / %. The braces { and } are not treated as delimiters.
/// </summary>
internal static bool IsDelimiter(char ch)
{
    const string delimiters = "()<>[]/%";
    return delimiters.IndexOf(ch) >= 0;
}
|
|||
|
|
|||
|
/// <summary>
/// Gets the number of bytes of the content being scanned.
/// </summary>
public int ContLength
{
    get
    {
        int length = _content.Length;
        return length;
    }
}
|
|||
|
|
|||
|
/// <summary>
/// Gets or sets the read position, i.e. the index of the next byte of the content
/// that will be read into the look-ahead character.
/// </summary>
/// <remarks>
/// The setter loads the byte directly before the new position into both the current and
/// the look-ahead character.
/// NOTE(review): assigning the same byte to both characters looks unintended, the current
/// character may have been meant to be the byte before it. Confirm against the callers
/// before changing it.
/// </remarks>
/// <exception cref="ArgumentOutOfRangeException">
/// The value is less than 1 or greater than the length of the content.
/// </exception>
public int Position
{
    get { return _charIndex; }
    set
    {
        // Validate first, so that an invalid value does not leave the lexer with a modified index.
        if (value < 1 || value > _content.Length)
            throw new ArgumentOutOfRangeException("value");

        char ch = (char)_content[value - 1];
        _charIndex = value;
        _currChar = ch;
        _nextChar = ch;
    }
}
|
|||
|
|
|||
|
// The bytes of the content stream being scanned.
private readonly byte[] _content;

// Index of the next byte of _content to be read.
private int _charIndex;

// The current character and the look-ahead character that follows it.
private char _currChar;
private char _nextChar;

// Text of the current token.
private readonly StringBuilder _token = new StringBuilder();

// Numeric values of the current token.
private long _tokenAsLong;
private double _tokenAsReal;

// Symbol of the current token.
private CSymbol _symbol = CSymbol.None;
|
|||
|
}
|
|||
|
}
|