ASCU_ALL/PrintPDF/MigraDoc.DocumentObjectModel/DocumentObjectModel.IO/DdlScanner.cs
2021-05-25 17:00:45 +05:00

1566 lines
48 KiB
C#

#region MigraDoc - Creating Documents on the Fly
//
// Authors:
// Stefan Lange
// Klaus Potzesny
// David Stephensen
//
// Copyright (c) 2001-2017 empira Software GmbH, Cologne Area (Germany)
//
// http://www.pdfsharp.com
// http://www.migradoc.com
// http://sourceforge.net/projects/pdfsharp
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
#endregion
using System;
using System.Diagnostics;
using System.Globalization;
using System.Text;
/*
ddl = <document> | <empty>
table-element:
\table «attributes»opt { «columns-element» «rows-element» }
table-element:
\table «attributes»opt { «columns-element» «rows-element» }
*/
namespace MigraDoc.DocumentObjectModel.IO
{
/// <summary>
/// DdlScanner
/// </summary>
public class DdlScanner
{
/// <summary>
/// Initializes a new instance of the DdlScanner class.
/// </summary>
public DdlScanner(string documentFileName, string ddl, DdlReaderErrors errors)
{
_errors = errors;
Init(ddl, documentFileName);
}
/// <summary>
/// Initializes a new instance of the DdlScanner class.
/// </summary>
public DdlScanner(string ddl, DdlReaderErrors errors)
: this("", ddl, errors)
{ }
/// <summary>
/// Initializes all members and prepares the scanner.
/// </summary>
public bool Init(string document, string documentFileName)
{
_documentPath = documentFileName;
_strDocument = document;
_ddlLength = _strDocument.Length;
_idx = 0;
_idxLine = 1;
_idxLinePos = 0;
_documentFileName = documentFileName;
_nCurDocumentIndex = _idx;
_nCurDocumentLine = _idxLine;
_nCurDocumentLinePos = _idxLinePos;
ScanNextChar();
return true;
}
/// <summary>
/// Reads to the next DDL token. Comments are ignored.
/// </summary>
/// <returns>
/// Returns the current symbol.
/// It is Symbol.Eof if the end of the DDL string is reached.
/// </returns>
public Symbol ReadCode()
{
Again:
_symbol = Symbol.None;
TokenType = TokenType.None;
_token = "";
MoveToNonWhiteSpace();
SaveCurDocumentPos();
if (_currChar == Chars.Null)
{
_symbol = Symbol.Eof;
return Symbol.Eof;
}
if (IsIdentifierChar(_currChar, true))
{
// Token is identifier.
_symbol = ScanIdentifier();
TokenType = TokenType.Identifier;
// Some keywords do not start with a backslash: true, false, and null.
Symbol sym = KeyWords.SymbolFromName(_token);
if (sym != Symbol.None)
{
_symbol = sym;
TokenType = TokenType.KeyWord;
}
}
else if (_currChar == '"')
{
// Token is string literal.
_token += ScanStringLiteral();
_symbol = Symbol.StringLiteral;
TokenType = TokenType.StringLiteral;
}
//NYI: else if (IsNumber())
// symbol = ScanNumber(false);
else if (IsDigit(_currChar) ||
_currChar == '-' && IsDigit(_nextChar) ||
_currChar == '+' && IsDigit(_nextChar))
{
// Token is number literal.
_symbol = ScanNumber(false);
TokenType = _symbol == Symbol.RealLiteral ? TokenType.RealLiteral : TokenType.IntegerLiteral;
}
else if (_currChar == '.' && IsDigit(_nextChar))
{
// Token is real literal.
_symbol = ScanNumber(true);
TokenType = TokenType.RealLiteral;
}
else if (_currChar == '\\')
{
// Token is keyword.
_token = "\\";
_symbol = ScanKeyword();
TokenType = _symbol != Symbol.None ? TokenType.KeyWord : TokenType.None;
}
else if (_currChar == '/' && _nextChar == '/')
{
// Token is comment. In code comments are ignored.
ScanSingleLineComment();
goto Again;
}
else if (_currChar == '@' && _nextChar == '"')
{
// Token is verbatim string literal.
ScanNextChar();
_token += ScanVerbatimStringLiteral();
_symbol = Symbol.StringLiteral;
TokenType = _symbol != Symbol.None ? TokenType.StringLiteral : TokenType.None;
}
else
{
// Punctuator or syntax error.
_symbol = ScanPunctuator();
}
return _symbol;
}
/// <summary>
/// Gets the next keyword at the current position without touching the DDL cursor.
/// </summary>
public Symbol PeekKeyword()
{
Debug.Assert(_currChar == Chars.BackSlash);
return PeekKeyword(_idx);
}
/// <summary>
/// Gets the next keyword without touching the DDL cursor.
/// </summary>
public Symbol PeekKeyword(int index)
{
// Check special keywords
switch (_strDocument[index])
{
case '{':
case '}':
case '\\':
case '-':
case '(':
return Symbol.Character;
}
string token = "\\";
int idx = index;
int length = _ddlLength - idx;
while (length > 0)
{
char ch = _strDocument[idx++];
if (IsLetter(ch))
{
token += ch;
length--;
}
else
break;
}
return KeyWords.SymbolFromName(token);
}
/// <summary>
/// Gets the next punctuator terminal symbol without touching the DDL cursor.
/// </summary>
protected Symbol PeekPunctuator(int index)
{
Symbol sym = Symbol.None;
char ch = _strDocument[index];
switch (ch)
{
case '{':
sym = Symbol.BraceLeft;
break;
case '}':
sym = Symbol.BraceRight;
break;
case '[':
sym = Symbol.BracketLeft;
break;
case ']':
sym = Symbol.BracketRight;
break;
case '(':
sym = Symbol.ParenLeft;
break;
case ')':
sym = Symbol.ParenRight;
break;
case ':':
sym = Symbol.Colon;
break;
case ';':
sym = Symbol.Semicolon;
break;
case '.':
sym = Symbol.Dot;
break;
case ',':
sym = Symbol.Comma;
break;
case '%':
sym = Symbol.Percent;
break;
case '$':
sym = Symbol.Dollar;
break;
case '@':
sym = Symbol.At;
break;
case '#':
sym = Symbol.Hash;
break;
//case '?':
// sym = Symbol.Question;
// break;
case '¤':
sym = Symbol.Currency; //??? used in DDL?
break;
//case '|':
// sym = Symbol.Bar;
// break;
case '=':
sym = Symbol.Assign;
break;
case '/':
sym = Symbol.Slash;
break;
case '\\':
sym = Symbol.BackSlash;
break;
case '+':
if (_ddlLength >= index + 1 && _strDocument[index + 1] == '=')
sym = Symbol.PlusAssign;
else
sym = Symbol.Plus;
break;
case '-':
if (_ddlLength >= index + 1 && _strDocument[index + 1] == '=')
sym = Symbol.MinusAssign;
else
sym = Symbol.Minus;
break;
case Chars.CR:
sym = Symbol.CR;
break;
case Chars.LF:
sym = Symbol.LF;
break;
case Chars.Space:
sym = Symbol.Blank;
break;
case Chars.Null:
sym = Symbol.Eof;
break;
}
return sym;
}
/// <summary>
/// Gets the next symbol without touching the DDL cursor.
/// </summary>
public Symbol PeekSymbol()
{
int idx = _idx - 1;
int length = _ddlLength - idx;
// Move to first non whitespace
char ch = char.MinValue;
while (length > 0)
{
ch = _strDocument[idx++];
if (!IsWhiteSpace(ch))
break;
length--;
}
if (IsLetter(ch))
return Symbol.Text;
if (ch == '\\')
return PeekKeyword(idx);
return PeekPunctuator(idx - 1);
}
/// <summary>
/// Reads either text or \keyword from current position.
/// </summary>
public Symbol ReadText(bool rootLevel)
{
// Previous call encountered an empty line.
if (_emptyLine)
{
_emptyLine = false;
_symbol = Symbol.EmptyLine;
TokenType = TokenType.None;
_token = "";
return Symbol.EmptyLine;
}
// Init for scanning.
_prevSymbol = _symbol;
_symbol = Symbol.None;
TokenType = TokenType.None;
_token = "";
// Save where we are
SaveCurDocumentPos();
// Check for EOF.
if (_currChar == Chars.Null)
{
_symbol = Symbol.Eof;
return Symbol.Eof;
}
// Check for keyword or escaped character.
if (_currChar == '\\')
{
switch (_nextChar)
{
case '\\':
case '{':
case '}':
case '/':
case '-':
return ReadPlainText(rootLevel);
}
// Either key word or syntax error.
_token = "\\";
return ScanKeyword();
}
// Check for reserved terminal symbols in text.
switch (_currChar)
{
case '{':
AppendAndScanNextChar();
_symbol = Symbol.BraceLeft;
TokenType = TokenType.OperatorOrPunctuator;
return Symbol.BraceLeft; // Syntax error in any case.
case '}':
AppendAndScanNextChar();
_symbol = Symbol.BraceRight;
TokenType = TokenType.OperatorOrPunctuator;
return Symbol.BraceRight;
}
// Check for end of line.
if (_currChar == Chars.LF)
{
// The line ends here. See if the paragraph continues in the next line.
if (MoveToNextParagraphContentLine(rootLevel))
{
// Paragraph continues in next line. Simulate the read of a blank to separate words.
_token = " ";
if (IgnoreLineBreak())
_token = "";
_symbol = Symbol.Text;
return Symbol.Text;
}
else
{
// Paragraph ends here. Return NewLine or BraceRight.
if (_currChar != Chars.BraceRight)
{
_symbol = Symbol.EmptyLine;
TokenType = TokenType.None; //???
return Symbol.EmptyLine;
}
else
{
AppendAndScanNextChar();
_symbol = Symbol.BraceRight;
TokenType = TokenType.OperatorOrPunctuator;
return Symbol.BraceRight;
}
}
}
return ReadPlainText(rootLevel);
}
/// <summary>
/// Returns whether the linebreak should be ignored, because the previous symbol is already a whitespace.
/// </summary>
bool IgnoreLineBreak()
{
switch (_prevSymbol)
{
case Symbol.LineBreak:
case Symbol.Space:
case Symbol.Tab:
return true;
}
return false;
}
/// <summary>
/// Read text from current position until block ends or \keyword occurs.
/// </summary>
Symbol ReadPlainText(bool rootLevel)
{
bool foundSpace = false;
bool loop = true;
while (loop && _currChar != Chars.Null)
{
// Check for escaped character or keyword.
if (_currChar == '\\')
{
switch (_nextChar)
{
case '\\':
case '{':
case '}':
case '/':
ScanNextChar();
AppendAndScanNextChar();
break;
case '-':
// Treat \- as soft hyphen.
ScanNextChar();
// Fake soft hyphen and go on as usual.
_currChar = Chars.SoftHyphen;
break;
// Keyword
default:
loop = false;
break;
}
continue;
}
// Check for reserved terminal symbols in text
switch (_currChar)
{
case '{':
// Syntax error any way
loop = false;
continue;
case '}':
// Block end
loop = false;
continue;
case '/':
if (_nextChar != '/')
goto ValidCharacter;
ScanToEol();
break;
}
// Check for end of line.
if (_currChar == Chars.LF)
{
// The line ends here. See if the paragraph continues in the next line.
if (MoveToNextParagraphContentLine(rootLevel))
{
// Paragraph continues in next line. Add a blank to separate words.
if (!_token.EndsWith(" "))
_token += ' ';
continue;
}
else
{
// Paragraph ends here. Remember that for next call except the reason
// for end is '}'
_emptyLine = _currChar != Chars.BraceRight;
break;
}
}
ValidCharacter:
// Compress multiple blanks to one
if (_currChar == ' ')
{
if (foundSpace)
{
ScanNextChar();
continue;
}
foundSpace = true;
}
else
foundSpace = false;
AppendAndScanNextChar();
}
_symbol = Symbol.Text;
TokenType = TokenType.Text;
return Symbol.Text;
}
/// <summary>
/// Moves to the next DDL token if Symbol is not set to a valid position.
/// </summary>
public Symbol MoveToCode()
{
if (_symbol == Symbol.None || _symbol == Symbol.CR /*|| this .symbol == Symbol.comment*/)
ReadCode();
return _symbol;
}
/// <summary>
/// Moves to the first character the content of a paragraph starts with. Empty lines
/// and comments are skipped. Returns true if such a character exists, and false if the
/// paragraph ends without content.
/// </summary>
public bool MoveToParagraphContent()
{
Again:
MoveToNonWhiteSpace();
if (_currChar == Chars.Slash && _nextChar == Chars.Slash)
{
MoveBeyondEol();
goto Again;
}
return _currChar != Chars.BraceRight;
}
/// <summary>
/// Moves to the first character of the content of a paragraph beyond an EOL.
/// Returns true if such a character exists and belongs to the current paragraph.
/// Returns false if a new line (at root level) or '}' occurs. If a new line caused
/// the end of the paragraph, the DDL cursor is moved to the next valid content
/// character or '}' respectively.
/// </summary>
public bool MoveToNextParagraphContentLine(bool rootLevel)
{
Debug.Assert(_currChar == Chars.LF);
bool loop = true;
ScanNextChar();
while (loop)
{
// Scan to next EOL and ignore any white space.
MoveToNonWhiteSpaceOrEol();
switch (_currChar)
{
case Chars.Null:
loop = false;
break;
case Chars.LF:
ScanNextChar(); // read beyond EOL
if (rootLevel)
{
// At nesting level 0 (root level) a new line ends the paragraph content.
// Move to next content block or '}' respectively.
MoveToParagraphContent();
return false;
}
else
{
// Skip new lines at the end of the paragraph.
if (PeekSymbol() == Symbol.BraceRight)
{
MoveToNonWhiteSpace();
return false;
}
//TODO NiSc
//NYI
//Check.NotImplemented("empty line at non-root level");
}
break;
case Chars.Slash:
if (_nextChar == Chars.Slash)
{
// A line with comment is not treated as empty.
// Skip this line.
MoveBeyondEol();
}
else
{
// Current character is a slash.
return true;
}
break;
case Chars.BraceRight:
return false;
default:
return true;
}
}
return false;
}
/// <summary>
/// If the current character is not a white space, the function immediately returns it.
/// Otherwise the DDL cursor is moved forward to the first non-white space or EOF.
/// White spaces are SPACE, HT, VT, CR, and LF.???
/// </summary>
public char MoveToNonWhiteSpaceOrEol()
{
while (_currChar != Chars.Null)
{
switch (_currChar)
{
case Chars.Space:
case Chars.HT:
case Chars.VT:
ScanNextChar();
break;
default:
return _currChar;
}
}
return _currChar;
}
/// <summary>
/// If the current character is not a white space, the function immediately returns it.
/// Otherwise the DDL cursor is moved forward to the first non-white space or EOF.
/// White spaces are SPACE, HT, VT, CR, and LF.
/// </summary>
public char MoveToNonWhiteSpace()
{
while (_currChar != Chars.Null)
{
switch (_currChar)
{
case Chars.Space:
case Chars.HT:
case Chars.VT:
case Chars.CR:
case Chars.LF:
ScanNextChar();
break;
default:
return _currChar;
}
}
return _currChar;
}
/// <summary>
/// Moves to the first character beyond the next EOL.
/// </summary>
public void MoveBeyondEol()
{
// Similar to ScanSingleLineComment but do not scan the token.
ScanNextChar();
while (_currChar != Chars.Null && _currChar != Chars.LF)
ScanNextChar();
ScanNextChar(); // read beyond EOL
}
/// <summary>
/// Reads a single line comment.
/// </summary>
public Symbol ScanSingleLineComment()
{
char ch = ScanNextChar();
while (ch != Chars.Null && ch != Chars.LF)
{
_token += _currChar;
ch = ScanNextChar();
}
ScanNextChar(); // read beyond EOL
return Symbol.Comment;
}
/// <summary>
/// Gets the current symbol.
/// </summary>
public Symbol Symbol
{
get { return _symbol; }
}
/// <summary>
/// Gets the current token type.
/// </summary>
public TokenType TokenType { get; private set; } = TokenType.None;
/// <summary>
/// Gets the current token.
/// </summary>
public string Token
{
get { return _token; }
}
/// <summary>
/// Interpret current token as integer literal.
/// </summary>
/// <returns></returns>
public int GetTokenValueAsInt()
{
if (_symbol == Symbol.IntegerLiteral)
return Int32.Parse(_token, CultureInfo.InvariantCulture);
if (_symbol == Symbol.HexIntegerLiteral)
{
string number = _token.Substring(2);
return Int32.Parse(number, NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture);
}
//TODO NiSc
//Check.Assert(false);
return 0;
}
/// <summary>
/// Interpret current token as unsigned integer literal.
/// </summary>
/// <returns></returns>
public uint GetTokenValueAsUInt()
{
if (_symbol == Symbol.IntegerLiteral)
return UInt32.Parse(_token, CultureInfo.InvariantCulture);
if (_symbol == Symbol.HexIntegerLiteral)
{
string number = _token.Substring(2);
return UInt32.Parse(number, NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture);
}
//TODO NiSc
//Check.Assert(false);
return 0;
}
/// <summary>
/// Interpret current token as real literal.
/// </summary>
/// <returns></returns>
public double GetTokenValueAsReal()
{
return Double.Parse(_token, CultureInfo.InvariantCulture);
}
/// <summary>
/// Gets the current character or EOF.
/// </summary>
public char Char
{
get { return _currChar; }
}
/// <summary>
/// Gets the character after the current character or EOF.
/// </summary>
public char NextChar
{
get { return _nextChar; }
}
/// <summary>
/// Move DDL cursor one character further.
/// </summary>
public char ScanNextChar()
{
if (_ddlLength <= _idx)
{
_currChar = Chars.Null;
_nextChar = Chars.Null;
}
else
{
SkipChar:
_currChar = _strDocument[_idx++];
_nextChar = _ddlLength <= _idx ? Chars.Null : _strDocument[_idx];
++_idxLinePos;
switch (_currChar)
{
case Chars.Null: //???
++_idxLine;
_idxLinePos = 0;
break;
// ignore CR
case Chars.CR:
if (_nextChar == Chars.LF)
{
goto SkipChar;
}
//else
//{
// //TODO NiSc
// //NYI: MacOS uses CR only
// //Check.NotImplemented();
//}
break;
case Chars.LF:
//NYI: Unix uses LF only
_idxLine++;
_idxLinePos = 0;
break;
}
}
return _currChar;
}
/// <summary>
/// Move DDL cursor to the next EOL (or EOF).
/// </summary>
public void ScanToEol()
{
while (!IsEof(_currChar) && _currChar != Chars.LF)
ScanNextChar();
}
/// <summary>
/// Appends current character to the token and reads next character.
/// </summary>
public char AppendAndScanNextChar()
{
_token += _currChar;
return ScanNextChar();
}
/// <summary>
/// Appends all next characters to current token until end of line or end of file is reached.
/// CR/LF or EOF is not part of the token.
/// </summary>
public void AppendAndScanToEol()
{
char ch = ScanNextChar();
while (ch != Chars.Null && ch != Chars.CR && ch != Chars.LF) //BUG Chars.Null == CharLF
{
_token += _currChar;
ch = ScanNextChar();
}
}
/// <summary>
/// Is character in '0' ... '9'.
/// </summary>
public static bool IsDigit(char ch)
{
return char.IsDigit(ch);
}
/// <summary>
/// Is character a hexadecimal digit.
/// </summary>
public static bool IsHexDigit(char ch)
{
return Char.IsDigit(ch) || (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f');
}
/// <summary>
/// Is character an octal digit.
/// </summary>
public static bool IsOctDigit(char ch)
{
return Char.IsDigit(ch) && ch < '8';
}
/// <summary>
/// Is character an alphabetic letter.
/// </summary>
public static bool IsLetter(char ch)
{
return Char.IsLetter(ch);
}
/// <summary>
/// Is character a white space.
/// </summary>
public static bool IsWhiteSpace(char ch)
{
return Char.IsWhiteSpace(ch);
}
/// <summary>
/// Is character an identifier character. First character can be letter or underscore, following
/// letters, digits or underscores.
/// </summary>
public static bool IsIdentifierChar(char ch, bool firstChar) //IsId..Char
{
if (firstChar)
return Char.IsLetter(ch) | ch == '_';
return Char.IsLetterOrDigit(ch) | ch == '_';
}
/// <summary>
/// Is character the end of file character.
/// </summary>
public static bool IsEof(char ch)
{
return ch == Chars.Null;
}
//public bool IsNumber();
//public bool IsFormat();
//public bool IsParagraphFormat(Symbol* _docSym /*= null*/);
//public bool IsField();
//public bool IsFieldSpecifier();
//public bool IsSymbol();
////bool IsSymbolSpecifier();
//public bool IsFootnote();
//public bool IsComment();
//public bool IsInlineShape();
//
//public bool IsValueSymbole();
//public bool IsScriptSymbole(Symbol _docSym);
//public bool IsParagraphToken();
//public bool IsExtendedParagraphToken();
//public bool IsParagraphElement();
//public bool IsHardHyphen();
//public bool IsNewLine();
//public bool IsWhiteSpace(Symbol _docSym);
/// <summary>
/// Determines whether the given symbol is a valid keyword for a document element.
/// </summary>
public static bool IsDocumentElement(Symbol symbol)
{
switch (symbol)
{
case Symbol.Paragraph:
case Symbol.Table:
case Symbol.Image:
case Symbol.TextFrame:
case Symbol.Chart:
case Symbol.PageBreak:
case Symbol.Barcode:
return true;
}
return false;
}
/// <summary>
/// Determines whether the given symbol is a valid keyword for a section element.
/// </summary>
public static bool IsSectionElement(Symbol symbol)
{
switch (symbol)
{
case Symbol.Paragraph:
case Symbol.Table:
case Symbol.Image:
case Symbol.TextFrame:
case Symbol.Chart:
case Symbol.PageBreak:
case Symbol.Barcode:
case Symbol.Header:
case Symbol.PrimaryHeader:
case Symbol.FirstPageHeader:
case Symbol.EvenPageHeader:
case Symbol.Footer:
case Symbol.PrimaryFooter:
case Symbol.FirstPageFooter:
case Symbol.EvenPageFooter:
return true;
}
return false;
}
/// <summary>
/// Determines whether the given symbol is a valid keyword for a paragraph element.
/// </summary>
public static bool IsParagraphElement(Symbol symbol)
{
switch (symbol)
{
case Symbol.Blank:
case Symbol.Bold:
case Symbol.Italic:
case Symbol.Underline:
case Symbol.Font:
case Symbol.FontColor:
case Symbol.FontSize:
case Symbol.Field:
case Symbol.Hyperlink:
case Symbol.Footnote:
case Symbol.Image:
case Symbol.Tab:
case Symbol.SoftHyphen:
case Symbol.Space:
case Symbol.Symbol:
case Symbol.Chr:
case Symbol.LineBreak:
case Symbol.Text:
return true;
}
return false;
}
/// <summary>
/// Determines whether the given symbol is a valid keyword for a header or footer element.
/// </summary>
public static bool IsHeaderFooterElement(Symbol symbol)
{
// All paragraph elements.
if (IsParagraphElement(symbol))
return true;
// All document elements except pagebreak.
if (IsDocumentElement(symbol))
{
if (symbol == Symbol.PageBreak)
return false;
return true;
}
return false;
}
/// <summary>
/// Determines whether the given symbol is a valid keyword for a footnote element.
/// </summary>
public static bool IsFootnoteElement(Symbol symbol)
{
// All paragraph elements except footnote.
if (IsParagraphElement(symbol))
{
if (symbol == Symbol.Footnote)
return false; // BUG: ??? RETURN TRUE
return true;
}
return false;
}
/// <summary>
/// Gets the current filename of the document.
/// </summary>
public string DocumentFileName
{
get { return _documentFileName; }
}
/// <summary>
/// Gets the current path of the document.
/// </summary>
public string DocumentPath
{
get { return _documentPath; }
}
/// <summary>
/// Gets the current scanner line in the document.
/// </summary>
public int CurrentLine
{
get { return _nCurDocumentLine; }
}
/// <summary>
/// Gets the current scanner column in the document.
/// </summary>
public int CurrentLinePos
{
get { return _nCurDocumentLinePos; }
}
/// <summary>
/// Scans an identifier.
/// </summary>
protected Symbol ScanIdentifier()
{
char ch = AppendAndScanNextChar();
while (IsIdentifierChar(ch, false))
ch = AppendAndScanNextChar();
return Symbol.Identifier;
}
/// <summary>
/// Scans an integer or real literal.
/// </summary>
protected Symbol ScanNumber(bool mantissa)
{
char ch = _currChar;
_token += _currChar;
ScanNextChar();
if (!mantissa && ch == '0' && (_currChar == 'x' || _currChar == 'X'))
return ReadHexNumber();
while (_currChar != Chars.Null)
{
if (IsDigit(_currChar))
AppendAndScanNextChar();
else if (!mantissa && _currChar == Chars.Period)
{
//token += currChar;
return ScanNumber(true);
}
else //if (!IsIdentifierChar(currChar))
break;
//else
// THROW_COMPILER_ERROR (COMPERR_LEX_NUMBER);
}
return mantissa ? Symbol.RealLiteral : Symbol.IntegerLiteral;
}
/// <summary>
/// Scans an hexadecimal literal.
/// </summary>
protected Symbol ReadHexNumber()
{
_token = "0x";
ScanNextChar();
while (_currChar != Chars.Null)
{
if (IsHexDigit(_currChar))
AppendAndScanNextChar();
else if (!IsIdentifierChar(_currChar, false)) //???
break;
else
//THROW_COMPILER_ERROR (COMPERR_LEX_NUMBER);
AppendAndScanNextChar();
}
return Symbol.HexIntegerLiteral;
}
/// <summary>
/// Scans a DDL keyword that starts with a backslash.
/// </summary>
Symbol ScanKeyword()
{
char ch = ScanNextChar();
// \- is a soft hyphen == char(173).
if (ch == '-')
{
_token += "-";
ScanNextChar();
return Symbol.SoftHyphen;
}
// \( is a short cut for symbol.
if (ch == '(')
{
_token += "(";
_symbol = Symbol.Chr;
return Symbol.Chr; // Short cut for \chr(
}
while (!IsEof(ch) && IsIdentifierChar(ch, false))
ch = AppendAndScanNextChar();
_symbol = KeyWords.SymbolFromName(_token);
return _symbol;
}
/// <summary>
/// Scans punctuator terminal symbols.
/// </summary>
protected Symbol ScanPunctuator()
{
Symbol sym = Symbol.None;
switch (_currChar)
{
case '{':
sym = Symbol.BraceLeft;
break;
case '}':
sym = Symbol.BraceRight;
break;
case '[':
sym = Symbol.BracketLeft;
break;
case ']':
sym = Symbol.BracketRight;
break;
case '(':
sym = Symbol.ParenLeft;
break;
case ')':
sym = Symbol.ParenRight;
break;
case ':':
sym = Symbol.Colon;
break;
case ';':
sym = Symbol.Semicolon;
break;
case '.':
sym = Symbol.Dot;
break;
case ',':
sym = Symbol.Comma;
break;
case '%':
sym = Symbol.Percent;
break;
case '$':
sym = Symbol.Dollar;
break;
case '@':
sym = Symbol.At;
break;
case '#':
sym = Symbol.Hash;
break;
//case '?':
// sym = Symbol.Question;
// break;
case '¤':
sym = Symbol.Currency; //??? used in DDL?
break;
//case '|':
// sym = Symbol.Bar;
// break;
case '=':
sym = Symbol.Assign;
break;
case '/':
sym = Symbol.Slash;
break;
case '\\':
sym = Symbol.BackSlash;
break;
case '+':
if (_nextChar == '=')
{
_token += _currChar;
ScanNextChar();
sym = Symbol.PlusAssign;
}
else
sym = Symbol.Plus;
break;
case '-':
if (_nextChar == '=')
{
_token += _currChar;
ScanNextChar();
sym = Symbol.MinusAssign;
}
else
sym = Symbol.Minus;
break;
case Chars.CR:
sym = Symbol.CR;
break;
case Chars.LF:
sym = Symbol.LF;
break;
case Chars.Space:
sym = Symbol.Blank;
break;
case Chars.Null:
sym = Symbol.Eof;
return sym;
}
_token += _currChar;
ScanNextChar();
return sym;
}
// protected Symbol ReadValueIdentifier();
///// <summary>
///// Scans string literals used as identifiers.
///// </summary>
///// <returns></returns>
//protected string ReadRawString() //ScanStringLiteralIdentifier
//{
// string str = "";
// char ch = ScanNextChar();
// while (!IsEof(ch))
// {
// if (ch == Chars.QuoteDbl)
// {
// if (nextChar == Chars.QuoteDbl)
// {
// str += ch;
// ch = ScanNextChar();
// }
// else
// break;
// }
//
// str += ch;
// ch = ScanNextChar();
// }
//
// ScanNextChar();
// return str;
//}
/// <summary>
/// Scans verbatim strings like «@"String with ""quoted"" text"».
/// </summary>
protected string ScanVerbatimStringLiteral()
{
string str = "";
char ch = ScanNextChar();
while (!IsEof(ch))
{
if (ch == Chars.QuoteDbl)
{
if (_nextChar == Chars.QuoteDbl)
ch = ScanNextChar();
else
break;
}
str += ch;
ch = ScanNextChar();
}
ScanNextChar();
return str;
}
/// <summary>
/// Scans regular string literals like «"String with \"escaped\" text"».
/// </summary>
protected string ScanStringLiteral()
{
Debug.Assert(Char == '\"');
StringBuilder str = new StringBuilder();
ScanNextChar();
while (_currChar != Chars.QuoteDbl && !IsEof(_currChar))
{
if (_currChar == '\\')
{
ScanNextChar(); // read escaped characters
switch (_currChar)
{
case 'a':
str.Append('\a');
break;
case 'b':
str.Append('\b');
break;
case 'f':
str.Append('\f');
break;
case 'n':
str.Append('\n');
break;
case 'r':
str.Append('\r');
break;
case 't':
str.Append('\t');
break;
case 'v':
str.Append('\v');
break;
case '\'':
str.Append('\'');
break;
case '\"':
str.Append('\"');
break;
case '\\':
str.Append('\\');
break;
case 'x':
{
ScanNextChar();
int hexNrCount = 0;
//string hexString = "0x";
while (IsHexDigit(_currChar))
{
++hexNrCount;
//hexString += _currChar;
ScanNextChar();
}
if (hexNrCount <= 2)
str.Append("?????"); //(char)AscULongFromHexString(hexString);
else
throw new DdlParserException(DdlErrorLevel.Error,
DomSR.GetString(DomMsgID.EscapeSequenceNotAllowed), DomMsgID.EscapeSequenceNotAllowed);
}
break;
//NYI: octal numbers
//case '0':
//{
// ScanNextChar();
// int hexNrCount = 0;
// string hexString = "0x";
// while (IsOctDigit(currChar))
// {
// ++hexNrCount;
// hexString += currChar;
// ScanNextChar();
// }
// if (hexNrCount <=2)
// str += "?????"; //(char)AscULongFromHexString(hexString);
// else
// throw new DdlParserException(DdlErrorLevel.Error, "DdlScanner",DomMsgID.EscapeSequenceNotAllowed, null);
//}
// break;
default:
throw new DdlParserException(DdlErrorLevel.Error,
DomSR.GetString(DomMsgID.EscapeSequenceNotAllowed), DomMsgID.EscapeSequenceNotAllowed);
}
}
else if (_currChar == Chars.Null || _currChar == Chars.CR || _currChar == Chars.LF)
throw new DdlParserException(DdlErrorLevel.Error,
DomSR.GetString(DomMsgID.NewlineInString), DomMsgID.NewlineInString);
else
str.Append(_currChar);
ScanNextChar();
}
ScanNextChar(); // read '"'
return str.ToString();
}
/// <summary>
/// Save the current scanner location in the document for error handling.
/// </summary>
void SaveCurDocumentPos()
{
_nCurDocumentIndex = _idx - 1;
_nCurDocumentLine = _idxLine;
_nCurDocumentLinePos = _idxLinePos;
}
int _nCurDocumentIndex;
int _nCurDocumentLine;
int _nCurDocumentLinePos;
string _documentFileName;
string _documentPath;
string _strDocument;
int _ddlLength;
int _idx;
int _idxLine;
int _idxLinePos;
char _currChar;
char _nextChar;
string _token = "";
Symbol _symbol = Symbol.None;
Symbol _prevSymbol = Symbol.None;
bool _emptyLine;
DdlReaderErrors _errors;
}
}