Re: [OT] S-expression parser in C#?



"Jamie Border" <jamie@xxxxxxxxxxx> writes:

> Tangentially CL-related, I guess:
>
> Anybody know of a good(ish) example of this?
>
> I've just hand-rolled a reasonable (but slowish) s-exp parser in C#, but I'm
> not happy with the result (slow).
>
> Jamie

Here is a trivial one I wrote for playing around. I never benchmarked
it, so it may be dog slow. In addition to reading SExps, it tracks
the line and column. It also returns tokens that represent whitespace
and comments (I wanted to transform Lisp files but retain the
formatting and comments).

I have omitted the definitions of the `Form' class and its subclasses,
but those should be pretty obvious.

If this is of interest at all, I'll put it on the web.

~jrm

-------

using System;
using System.Collections.Generic;
using System.IO;
using System.Text;


namespace hack
{
public class Reader
{
// Character source; this class only ever calls Peek () and Read () on it.
TextReader inputStream;
// Position bookkeeping: replaced by FilePos.Advance (c) each time ReadChar () consumes a character.
FilePosition FilePos;
// Running count of AtEndOfFile/PeekChar/ReadChar calls; CheckPeekCount () throws once it
// exceeds 1000000. Nothing in this class resets it.
int PeekCount;
// ReadList/ReadVector add one on entry and subtract one again when done.
// Nothing in this class reads the value.
int ListDepth;

/// <summary>
/// Creates a reader that pulls its characters from <paramref name="inputStream"/>.
/// </summary>
/// <param name="inputStream">Character source; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="inputStream"/> is null.</exception>
public Reader (TextReader inputStream)
{
    // Fail at construction time instead of with a NullReferenceException
    // on the first Peek/Read.
    if (inputStream == null)
        throw new ArgumentNullException ("inputStream");

    this.inputStream = inputStream;
    this.FilePos = new FilePosition (0, 0, 0);
    // PeekCount and ListDepth keep their default value of zero.
}

/// <summary>
/// Bumps the operation counter and aborts once it has passed one million.
/// </summary>
/// <remarks>
/// Invoked by AtEndOfFile, PeekChar and ReadChar. The counter is never reset
/// in this class, so the limit applies to the whole lifetime of the reader.
/// </remarks>
/// <exception cref="ReaderException">The counter exceeds 1000000.</exception>
public void CheckPeekCount ()
{
    const int limit = 1000000;

    this.PeekCount = this.PeekCount + 1;
    if (this.PeekCount <= limit)
        return;

    throw new ReaderException ();
}

/// <summary>
/// Tells whether the underlying stream has no more characters to offer.
/// </summary>
/// <returns>true when Peek () on the stream yields -1; otherwise false.</returns>
/// <remarks>Counts as one operation for CheckPeekCount.</remarks>
public bool AtEndOfFile ()
{
    CheckPeekCount ();
    int next = this.inputStream.Peek ();
    return next == -1;
}

/// <summary>
/// Looks at the next character without consuming it.
/// </summary>
/// <returns>
/// The stream's Peek () result cast to char. When the stream is exhausted
/// Peek () yields -1, which the cast turns into '\uFFFF' in the default
/// unchecked context, so callers are expected to test AtEndOfFile first.
/// </returns>
/// <remarks>Counts as one operation for CheckPeekCount.</remarks>
public char PeekChar ()
{
    CheckPeekCount ();
    int next = this.inputStream.Peek ();
    return (char) next;
}

/// <summary>
/// Consumes the next character and advances the tracked file position past it.
/// </summary>
/// <returns>The character that was consumed.</returns>
/// <exception cref="ReaderException">
/// The stream is already exhausted, or the CheckPeekCount limit was hit.
/// </exception>
public char ReadChar ()
{
    CheckPeekCount ();

    int next = this.inputStream.Read ();
    // Read () signals end of input with -1. Casting that to char would hand
    // the caller '\uFFFF' as if it were data and move FilePos past the end.
    if (next == -1)
        throw new ReaderException ();

    char result = (char) next;
    this.FilePos = this.FilePos.Advance (result);
    return result;
}

/// <summary>
/// Consumes one character through ReadChar and throws the value away.
/// </summary>
public void DiscardChar ()
{
    this.ReadChar ();
}

/// <summary>
/// Reads from the current position up to and including the next '\n'
/// (or to end of input) and wraps the text in a CommentForm.
/// </summary>
/// <param name="commentStart">Position recorded on the resulting form.</param>
/// <returns>
/// A CommentForm holding every character consumed, including the
/// terminating newline when there is one.
/// </returns>
/// <remarks>
/// Read () calls this without consuming the ';' first, so the ';' ends up
/// in the comment text.
/// </remarks>
public Form ReadComment (FilePosition commentStart)
{
    // StringBuilder avoids the List<char> -> char[] -> string double copy.
    StringBuilder commentText = new StringBuilder ();

    while (!AtEndOfFile ()) {
        // Decide before consuming whether this character ends the comment.
        bool endOfLine = PeekChar () == '\n';
        commentText.Append (ReadChar ());
        if (endOfLine)
            break;
    }

    return new CommentForm (commentStart, commentText.ToString ());
}

/// <summary>
/// Reads a token that begins with '.', consuming that first dot itself.
/// </summary>
/// <returns>
/// The result of ReadDotDot when a second '.' follows; the result of
/// ReadSymbol when a symbol constituent follows; otherwise (end of input,
/// whitespace, or any other character such as ')') a DotForm.
/// </returns>
/// <exception cref="ReaderException">Propagated from ReadDotDot.</exception>
Form ReadDot ()
{
    FilePosition dotStart = this.FilePos;
    this.DiscardChar ();
    if (this.AtEndOfFile ())
        return new DotForm (dotStart);

    char peeked = this.PeekChar ();
    if (peeked == '.')
        return ReadDotDot (dotStart);

    // NOTE(review): the leading '.' has already been discarded, so the symbol
    // text produced here does not contain it - confirm that is intended.
    if (IsSymbolConstituent (peeked))
        return ReadSymbol (dotStart);

    // Anything else terminates the token. Previously only whitespace was
    // treated this way, so "(a .)" fell into ReadSymbol and produced a
    // symbol with an empty name instead of a dot.
    return new DotForm (dotStart);
}

/// <summary>
/// Handles the second '.' of a run of dots: consumes it and insists on a third.
/// </summary>
/// <param name="dotStart">Position of the first dot, passed on to ReadDotDotDot.</param>
/// <returns>Whatever ReadDotDotDot produces.</returns>
/// <exception cref="ReaderException">
/// The input ends after the second dot or the next character is not '.'.
/// </exception>
Form ReadDotDot (FilePosition dotStart)
{
    this.DiscardChar ();

    // Exactly two dots is not a token: a third one must follow.
    bool thirdDotFollows = !this.AtEndOfFile () && this.PeekChar () == '.';
    if (!thirdDotFollows)
        throw new ReaderException ();

    return ReadDotDotDot (dotStart);
}

/// <summary>
/// Consumes the third '.' and produces an EllipsesForm.
/// </summary>
/// <param name="dotStart">Position of the first dot, recorded on the form.</param>
/// <returns>An EllipsesForm positioned at <paramref name="dotStart"/>.</returns>
/// <exception cref="ReaderException">
/// A symbol constituent immediately follows the third dot.
/// </exception>
Form ReadDotDotDot (FilePosition dotStart)
{
    this.DiscardChar ();

    // "..." must not run straight into symbol characters.
    if (!this.AtEndOfFile () && IsSymbolConstituent (this.PeekChar ()))
        throw new ReaderException ();

    return new EllipsesForm (dotStart);
}

/// <summary>
/// Reads a parenthesised list: consumes the opening character, then collects
/// forms until the matching ')' has been consumed.
/// </summary>
/// <param name="listStart">Position recorded on the resulting form.</param>
/// <returns>
/// An ImproperListForm when a lone dot appeared among the elements,
/// otherwise a ProperListForm. Every form read (including the dot itself and
/// whatever Read () returns for whitespace and comments) is kept as an element.
/// </returns>
/// <exception cref="ReaderException">
/// The input ends before ')' is seen, or a second lone dot appears.
/// </exception>
Form ReadList (FilePosition listStart)
{
    bool improper = false;
    List<Form> subelements = new List<Form> ();

    this.ListDepth += 1;
    try {
        this.DiscardChar ();

        while (true) {
            if (this.AtEndOfFile ())
                throw new ReaderException ();

            char peeked = this.PeekChar ();

            if (peeked == ')') {
                this.DiscardChar ();
                return improper
                    ? (ListForm) (new ImproperListForm (listStart, subelements.ToArray ()))
                    : (ListForm) (new ProperListForm (listStart, subelements.ToArray ()));
            }

            Form element;
            if (peeked == '.') {
                element = this.ReadDot ();
                if (element is DotForm) {
                    // Only one lone dot is allowed per list.
                    if (improper)
                        throw new ReaderException ();
                    improper = true;
                }
            }
            else {
                element = this.Read ();
            }
            subelements.Add (element);
        }
    }
    finally {
        // Restore the depth on every exit path. Previously an exception from
        // a nested read left ListDepth permanently incremented.
        this.ListDepth -= 1;
    }
}

/// <summary>
/// Reads the parenthesised body of a vector: consumes the opening character,
/// then collects forms until the matching ')' has been consumed.
/// </summary>
/// <param name="listStart">Position recorded on the resulting VectorForm.</param>
/// <returns>A VectorForm holding every form read between the delimiters.</returns>
/// <exception cref="ReaderException">
/// The input ends before ')' is seen, or a lone dot appears inside the vector.
/// </exception>
Form ReadVector (FilePosition listStart)
{
    List<Form> subelements = new List<Form> ();

    this.ListDepth += 1;
    try {
        this.DiscardChar ();

        while (true) {
            if (this.AtEndOfFile ())
                throw new ReaderException ();

            char peeked = this.PeekChar ();

            if (peeked == ')') {
                this.DiscardChar ();
                return new VectorForm (listStart, subelements.ToArray ());
            }

            Form element;
            if (peeked == '.') {
                element = this.ReadDot ();
                // Dotted-pair notation is rejected inside a vector.
                if (element is DotForm)
                    throw new ReaderException ();
            }
            else {
                element = this.Read ();
            }
            subelements.Add (element);
        }
    }
    finally {
        // Restore the depth on every exit path. Previously an exception from
        // a nested read left ListDepth permanently incremented.
        this.ListDepth -= 1;
    }
}

/// <summary>
/// Decides whether <paramref name="c"/> may appear inside a symbol.
/// </summary>
/// <param name="c">Character to classify.</param>
/// <returns>
/// true for any letter or digit (per Char.IsLetterOrDigit) and for the
/// punctuation characters - _ / ? * ! = &gt;; false for everything else.
/// </returns>
public static bool IsSymbolConstituent (char c)
{
    switch (c) {
    case '-':
    case '_':
    case '/':
    case '?':
    case '*':
    case '!':
    case '=':
    case '>':
        return true;
    default:
        return Char.IsLetterOrDigit (c);
    }
}

/// <summary>
/// Decides whether <paramref name="c"/> may start a symbol. The rule is
/// the same as for any other symbol character: it defers entirely to
/// IsSymbolConstituent.
/// </summary>
/// <param name="c">Character to classify.</param>
/// <returns>The result of IsSymbolConstituent for <paramref name="c"/>.</returns>
public static bool IsSymbolInitial (char c)
{
    bool acceptable = IsSymbolConstituent (c);
    return acceptable;
}

/// <summary>
/// Consumes the longest run of symbol-constituent characters at the current
/// position and wraps it in a SymbolForm.
/// </summary>
/// <param name="symbolStart">Position recorded on the resulting form.</param>
/// <returns>
/// A SymbolForm whose text is the characters consumed. The text is empty
/// if the next character is not a symbol constituent or the input has ended.
/// </returns>
public Form ReadSymbol (FilePosition symbolStart)
{
    // StringBuilder avoids the List<char> -> char[] -> string double copy.
    StringBuilder name = new StringBuilder ();

    while (!this.AtEndOfFile () && IsSymbolConstituent (this.PeekChar ()))
        name.Append (this.ReadChar ());

    return new SymbolForm (symbolStart, name.ToString ());
}

/// <summary>
/// Consumes the longest run of whitespace (per Char.IsWhiteSpace) at the
/// current position and wraps it in a WhitespaceForm so that the original
/// layout can be reproduced.
/// </summary>
/// <param name="whitespaceStart">Position recorded on the resulting form.</param>
/// <returns>A WhitespaceForm whose text is the characters consumed.</returns>
Form ReadWhitespace (FilePosition whitespaceStart)
{
    // StringBuilder avoids the List<char> -> char[] -> string double copy.
    StringBuilder blanks = new StringBuilder ();

    while (!this.AtEndOfFile () && Char.IsWhiteSpace (this.PeekChar ()))
        blanks.Append (this.ReadChar ());

    return new WhitespaceForm (whitespaceStart, blanks.ToString ());
}

/// <summary>
/// Consumes the single character following '#' and turns it into a BooleanForm.
/// </summary>
/// <param name="sharpStart">Position of the '#', recorded on the form.</param>
/// <returns>A BooleanForm built from the start position and the consumed character.</returns>
/// <exception cref="ReaderException">
/// A symbol constituent immediately follows the consumed character.
/// </exception>
Form ReadBoolean (FilePosition sharpStart)
{
    char letter = this.ReadChar ();

    // The token must end right after the letter.
    bool runsOn = !this.AtEndOfFile () && IsSymbolConstituent (this.PeekChar ());
    if (runsOn)
        throw new ReaderException ();

    return new BooleanForm (sharpStart, letter);
}

/// <summary>
/// Dispatches on the character after '#': consumes the '#', then hands off
/// to ReadBoolean for t/T/f/F or to ReadVector for '('.
/// </summary>
/// <param name="sharpStart">Position of the '#', passed on to the sub-reader.</param>
/// <returns>The form produced by ReadBoolean or ReadVector.</returns>
/// <exception cref="ReaderException">
/// The input ends after '#', or the next character is none of t, T, f, F, '('.
/// </exception>
Form ReadSharp (FilePosition sharpStart)
{
    this.DiscardChar ();
    if (this.AtEndOfFile ())
        throw new ReaderException ();

    switch (this.PeekChar ()) {
    case 't':
    case 'T':
    case 'f':
    case 'F':
        return ReadBoolean (sharpStart);
    case '(':
        return ReadVector (sharpStart);
    default:
        throw new ReaderException ();
    }
}

/// <summary>
/// Shared tail of all quote-like readers: keeps calling Read () and
/// collecting the results until a form that is not noise has been read.
/// </summary>
/// <returns>
/// Every form read, in order; the last element is the first one whose
/// IsNoise is false, any earlier elements are the noise that preceded it.
/// </returns>
/// <exception cref="ReaderException">
/// The input ends before a non-noise form is found, or a form is read
/// whose CanFollowQuote is false.
/// </exception>
Form[] ReadQuotedForms ()
{
    List<Form> quoted = new List<Form> ();
    while (true) {
        if (this.AtEndOfFile ())
            throw new ReaderException ();

        Form quoteTarget = this.Read ();
        if (!quoteTarget.CanFollowQuote)
            throw new ReaderException ();
        quoted.Add (quoteTarget);

        if (!quoteTarget.IsNoise)
            return quoted.ToArray ();
    }
}

/// <summary>
/// Reads a quoted form: consumes the quote character, then the form it applies to.
/// </summary>
/// <param name="quoteStart">Position of the quote character, recorded on the form.</param>
/// <returns>A QuoteForm wrapping the forms returned by ReadQuotedForms.</returns>
/// <exception cref="ReaderException">See ReadQuotedForms.</exception>
Form ReadSingleQuote (FilePosition quoteStart)
{
    this.DiscardChar ();
    return new QuoteForm (quoteStart, ReadQuotedForms ());
}

/// <summary>
/// Reads a quasi-quoted form: consumes the back-quote, then the form it applies to.
/// </summary>
/// <param name="quoteStart">Position of the back-quote, recorded on the form.</param>
/// <returns>A QuasiQuoteForm wrapping the forms returned by ReadQuotedForms.</returns>
/// <exception cref="ReaderException">See ReadQuotedForms.</exception>
Form ReadBackQuote (FilePosition quoteStart)
{
    this.DiscardChar ();
    return new QuasiQuoteForm (quoteStart, ReadQuotedForms ());
}

/// <summary>
/// Reads the remainder of an unquote-splicing form. The ',' has already been
/// consumed by ReadComma; this consumes the '@' and then the target form.
/// </summary>
/// <param name="commaStart">Position of the ',', recorded on the form.</param>
/// <returns>An UnquoteSplicingForm wrapping the forms returned by ReadQuotedForms.</returns>
/// <exception cref="ReaderException">See ReadQuotedForms.</exception>
Form ReadCommaAt (FilePosition commaStart)
{
    this.DiscardChar ();
    return new UnquoteSplicingForm (commaStart, ReadQuotedForms ());
}

/// <summary>
/// Reads an unquote form, or defers to ReadCommaAt when the ',' is followed by '@'.
/// </summary>
/// <param name="commaStart">Position of the ',', recorded on the form.</param>
/// <returns>An UnquoteForm, or the UnquoteSplicingForm produced by ReadCommaAt.</returns>
/// <exception cref="ReaderException">
/// The input ends right after the ','; otherwise see ReadQuotedForms.
/// </exception>
Form ReadComma (FilePosition commaStart)
{
    this.DiscardChar ();
    if (this.AtEndOfFile ())
        throw new ReaderException ();

    if (this.PeekChar () == '@')
        return ReadCommaAt (commaStart);

    return new UnquoteForm (commaStart, ReadQuotedForms ());
}

/// <summary>
/// Reads the next form from the input, dispatching on its first character.
/// Whitespace runs and comments are returned as forms of their own rather
/// than skipped.
/// </summary>
/// <returns>The form that was read, or null when the input is exhausted.</returns>
/// <exception cref="ReaderBadCharacterException">
/// The next character does not start any recognised form.
/// </exception>
/// <exception cref="ReaderException">Propagated from the individual sub-readers.</exception>
public Form Read ()
{
    if (this.AtEndOfFile ())
        return null;

    char peeked = this.PeekChar ();
    switch (peeked) {
    case ';':
        return ReadComment (this.FilePos);
    case '#':
        return ReadSharp (this.FilePos);
    case '\'':
        return ReadSingleQuote (this.FilePos);
    case '`':
        return ReadBackQuote (this.FilePos);
    case '(':
        return ReadList (this.FilePos);
    case '.':
        return ReadDot ();
    case ',':
        return ReadComma (this.FilePos);
    }

    // Use the same test ReadWhitespace applies while consuming. The previous
    // check accepted only ' ' and '\r', so a token position that began with
    // '\n' or '\t' was rejected as a bad character.
    if (Char.IsWhiteSpace (peeked))
        return ReadWhitespace (this.FilePos);

    if (IsSymbolInitial (peeked))
        return ReadSymbol (this.FilePos);

    throw new ReaderBadCharacterException (peeked, this.FilePos);
}
}
}
.