Re: How do I create a programming language?

From: Ed Davis (ed_davis2_at_yahoo.com)
Date: 12/16/03


Date: 16 Dec 2003 08:04:22 -0800

Leif K-Brooks <eurleif@ecritters.biz> wrote:
> I'm not really at the stage of language design yet, I'm just trying to
> figure out how to write basic parsers. I would be happy with a print
> statment and simple arithmetic (print 2+2 resulting in 4 being
> displayed, for instance).

Here is a little language interpreter that supports printing of
simple integer arithmetic expressions. An example run might be:

tiny <test

Where test contains:

print "Hello", " ", "World!"
print " "
print 1000000 + 2000000 + 3000000
print " "
print "Good bye"

This would display:

Hello World! 6000000 Good bye

Code follows:

/****************************************************************
  program = {printstmt}.
  printstmt = PRINT expr|string {, expr|string}.
  expr = term {addop term}.
  term = factor {mulop factor}.
  factor = integer | "(" expr ")".
 ****************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <ctype.h>

typedef enum {EOI,STRING='"',LPAREN='(',RPAREN=')',MULTIPLY='*',
  PLUS='+',COMMA=',',MINUS='-',DIVIDE='/',INTEGER,PRINT,IDENT}
  Symbol;
typedef enum {OP_HALT, OP_PUSH_INT, OP_ADD, OP_SUB, OP_MUL,
  OP_DIV, OP_PRT_STR, OP_PRT_INT} Opcode;
typedef unsigned char uchar;

#define MAX_ID_LEN 255
#define MAXCODE 200
#define MAXSTACK 100

typedef struct {
  Symbol sym;
  int num;
  int str_len;
  char str[MAX_ID_LEN];
} Token;

Token tok;
uchar code[MAXCODE];
int codeindex;

void expr(void);

void error(const char *fmt, ...) {
  char buf[255];
  va_list argptr;

  va_start(argptr, fmt);
  vsprintf(buf, fmt, argptr);
  va_end(argptr);
  printf(buf);
  exit(1);
}

/*** Byte-code interpreter **************************************/

int get_int_from_code(int *pc) {
  int data;

  memcpy(&data, &code[*pc], sizeof(data));
  (*pc) += sizeof(data);
  return data;
}

void interpret(const int maxcode) {
  int pc; // program counter
  int sp; // stack pointer
  int halted; // status
  int stack[MAXSTACK]; // runtime stack

  halted = pc = sp = 0;
  do {
    Opcode opcode;
    int operand;

    opcode = code[pc++];
    if (pc > maxcode) error("pc > maxcode");
    if (sp >= MAXSTACK) error("stack overflow");
    if (sp < 0) error("stack underflow");
    switch (opcode) {
      case OP_PUSH_INT:
        stack[sp++] = get_int_from_code(&pc);
        break;
      case OP_ADD: sp--; stack[sp - 1] += stack[sp]; break;
      case OP_SUB: sp--; stack[sp - 1] -= stack[sp]; break;
      case OP_MUL: sp--; stack[sp - 1] *= stack[sp]; break;
      case OP_DIV:
        sp--;
        if (stack[sp] == 0) error("divide by zero");
        stack[sp - 1] /= stack[sp];
        break;
      case OP_PRT_STR:
        for (operand = get_int_from_code(&pc);
            operand;
            operand--) {
          printf("%c", code[pc++]);
        }
        break;
      case OP_PRT_INT: sp--; printf("%d", stack[sp]); break;
      case OP_HALT: halted++; break;
      default: error("Unknown opcode %d\n", opcode);
    }
  } while (!halted);
}

/*** Code emitting **********************************************/

void emit(int opcode) {
  if (codeindex >= MAXCODE)
    error("too much code - need to increase codesize\n");
  code[codeindex++] = (uchar)opcode;
}

void emit2(int opcode, int operand) {
  emit(opcode);
  memcpy(&code[codeindex], &operand, sizeof(operand));
  codeindex += sizeof(operand);
}

/*** Scanner or Lexical analyzer ********************************/

void getdigit(int ch) {
  tok.num = 0; tok.sym = INTEGER;
  do {
    tok.num = 10 * tok.num + (ch - '0');
  } while ((ch = getc(stdin)) >= 0 && isdigit(ch));
  ungetc(ch, stdin);
}

void getident(int ch) {
  int k = 0; tok.sym = IDENT;
  do {
    if (k < MAX_ID_LEN) {
      tok.str[k] = (char)ch;
      k++;
    }
  } while ((ch = getc(stdin)) >= 0 && isalnum(ch));
  tok.str[k] = '\0';
  ungetc(ch, stdin);
  if (stricmp(tok.str, "PRINT") == 0) tok.sym = PRINT;
}

void getstring(int ch) {
  int k = 0; tok.sym = STRING;
  while ((ch = getc(stdin)) >= 0 && ch != '"') {
    if (k < MAX_ID_LEN) {
      tok.str[k] = (char)ch;
      k++;
    }
  }
  tok.str[k] = '\0'; tok.str_len = k;
}

void gettok(void) {
  for (;;) {
    int ch = getc(stdin);
    switch (ch) {
      case '\n': case '\t': case '\r': case ' ': continue;
      case '+': case '-': case '*': case '/': case ',': case '(':
      case ')': tok.sym = (Symbol)ch; return;
      case '"': getstring(ch); return;
      case -1: tok.sym = EOI; return;
      default:
        if (isdigit(ch)) { getdigit(ch); return; }
        if (isalpha(ch)) { getident(ch); return; }
        error("unrecognized character %c\n", ch);
    }
  }
}

/*** Parser *****************************************************/

void factor(void) {
  switch (tok.sym) {
    case INTEGER: emit2(OP_PUSH_INT, tok.num); gettok(); break;
    case LPAREN:
      gettok();
      expr();
      if (tok.sym != RPAREN) error("expecting ')'");
      gettok();
      break;
    default: error("expecting number"); break;
  }
}

void term(void) {
  factor();
  for (;;) {
    Opcode opcode;
    switch (tok.sym) {
      case MULTIPLY: opcode = OP_MUL; break;
      case DIVIDE: opcode = OP_DIV; break;
      default: return;
    }
    gettok(); factor(); emit(opcode);
  }
}

void expr(void) {
  term();
  for (;;) {
    Opcode opcode;
    switch (tok.sym) {
      case PLUS: opcode = OP_ADD; break;
      case MINUS: opcode = OP_SUB; break;
      default: return;
    }
    gettok(); term(); emit(opcode);
  }
}

void do_string(void) {
  int i;

  for (i = 0; i < tok.str_len; i++) emit(tok.str[i]);
  gettok();
}

void printstmt(void) {
  do {
    gettok();
    if (tok.sym == STRING) {
      emit2(OP_PRT_STR, tok.str_len); do_string();
    } else if (tok.sym == INTEGER) {
      expr(); emit(OP_PRT_INT);
    } else error("expecting string or integer");
  } while (tok.sym == COMMA);
}

void parse(void) {
  gettok();
  while (tok.sym == PRINT) printstmt();
  if (tok.sym != EOI) error("PRINT expected");
  emit(OP_HALT);
}

int main(void) {
  parse();
  interpret(codeindex);
  return 0;
}



Relevant Pages

  • Help in Java swings(internal Frame)
    ... public int getSize() ... public void valueChanged{ ... private JScrollPane scrollPane1; ... public class PeakContainer extends JInternalFrame ...
    (comp.lang.java.programmer)
  • [PATCH] get rid if __cpuinit and __cpuexit
    ... unsigned long action, void *hcpu) ... unsigned int cpu = hcpu; ... -static int __cpuinit ... __cpu_up(unsigned int cpu) ...
    (Linux-Kernel)
  • [PATCH,RFC 2.6.14 09/15] KGDB: SuperH-specific changes
    ... This adds basic support for KGDB on SuperH as well as adding some architecture ... -static int kgdb_uart_getchar ... -static void kgdb_uart_putchar ... * The command-line option can include a serial port specification ...
    (Linux-Kernel)
  • problem in java swings
    ... public int getSize() ... public void valueChanged{ ... private JScrollPane scrollPane1; ... public class PeakContainer extends JInternalFrame ...
    (comp.lang.java.programmer)
  • ToolTips in a View and TTN_NEEDTEXT
    ... extern int g_minWorkPeriod; ... void CDayView::CreateAllFonts ... void CDayView::DrawDayLog(CDC* pDC) ... BOOL CDayView::TimeToY ...
    (microsoft.public.vc.mfc)