IMPLEMENTATION MODULE Lex; (* VAX 11/780: Sohail Aslam *) FROM Tools IMPORT LETA , LETZ , UNDERSCORE, BIGA, BIGZ , BLANK , DIG0 , DIG9 , COMMA , PLUS, STAR , SLASH , EQUAL , MINUS , GREATERTHAN, DECIMAL, QUOTE , COLON , SEMICOLON , BACKSLASH , NEWLINE, LESSTHAN , OPENPAREN, CLOSEPAREN, ENDFILE , DEL , ENDSTR , MAXSTR , String , Character, Getc , Putc , EchoInput, PutStr , PutQStr, Strloc ; FROM InOut IMPORT in , out , WriteCard , WriteInt , ReadCard , WriteString, WriteLn ; FROM FileSystem IMPORT File , Done , Open , Close , ReadChar , WriteChar , Name ; CONST MaxState = 27; MaxTokenTypes = 23; MAXKEYWORDS = 77; TYPE CharClass = ( cc_Letter, cc_Colon, cc_Decimal, cc_Digit, cc_Minus, cc_GreaterThan, cc_Quote, cc_LessThan, cc_Equal, cc_Plus, cc_Star, cc_Slash, cc_Lparen, cc_Rparen, cc_Comma, cc_Semicolon, cc_Newline, cc_Backslash, cc_Ignore, cc_FileEnd, cc_Err); TYPE ClassIndexRange = [ cc_Letter..cc_Err]; StateIndexRange = [ 1..MaxState ]; TableArray = ARRAY StateIndexRange OF ARRAY ClassIndexRange OF CHAR; TYPE ShortStr = ARRAY [ 1..12 ] OF Character; Kwrecord = RECORD KeyWord : ShortStr; LexCode : TokenType END; VAR NextStateTable : TableArray; OutPutStateTable : TableArray; ArrayOfTokenTypes : ARRAY [ 1..MaxTokenTypes ] OF TokenType; State : CARDINAL; (* Need to save these two across calls *) SavedChar : Character; ClassArray: ARRAY Character OF CharClass; LexKeyWordTable : ARRAY [ 1..MAXKEYWORDS] OF Kwrecord; Startingout : BOOLEAN; (* ------------------------------------------------------------------------- *) PROCEDURE LoadTables; (* This procedure is used to load the statetables from their files. The files and their contents are: LexNext.chr NextStateTable_entries written out as CHAR LexOut.chr OutPutStateTable_entries written out as CHAR *) VAR inf : File; c : CHAR; i : CARDINAL; j : ClassIndexRange; BEGIN Open( inf, "PIC$TABLES:LexNext.chr", FALSE ); IF NOT Done( ) THEN PutQStr( "%PIC-F-BUGCHECK, could not open LexNext.chr. Please "); PutQStr( "inform your PIC manager\n" ); HALT END; FOR i := 1 TO MaxState DO FOR j := cc_Letter TO cc_Err DO ReadChar( inf, c ); NextStateTable[ i,j ] := c END END; Close( inf ); Open( inf, "PIC$TABLES:LexOut.chr", FALSE ); IF NOT Done( ) THEN PutQStr( "%PIC-F-BUGCHECK, could not open LexOut.chr. Please "); PutQStr( "inform your PIC manager\n" ); HALT END; FOR i := 1 TO MaxState DO FOR j := cc_Letter TO cc_Err DO ReadChar( inf, c ); OutPutStateTable[ i,j ] := c END END; Close( inf ) END LoadTables; (* ------------------------------------------------------------------------- *) PROCEDURE DumpTables; (* This procedure can be used to dump the statetables onto files. The files and their contents are: LexNext.chr NextStateTable_entries written out as CHAR LexNext.tab NextStateTable_entries written out as CARDINAL, one row/line LexOut.chr OutPutStateTable_entries written out as CHAR LexOut.tab OutPutStateTable_entries written out as CARDINAL, one row/line *) VAR outf : File; Saveout : ARRAY [ 1..60 ] OF CHAR; c : CHAR; i : CARDINAL; j : ClassIndexRange; BEGIN PutQStr("%PIC-I-Internal," ); PutQStr("Please make sure that LexNext.tab,.chr and LexOut.tab,.chr\n" ); PutQStr(" " ); PutQStr("already exists otherwise access violation will occur\n" ); (* Get name of file associated with "out" from RMS and save it. Later on, out would be restored back to this saved file name. *) Name( out, Saveout ); Open( outf, "PIC$TABLES:LexNext.chr", TRUE ); Open( out , "PIC$TABLES:LexNext.tab", TRUE ); FOR i := 1 TO MaxState DO FOR j := cc_Letter TO cc_Err DO c := NextStateTable[ i,j ]; WriteChar( outf, c ); WriteCard( ORD( c ),2 ); WriteString(" " ) END; WriteLn END; Close( outf ); Close( out ); Open( outf, "PIC$TABLES:LexOut.chr", TRUE ); Open( out , "PIC$TABLES:LexOut.tab", TRUE ); FOR i := 1 TO MaxState DO FOR j := cc_Letter TO cc_Err DO c := OutPutStateTable[ i,j ]; WriteChar( outf, c ); WriteCard( ORD( c ),2 ); WriteString(" " ) END; WriteLn END; Close( outf ); Close( out ); (* restore out to original file *) Open( out, Saveout, TRUE ) END DumpTables; (* InitScanner --- Initialize internal tables for scanner *) PROCEDURE InitScanner( VAR State : CARDINAL (* Out *) ); BEGIN State := 1; (* Scanner would never be in this state again *) LoadTables; (* load Next and Output state tables from files *) (* Init array of Token Types *) ArrayOfTokenTypes[ 1 ] := Identifier ; ArrayOfTokenTypes[ 2 ] := Label ; ArrayOfTokenTypes[ 3 ] := Compass ; ArrayOfTokenTypes[ 4 ] := IntegerNumber ; ArrayOfTokenTypes[ 5 ] := RealNumber ; ArrayOfTokenTypes[ 6 ] := MinusSign ; ArrayOfTokenTypes[ 7 ] := RightArrow ; ArrayOfTokenTypes[ 8 ] := QuotedString ; ArrayOfTokenTypes[ 9 ] := LeftArrow ; ArrayOfTokenTypes[ 10] := TwoWayArrow ; ArrayOfTokenTypes[ 11] := Assignment ; ArrayOfTokenTypes[ 12] := PlusSign ; ArrayOfTokenTypes[ 13] := Multiply ; ArrayOfTokenTypes[ 14] := Divide ; ArrayOfTokenTypes[ 15] := OpenParen ; ArrayOfTokenTypes[ 16] := CloseParen ; ArrayOfTokenTypes[ 17] := Comma ; ArrayOfTokenTypes[ 18] := EndOfCommand ; ArrayOfTokenTypes[ 19] := EndOfRecord ; ArrayOfTokenTypes[ 20] := Continuation ; ArrayOfTokenTypes[ 21] := CharToIgnore ; ArrayOfTokenTypes[ 22] := EndOfFile ; ArrayOfTokenTypes[ 23] := Error END InitScanner; (* NextState -- Return entry from NextState array *) PROCEDURE NextState( State : CARDINAL; (* In *) Class : CharClass (* In *) ) : CARDINAL; VAR HoldIt : CARDINAL; BEGIN HoldIt := ORD ( NextStateTable[ State, Class ] ); RETURN HoldIt END NextState; (* OutPut --- return entry from OutPutState table array *) PROCEDURE OutPut( State : CARDINAL; (* In *) Class : CharClass (* In *) ) : CARDINAL; BEGIN RETURN ORD ( OutPutStateTable[ State, Class ] ) END OutPut; (* LookUpTypeOfToken -- return token type from array of token types *) PROCEDURE LookUpTypeOfToken( Index : CARDINAL (* In *) ) : TokenType; BEGIN RETURN ArrayOfTokenTypes[ Index ] END LookUpTypeOfToken; (* LoadKeyWord -- called during initialization only *) PROCEDURE LoadKeyWord( Kw : ARRAY OF CHAR (* in *); Code : TokenType (* in *)); (* This procedure is invokes by InitLex ONLY in order to load the key words table for Lexical's use. *) VAR c, j : CARDINAL; BEGIN j := 0; WHILE ( j <= HIGH( Kw ) ) AND ( Kw[j] # 0C ) DO c := ORD( Kw[j] ); LexKeyWordTable[ tindex ].KeyWord[ j+1 ] := c; INC( j ) END; LexKeyWordTable[ tindex ].KeyWord[ j+1 ] := ENDSTR; LexKeyWordTable[ tindex ].LexCode := Code; INC( tindex ) END LoadKeyWord; (* GetCharAndClass -- get character and it's class *) PROCEDURE GetCharAndClass( VAR c : Character (* Out *); VAR Class : CharClass (* Out *) ); VAR JunkC : Character; BEGIN (* Need to echo the first line of input if Startingout is TRUE. *) IF( Startingout ) AND ( Echo ) THEN EchoInput( ); Startingout := FALSE; END; JunkC := Getc( c ); Class := ClassArray[ c ]; (* If the character gotten is NEWLINE, echo the next line of input whose characters will be "Getc"ed next time around. *) IF( Echo ) AND ( c = NEWLINE ) THEN EchoInput( ); END; END GetCharAndClass; (* GetToken -- Return next token and it's type *) PROCEDURE GetToken( VAR Token : String (* Out *); VAR TokenCode : TokenType (* Out *) ): TokenType; VAR i, j : CARDINAL; Found : BOOLEAN; Class : CharClass; Out : CARDINAL; BEGIN i := 0; REPEAT IF State <> 1 THEN IF i < MAXSTR-1 THEN INC( i ); Token[ i ] := SavedChar (* Stuff the current input character *) END END; GetCharAndClass( SavedChar, Class ); (* Get next char from input *) Out := OutPut( State, Class ); (* Get entry from OutPut table *) State := NextState( State, Class ); IF LexDebug THEN PutQStr(" State & Out > " ); WriteCard( State,3 ); PutQStr( ", " ); WriteCard( Out,3 ); PutQStr(" \n" ) END UNTIL Out <> 0; INC( i ); Token[ i ] := ENDSTR; TokenCode := LookUpTypeOfToken( Out ); RETURN TokenCode END GetToken; (* ShowToken -- Prints token and its type. *) PROCEDURE ShowToken( Token : String (* In *); TokenCode : TokenType (* In *) ); VAR i : TokenType; BEGIN PutStr( Token ); FOR i := box TO with DO IF TokenCode = i THEN PutQStr(" { Keyword } \n" ); RETURN END END; IF TokenCode = Identifier THEN PutQStr(" { Identifier }\n" ) ELSIF TokenCode = Label THEN PutQStr(" { Label }\n" ) ELSIF TokenCode = Compass THEN PutQStr(" { Compass }\n" ) ELSIF TokenCode = IntegerNumber THEN PutQStr(" { IntegerNumber }\n" ) ELSIF TokenCode = RealNumber THEN PutQStr(" { RealNumber }\n" ) ELSIF TokenCode = MinusSign THEN PutQStr(" { MinusSign }\n" ) ELSIF TokenCode = RightArrow THEN PutQStr(" { RightArrow }\n" ) ELSIF TokenCode = QuotedString THEN PutQStr(" { QuotedString }\n" ) ELSIF TokenCode = LeftArrow THEN PutQStr(" { LeftArrow }\n" ) ELSIF TokenCode = TwoWayArrow THEN PutQStr(" { TwoWayArrow }\n" ) ELSIF TokenCode = Assignment THEN PutQStr(" { Assignment }\n" ) ELSIF TokenCode = PlusSign THEN PutQStr(" { PlusSign }\n" ) ELSIF TokenCode = Multiply THEN PutQStr(" { Multiply }\n" ) ELSIF TokenCode = Divide THEN PutQStr(" { Divide }\n" ) ELSIF TokenCode = OpenParen THEN PutQStr(" { OpenParen }\n" ) ELSIF TokenCode = CloseParen THEN PutQStr(" { CloseParen }\n" ) ELSIF TokenCode = Comma THEN PutQStr(" { Comma }\n" ) ELSIF TokenCode = EndOfCommand THEN PutQStr(" { EndOfCommand }\n" ) ELSIF TokenCode = EndOfRecord THEN PutQStr(" { EndOfRecord }\n" ) ELSIF TokenCode = Continuation THEN PutQStr(" { Continuation }\n" ) ELSIF TokenCode = CharToIgnore THEN PutQStr(" { CharToIgnore }\n" ) ELSIF TokenCode = EndOfFile THEN PutQStr(" { EndOfFile }\n" ) ELSIF TokenCode = Error THEN PutQStr(" { Error }\n" ) END END ShowToken; (* ShowKeyWord -- Print a key word *) PROCEDURE ShowKeyWord( TokenCode : TokenType (* in *) ); VAR I, J : CARDINAL; BEGIN FOR I := 1 TO MAXKEYWORDS DO IF TokenCode = LexKeyWordTable[ I ].LexCode THEN PutQStr(" Keyword: " ); WITH LexKeyWordTable[ I ] DO J := 1; WHILE KeyWord[ J ] # ENDSTR DO Putc( KeyWord[ J ] ); INC( J ) END END; Putc( NEWLINE ); RETURN END END; PutQStr(" Not a Keyword\n" ) END ShowKeyWord; (* Lexical -- return next token and its type. Comments and white spaces are ignored *) PROCEDURE Lexical( VAR Token : String (* Out *); VAR TokenCode : TokenType (* Out *) ); PROCEDURE PutSStr ( Str : ShortStr (* In *) ); (* Put out a string of type ShortStr to Stdout. Invoked in Lexical only *) VAR i : CARDINAL; BEGIN i := 1; WHILE Str[ i ] <> ENDSTR DO Putc( Str[ i ] ); INC( i ) END END PutSStr; PROCEDURE MayBeAKeyWord( Token : String (* in *); VAR TokenCode : TokenType (* in/out *) ); (* This procedure is invokes by Lexical ONLY to determine if an identifier seen is a key word. The procedure does a binary search of the LexKeyWordTable and if identifier matches one of the keywords, the TokenCode is changed to reflect that. *) PROCEDURE Eqs( s1 : String; s2 : ShortStr ): INTEGER; (* This procedure is local to MayBeAKeyWord and is a special version of the "Strcmp" procedure in that, the second string is of type "ShortStr". *) VAR j : CARDINAL; BEGIN j := 1; WHILE ( s1[j] = s2[j] ) DO IF ( s1[j] = ENDSTR ) OR ( s2[j] = ENDSTR ) THEN RETURN s1[j] - s2[j] END; INC ( j ) END; (* mismatch found at j *) RETURN s1[j] - s2[j] END Eqs; VAR i : CARDINAL; eqv : INTEGER; high, low, mid : CARDINAL; BEGIN low := 1; high := MAXKEYWORDS; WHILE low <= high DO mid := ( low + high ) DIV 2; eqv := Eqs( Token, LexKeyWordTable[mid].KeyWord ); IF LexDebug THEN PutQStr("Debug: in Maybekw, compared "); PutStr( Token ); PutQStr(" " ); PutSStr( LexKeyWordTable[mid].KeyWord ); PutQStr( " eqv " ); WriteInt( eqv, 2 ); WriteLn END; IF eqv = 0 THEN TokenCode := LexKeyWordTable[mid].LexCode; RETURN END; IF eqv < 0 THEN high := mid - 1 ELSE low := mid + 1 END END END MayBeAKeyWord; PROCEDURE StripDot ( VAR Token : String (* out *) ); (* Called by Lexical only, strips off the "." from a compass Location, e.g ".ne". *) VAR I : CARDINAL; BEGIN I := 1; WHILE Token[ I+1 ] # ENDSTR DO Token[ I ] := Token[ I+1 ]; INC( I ) END; Token[ I ] := ENDSTR END StripDot; PROCEDURE StripQuotes( VAR Token : String (* out *) ); (* Called by Lexical only, strips off the quotes from strings. *) VAR I : CARDINAL; BEGIN I := 1; WHILE Token[ I+1 ] # ENDSTR DO Token[ I ] := Token[ I+1 ]; INC( I ) END; Token[ I-1 ] := ENDSTR (* kill ending quote *) END StripQuotes; PROCEDURE StripColon( VAR Token : String (* out *) ); (* Called by Lexical only, strips off the : from a label token *) VAR I : CARDINAL; BEGIN I := Strloc( Token, COLON ); IF I > 0 THEN Token[ I ] := ENDSTR END END StripColon; VAR tok : TokenType; BEGIN tok := GetToken( Token, TokenCode ); IF tok = EndOfFile THEN RETURN END; (* Strip off ' or " from strings *) IF TokenCode = QuotedString THEN StripQuotes( Token ); RETURN END; IF TokenCode = Label THEN StripColon( Token ); RETURN END; (* if compass location, strip off "." *) IF TokenCode = Compass THEN StripDot( Token ) END; (* check for keywords *) MayBeAKeyWord( Token, TokenCode ) END Lexical; (* Begin initialization of LEX *) VAR c : Character; tindex : CARDINAL; BEGIN InitScanner( State ); (* This routine will do all the work *) (* fill table with keywords *) tindex := 1; LoadKeyWord( 'acw', acw ); LoadKeyWord( 'and', and ); LoadKeyWord( 'arc', arc ); LoadKeyWord( 'arrow', arrow ); LoadKeyWord( 'arrowlen', arrowlen ); LoadKeyWord( 'at', at ); LoadKeyWord( 'between', between ); LoadKeyWord( 'black', black ); LoadKeyWord( 'blue', blue ); LoadKeyWord( 'bottom', bottom ); LoadKeyWord( 'box', box ); LoadKeyWord( 'boxht', boxht ); LoadKeyWord( 'boxwid', boxwid ); LoadKeyWord( 'center', center ); LoadKeyWord( 'chop', chop ); LoadKeyWord( 'cht', cht ); LoadKeyWord( 'circle', circle ); LoadKeyWord( 'color', color ); LoadKeyWord( 'cw', cw ); LoadKeyWord( 'cwid', cwid ); LoadKeyWord( 'cyan', cyan ); LoadKeyWord( 'dashed', dashed ); LoadKeyWord( 'dotted', dotted ); LoadKeyWord( 'down', down ); LoadKeyWord( 'downleft', downleft ); LoadKeyWord( 'downright', downright ); LoadKeyWord( 'east', east ); LoadKeyWord( 'ellipse', ellipse ); LoadKeyWord( 'end', end ); LoadKeyWord( 'fifth', fifth ); LoadKeyWord( 'first', first ); LoadKeyWord( 'fourth', fourth ); LoadKeyWord( 'from', from ); LoadKeyWord( 'green', green ); LoadKeyWord( 'ht', ht ); LoadKeyWord( 'invisible', invisible ); LoadKeyWord( 'last', last ); LoadKeyWord( 'left', left ); LoadKeyWord( 'line', line ); LoadKeyWord( 'linelen', linelen ); LoadKeyWord( 'magenta', magenta ); LoadKeyWord( 'majax', majax ); LoadKeyWord( 'minax', minax ); LoadKeyWord( 'move', move ); LoadKeyWord( 'ne', ne ); LoadKeyWord( 'ninth', ninth ); LoadKeyWord( 'north', north ); LoadKeyWord( 'nw', nw ); LoadKeyWord( 'of', of ); LoadKeyWord( 'orange', orange ); LoadKeyWord( 'origin', origin ); LoadKeyWord( 'rad', rad ); LoadKeyWord( 'red', red ); LoadKeyWord( 'right', right ); LoadKeyWord( 'same', same ); LoadKeyWord( 'se', se ); LoadKeyWord( 'second', second ); LoadKeyWord( 'seventh', seventh ); LoadKeyWord( 'sixth', sixth ); LoadKeyWord( 'solid', solid ); LoadKeyWord( 'south', south ); LoadKeyWord( 'spline', spline ); LoadKeyWord( 'start', start ); LoadKeyWord( 'sw', sw ); LoadKeyWord( 'tenth', tenth ); LoadKeyWord( 'third', third ); LoadKeyWord( 'to', to ); LoadKeyWord( 'top', top ); LoadKeyWord( 'up', up ); LoadKeyWord( 'upleft', upleft ); LoadKeyWord( 'upright', upright ); LoadKeyWord( 'west', west ); LoadKeyWord( 'white', white ); LoadKeyWord( 'wid', width ); LoadKeyWord( 'width', width ); LoadKeyWord( 'with', with ); LoadKeyWord( 'yellow', yellow ); FOR c := ENDFILE TO DEL DO IF ( ( c >= LETA ) AND ( c <= LETZ ) ) OR ( ( c >= BIGA ) AND ( c <= BIGZ ) ) OR ( c = UNDERSCORE ) THEN ClassArray[ c ] := cc_Letter ELSIF ( c >= DIG0 ) AND ( c <= DIG9 ) THEN ClassArray[ c ] := cc_Digit ELSIF c = COMMA THEN ClassArray[ c ] := cc_Comma ELSIF c = PLUS THEN ClassArray[ c ] := cc_Plus ELSIF c = STAR THEN ClassArray[ c ] := cc_Star ELSIF c = SLASH THEN ClassArray[ c ] := cc_Slash ELSIF c = EQUAL THEN ClassArray[ c ] := cc_Equal ELSIF c = MINUS THEN ClassArray[ c ] := cc_Minus ELSIF c = GREATERTHAN THEN ClassArray[ c ] := cc_GreaterThan ELSIF c = DECIMAL THEN ClassArray[ c ] := cc_Decimal ELSIF c = QUOTE THEN ClassArray[ c ] := cc_Quote ELSIF c = COLON THEN ClassArray[ c ] := cc_Colon ELSIF c = SEMICOLON THEN ClassArray[ c ] := cc_Semicolon ELSIF c = BACKSLASH THEN ClassArray[ c ] := cc_Backslash ELSIF c = NEWLINE THEN ClassArray[ c ] := cc_Newline ELSIF c = LESSTHAN THEN ClassArray[ c ] := cc_LessThan ELSIF c = OPENPAREN THEN ClassArray[ c ] := cc_Lparen ELSIF c = CLOSEPAREN THEN ClassArray[ c ] := cc_Rparen ELSIF c = ENDFILE THEN ClassArray[ c ] := cc_FileEnd ELSIF c < BLANK THEN ClassArray[ c ] := cc_Ignore ELSE ClassArray[ c ] := cc_Ignore END END; (* FOR *) Echo := FALSE; Startingout := TRUE; END Lex.