Wednesday, August 24, 2016

Flex: Breaking Code into Lexemes and Inserting Tokens into the Symbol Table


Explanation:

This code requires a Linux OS (I recommend Linux Mint) with g++ and flex installed. If either one is missing, type its name in the terminal and the shell will suggest the appropriate command to install it.

The contents of the symbol table can be displayed with the command "pst" when input is typed at the command line. To type input in the terminal and see the output there, make sure that in cs.sh the line ./samp is uncommented and the line ./samp < in.txt > out.txt is commented out; swap the two to read the input from in.txt and write the output to out.txt instead.

File Structure:


Bash Script:

#!/bin/bash
# Build the scanner from assignment2.l and run it.
# Stop at the first failing step so a stale or half-built ./samp is never run.
set -e
# -t makes flex write the generated scanner to stdout; capture it as test.c.
flex -t assignment2.l > test.c
# Compile the generated scanner as C++ (the prologue of assignment2.l uses C++ headers).
g++ -c -o test.o test.c
# Link against the lex support library.
g++ -o samp test.o -ll
# Interactive run: input is typed at the terminal.
./samp
# Batch run: read in.txt and write the result to out.txt.
#./samp < in.txt > out.txt
view raw cs.sh hosted with ❤ by GitHub

Code:

%{
#include<bits/stdc++.h>
using namespace std;
/* Number of buckets in the symbol table. */
#define CHAIN_LENGTH 53
/* Size in bytes, terminator included, of each text field of an entry. */
#define M 128
/* One symbol-table entry: a lexeme, its class label, and the link to the
   next entry stored in the same bucket. */
struct symbol_info{
    char name[M];
    char classtype[M];
    symbol_info *next;
};
/* Bucket heads. The array has static storage, so every head starts out null. */
symbol_info *block[CHAIN_LENGTH];
/*
 * Map a lexeme to its bucket index in the symbol table.
 * name: NUL-terminated lexeme to hash.
 * Returns the sum of the bytes of name modulo CHAIN_LENGTH, always in the
 * range [0, CHAIN_LENGTH).
 */
int cHash(char* name){
    unsigned int sum = 0;
    for(int i = 0; name[i]; ++i){
        // Read each byte as unsigned: where plain char is signed, a byte
        // >= 0x80 is negative and could push the remainder, and with it the
        // bucket index, below zero.
        sum += static_cast<unsigned char>(name[i]);
    }
    return static_cast<int>(sum % CHAIN_LENGTH);
}
void cInsert(char* name, char* classtype){
int pos = cHash(name);
if( block[pos] == NULL ){
block[pos] = new symbol_info();
strcpy(block[pos]->name, name);
//block[pos]->name = name;
strcpy(block[pos]->classtype, classtype);
//block[pos]->classtype = classtype;
block[pos]->next = NULL;
}
else{
symbol_info* newNode = new symbol_info();
strcpy(newNode->name, name);
//newNode->name = name;
strcpy(newNode->classtype, classtype);
//newNode->classtype = classtype;
// pointer swap
symbol_info* nextNode = block[pos];
block[pos] = newNode;
newNode->next = nextNode;
}
}
bool cSearch(char* name, char* classtype){
// Implement
int pos = cHash(name);
symbol_info* temp = block[pos];
while( temp != NULL ){
if( !strcmp( temp->name, name ) && !strcmp( temp->classtype, classtype ) ){
return true;
}
temp = temp->next;
}
return false;
}
void cDelete(char* name, char* classtype){
int pos = cHash(name);
symbol_info* temp = block[pos];
if(temp == NULL) return;
// At head but no one to follow
if( temp->next == NULL && !strcmp( temp->name, name ) && !strcmp( temp->classtype, classtype ) ){
block[pos] = NULL;
}
// At head has followers
else if( !strcmp( temp->name, name ) && !strcmp( temp->classtype, classtype ) ){
block[pos] = temp->next;
}
else{
while( temp->next != NULL ){
if ( !strcmp( temp->next->name, name ) && !strcmp( temp->next->classtype, classtype ) ){
printf("FOUND IT %s : %s\n", temp->name, temp->classtype );
break;
}
temp = temp->next;
}
if( temp != NULL ){
symbol_info* found = temp->next;
temp->next = found->next;
delete(found);
}
}
}
void cUpdate(char* name, char* classtype, char* updatedClasstype){
int pos = cHash(name);
symbol_info* temp = block[pos];
while( temp != NULL ){
if( !strcmp( temp->name, name ) && !strcmp( temp->classtype, classtype ) ){
strcpy(temp->classtype, updatedClasstype);
//temp->classtype = updatedClasstype;
return;
}
temp = temp->next;
}
}
void showSymbolTable(){
// Implement
for(int i = 0; i < CHAIN_LENGTH; ++i){
printf("%d:", i);
// Do not modify the head
symbol_info* temp = block[i];
while( temp != NULL ){
printf("->[%s|%s]", temp->name, temp->classtype);
temp = temp->next;
}
printf("\n");
}
}
/*
 * Print the symbol-table menu and read the user's selection from standard input.
 * Returns the integer that was read, or -1 when no integer could be read
 * (end of input or non-numeric text), so the caller never receives an
 * indeterminate value.
 */
int showMenu(){
    std::cout << "Menu:\n";
    std::cout << "=====\n";
    std::string message = "Enter 1 to insert(name, class type)\n"
    "Enter 2 to update(name, class type, new class type)\n"
    "Enter 3 to search(name, class type)\n"
    "Enter 4 to delete(name, class type)\n"
    "Enter 5 to show the symbol table\n";
    std::cout << message << "\n";
    std::cout << "User Choice: ";
    int choice = -1;
    // scanf leaves choice untouched when the conversion fails, so its result
    // has to be checked before the value is trusted.
    if( scanf("%d", &choice) != 1 ){
        choice = -1;
    }
    return choice;
}
// Running count of the newline characters matched by the [\n] rule.
int line_count = 0;
%}
/* Lexeme that triggers a dump of the symbol table. */
printsymboltable "pst"
/* Punctuation. */
group1 "]"|"["|"("|")"|","|";"|":"
/* Additive, multiplicative and relational operators. */
addgroup "+"|"-"|"or"
mulgroup "*"|"/"|"mod"|"and"|"div"
relgroup "="|"<>"|">="|"<"|">"|"<="
/* Reserved words. "of" replaces the former "oh" entry, a typo for the Pascal
   keyword; "procedure" is accepted next to the original capitalised spelling. */
keywordgroup "program"|"if"|"not"|"end"|"begin"|"else"|"then"|"do"|"while"|"function"|"procedure"|"Procedure"|"integer"|"real"|"var"|"of"|"array"|"write"
/* Brace comment that occupies a whole line. */
pascalcomment ^\{.*\}$
/* Unsigned digit string, or a decimal number with an optional sign and an
   optional signed E exponent. The integer alternative requires at least one
   digit so that it can never match the empty string. */
numbersgroup ([0-9]+)|([+-]?([1-9][0-9]*[.][0-9]+)([E][-+][1-9][0-9]*)?)
/* Identifier: a letter or underscore followed by letters, digits, underscores. */
variablegroup [a-zA-Z_][A-Za-z0-9_]*
/* Rest-of-line comment introduced by two slashes. Slash-star comments are left
   to the dedicated rule in the rules section, whose match ends at the first
   terminator instead of running on to the last one on the line. */
singlelinecomments \/\/.*
/* Double-quoted string on a single line. The body excludes the quote
   character, so the match ends at the first closing quote. */
doublequotedstrings \"[^"\n]*\"
%%
[/][*][^*]*[*]+([^*/][^*]*[*]+)*[/] {
// Slash-star comment, matched up to its first terminator. The negated
// character classes also accept newlines, so the comment may span lines.
// NOTE: newlines consumed here never reach the newline rule, so line_count
// is not advanced for them.
printf("MULTILINE COMMENT:\n%s\n", yytext);
}
[\n] {
    // Advance the running newline tally and report the new value.
    printf("\nLine Count:%d\n", ++line_count);
}
{keywordgroup} {
    // Report the reserved word in upper case.
    string upper(yytext);
    for(string::size_type i = 0; i < upper.size(); ++i){
        upper[i] = ::toupper(upper[i]);
    }
    cout << "Keyword:" << upper << "\n";
}
{printsymboltable} {
// The pst command: print every bucket of the symbol table.
showSymbolTable();
}
{group1} {
// Punctuation is only reported; it is not entered into the symbol table.
printf("Symbols:%s\n", yytext);
}
{addgroup} {
    // The label lives in a writable array because cInsert takes char*;
    // passing a string literal would raise the deprecated-conversion warning.
    char kind[] = "ADDOP";
    cInsert(yytext, kind);
    printf("%s:%s\n", kind, yytext);
}
{mulgroup} {
    // Multiplicative operator: record it, then report it.
    char kind[] = "MULOP";
    cInsert(yytext, kind);
    printf("%s:%s\n", kind, yytext);
}
{relgroup} {
    // Relational operator: record it, then report it.
    char kind[] = "RELOP";
    cInsert(yytext, kind);
    printf("%s:%s\n", kind, yytext);
}
".." {
    // Range operator: record it, then report it.
    char kind[] = "DOTDOT";
    cInsert(yytext, kind);
    printf("%s:%s\n", kind, yytext);
}
":=" {
    // Assignment operator: record it, then report it.
    char kind[] = "ASSIGNOP";
    cInsert(yytext, kind);
    printf("%s:%s\n", kind, yytext);
}
{pascalcomment} {
// Brace comment filling a whole line: reported verbatim, nothing is stored.
printf("Pascal Comment:%s\n", yytext);
}
{numbersgroup} {
    // Numeric literal: record it in the symbol table, then report it.
    char kind[] = "Number";
    cInsert(yytext, kind);
    printf("%s:%s\n", kind, yytext);
}
{variablegroup} {
    // Identifier: record it in the symbol table, then report it.
    // The class label is spelled Identifier (it used to read Indentifier) and
    // is kept in a writable array because cInsert takes char*.
    char classtype_attribute[] = "Identifier";
    cInsert(yytext, classtype_attribute);
    printf("Variable:%s\n", yytext);
}
{singlelinecomments} {
// Comment lexeme: reported verbatim, nothing is stored.
printf("Comment:%s\n", yytext);
}
{doublequotedstrings} {
// String literal, quotes included: reported verbatim, nothing is stored.
printf("String: %s\n", yytext);
}
%%
// Program entry point: run the scanner once and exit with status 0.
int main(){
// The value returned by yylex is not used.
yylex();
return 0;
}

Sample Input:

/*
This is a comment
*/
{Another}
// And another
/* Another */
if x >= 14 then
begin
x = x + 2
end
Procedure( 3.3E+9 )
view raw in.txt hosted with ❤ by GitHub

Symbol Table using "pst" Command:


Sample Output:

MULTILINE COMMENT:
/*
This is a comment
*/
Line Count:1
Line Count:2
Pascal Comment:{Another}
Line Count:3
Line Count:4
Comment:// And another
Line Count:5
Line Count:6
MULTILINE COMMENT:
/* Another */
Line Count:7
Line Count:8
Keyword:IF
Variable:x
RELOP:>=
Number:14
Keyword:THEN
Line Count:9
Keyword:BEGIN
Line Count:10
Variable:x
RELOP:=
Variable:x
ADDOP:+
Number:2
Line Count:11
Keyword:END
Line Count:12
Line Count:13
Keyword:PROCEDURE
Symbols:(
Number:3.3E+9
Symbols:)
Line Count:14
view raw out.txt hosted with ❤ by GitHub

No comments: