Wednesday, August 24, 2016
Flex: Breaking Code into Lexemes and Inserting Tokens into a Symbol Table
August 24, 2016
code
,
compiler
,
cpp
,
flex
,
lexical analysis
,
symbol table
,
tokenizing
Explanation:
This code requires a Linux OS; I recommend Linux Mint. It also requires g++ and flex to be installed. If they are not installed, just type their names in the terminal and it will show the appropriate commands to install them. The symbol table insertions can be inspected with the command "pst" when input is given through the command line. In the file cs.sh, uncomment the line ./samp and comment out ./samp < in.txt > out.txt to enter input through the terminal and see the output there as well.
File Structure:

Bash Script:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Build the scanner from assignment2.l and run it.
# Abort on the first failing step so a broken flex/g++ run never executes a
# stale ./samp binary left over from an earlier build.
set -e

# Generate the scanner source on stdout (-t) and capture it in test.c.
flex -t assignment2.l > test.c
# Compile with g++ because the prologue of the scanner is C++.
g++ -c -o test.o test.c
# -ll links the lex support library.
g++ -o samp test.o -ll

# Interactive run: type input in the terminal.
./samp
# Batch run: swap the comment markers to read in.txt and write out.txt.
#./samp < in.txt > out.txt
Code:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%{ | |
#include<bits/stdc++.h> | |
using namespace std; | |
/* Number of hash buckets in the symbol table. */
#define CHAIN_LENGTH 53
/* Capacity in bytes (terminator included) of each text field of an entry. */
#define M 128

/* One symbol-table entry: a lexeme, its class tag, and the link to the next
   entry stored in the same bucket. */
struct symbol_info{
    char name[M];
    char classtype[M];
    struct symbol_info *next;
};

/* Bucket heads of the chained hash table. As a global array it starts out
   zero-initialized, i.e. every bucket is an empty (NULL) chain. */
symbol_info *block[CHAIN_LENGTH];
int cHash(char* name){ | |
int idx = 0; | |
for(int i = 0; name[i]; ++i){ | |
idx = idx + name[i]; | |
} | |
return (idx % CHAIN_LENGTH); | |
} | |
void cInsert(char* name, char* classtype){ | |
int pos = cHash(name); | |
if( block[pos] == NULL ){ | |
block[pos] = new symbol_info(); | |
strcpy(block[pos]->name, name); | |
//block[pos]->name = name; | |
strcpy(block[pos]->classtype, classtype); | |
//block[pos]->classtype = classtype; | |
block[pos]->next = NULL; | |
} | |
else{ | |
symbol_info* newNode = new symbol_info(); | |
strcpy(newNode->name, name); | |
//newNode->name = name; | |
strcpy(newNode->classtype, classtype); | |
//newNode->classtype = classtype; | |
// pointer swap | |
symbol_info* nextNode = block[pos]; | |
block[pos] = newNode; | |
newNode->next = nextNode; | |
} | |
} | |
bool cSearch(char* name, char* classtype){ | |
// Implement | |
int pos = cHash(name); | |
symbol_info* temp = block[pos]; | |
while( temp != NULL ){ | |
if( !strcmp( temp->name, name ) && !strcmp( temp->classtype, classtype ) ){ | |
return true; | |
} | |
temp = temp->next; | |
} | |
return false; | |
} | |
void cDelete(char* name, char* classtype){ | |
int pos = cHash(name); | |
symbol_info* temp = block[pos]; | |
if(temp == NULL) return; | |
// At head but no one to follow | |
if( temp->next == NULL && !strcmp( temp->name, name ) && !strcmp( temp->classtype, classtype ) ){ | |
block[pos] = NULL; | |
} | |
// At head has followers | |
else if( !strcmp( temp->name, name ) && !strcmp( temp->classtype, classtype ) ){ | |
block[pos] = temp->next; | |
} | |
else{ | |
while( temp->next != NULL ){ | |
if ( !strcmp( temp->next->name, name ) && !strcmp( temp->next->classtype, classtype ) ){ | |
printf("FOUND IT %s : %s\n", temp->name, temp->classtype ); | |
break; | |
} | |
temp = temp->next; | |
} | |
if( temp != NULL ){ | |
symbol_info* found = temp->next; | |
temp->next = found->next; | |
delete(found); | |
} | |
} | |
} | |
void cUpdate(char* name, char* classtype, char* updatedClasstype){ | |
int pos = cHash(name); | |
symbol_info* temp = block[pos]; | |
while( temp != NULL ){ | |
if( !strcmp( temp->name, name ) && !strcmp( temp->classtype, classtype ) ){ | |
strcpy(temp->classtype, updatedClasstype); | |
//temp->classtype = updatedClasstype; | |
return; | |
} | |
temp = temp->next; | |
} | |
} | |
void showSymbolTable(){ | |
// Implement | |
for(int i = 0; i < CHAIN_LENGTH; ++i){ | |
printf("%d:", i); | |
// Do not modify the head | |
symbol_info* temp = block[i]; | |
while( temp != NULL ){ | |
printf("->[%s|%s]", temp->name, temp->classtype); | |
temp = temp->next; | |
} | |
printf("\n"); | |
} | |
} | |
/* Print the symbol-table menu and read the user's choice from stdin.
   Returns the integer entered, or -1 when no integer could be read (end of
   input or non-numeric text), so the caller never sees an indeterminate value.
   Unreadable input is left in the stream. */
int showMenu(){
    std::cout << "Menu:\n";
    std::cout << "=====\n";
    std::string message = "Enter 1 to insert(name, class type)\n"
                          "Enter 2 to update(name, class type, new class type)\n"
                          "Enter 3 to search(name, class type)\n"
                          "Enter 4 to delete(name, class type)\n"
                          "Enter 5 to show the symbol table\n";
    std::cout << message << "\n";
    std::cout << "User Choice: ";

    int choice = -1;
    if( scanf("%d", &choice) != 1 ){
        return -1;
    }
    return choice;
}
// Running count of newline characters matched by the scanner's newline rule.
int line_count = 0;
%}
/* Named patterns used by the rules section. */
/* Command that dumps the symbol table. */
printsymboltable "pst"
/* Punctuation reported as plain symbols. */
group1 "]"|"["|"("|")"|","|";"|":"
addgroup "+"|"-"|"or"
mulgroup "*"|"/"|"mod"|"and"|"div"
relgroup "="|"<>"|">="|"<"|">"|"<="
/* NOTE(review): "oh" looks like a typo for Pascal's "of" - confirm before changing. */
keywordgroup "program"|"if"|"not"|"end"|"begin"|"else"|"then"|"do"|"while"|"function"|"Procedure"|"integer"|"real"|"var"|"oh"|"array"|"write"
/* A brace comment that occupies a whole line. */
pascalcomment ^\{.*\}$
/* Integers need at least one digit, so the pattern can never match the empty
   string. Reals accept a zero integer part (0.5) as well as a non-zero one,
   with an optional signed exponent. */
numbersgroup ([0-9]+)|([+-]?((0|[1-9][0-9]*)[.][0-9]+)([E][-+][1-9][0-9]*)?)
variablegroup [a-zA-Z_][A-Za-z0-9_]*
singlelinecomments (\/\*.*\*\/)|(\/\/.*)
/* A string ends at the first closing quote, so two strings on one line are
   two tokens instead of one greedy match. */
doublequotedstrings \"[^\"\n]*\"
%%
[/][*][^*]*[*]+([^*/][^*]*[*]+)*[/] {
    /* C-style block comment; the match may span several lines. */
    printf("MULTILINE COMMENT:\n%s\n", yytext);
}
[\n] {
    /* Count and report every newline matched by this rule. */
    ++line_count;
    printf("\nLine Count:%d\n", line_count);
}
{keywordgroup} {
    /* Keywords are reported in upper case and are not stored in the table. */
    string s(yytext);
    transform(s.begin(), s.end(), s.begin(), ::toupper);
    cout << "Keyword:" << s << "\n";
}
{printsymboltable} {
    showSymbolTable();
}
{group1} {
    printf("Symbols:%s\n", yytext);
}
{addgroup} {
    /* The tag lives in a local array because cInsert takes char*, and passing
       a string literal there triggers a deprecated-conversion warning. */
    char classtype_attribute[] = "ADDOP";
    cInsert(yytext, classtype_attribute);
    printf("ADDOP:%s\n", yytext);
}
{mulgroup} {
    char classtype_attribute[] = "MULOP";
    cInsert(yytext, classtype_attribute);
    printf("MULOP:%s\n", yytext);
}
{relgroup} {
    char classtype_attribute[] = "RELOP";
    cInsert(yytext, classtype_attribute);
    printf("RELOP:%s\n", yytext);
}
".." {
    char classtype_attribute[] = "DOTDOT";
    cInsert(yytext, classtype_attribute);
    printf("DOTDOT:%s\n", yytext);
}
":=" {
    char classtype_attribute[] = "ASSIGNOP";
    cInsert(yytext, classtype_attribute);
    printf("ASSIGNOP:%s\n", yytext);
}
{pascalcomment} {
    printf("Pascal Comment:%s\n", yytext);
}
{numbersgroup} {
    char classtype_attribute[] = "Number";
    cInsert(yytext, classtype_attribute);
    printf("Number:%s\n", yytext);
}
{variablegroup} {
    /* Class tag spelled "Identifier" (was misspelled "Indentifier"). */
    char classtype_attribute[] = "Identifier";
    cInsert(yytext, classtype_attribute);
    printf("Variable:%s\n", yytext);
}
{singlelinecomments} {
    printf("Comment:%s\n", yytext);
}
{doublequotedstrings} {
    printf("String: %s\n", yytext);
}
%%
/* Entry point: run the generated scanner once and exit with status 0.
   yylex() is called a single time and its return value is ignored. */
int main(){
    yylex();
    return 0;
}
Sample Input:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
This is a comment | |
*/ | |
{Another} | |
// And another | |
/* Another */ | |
if x >= 14 then | |
begin | |
x = x + 2 | |
end | |
Procedure( 3.3E+9 ) |
Symbol Table using "pst" Command:

Sample Output:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
MULTILINE COMMENT: | |
/* | |
This is a comment | |
*/ | |
Line Count:1 | |
Line Count:2 | |
Pascal Comment:{Another} | |
Line Count:3 | |
Line Count:4 | |
Comment:// And another | |
Line Count:5 | |
Line Count:6 | |
MULTILINE COMMENT: | |
/* Another */ | |
Line Count:7 | |
Line Count:8 | |
Keyword:IF | |
Variable:x | |
RELOP:>= | |
Number:14 | |
Keyword:THEN | |
Line Count:9 | |
Keyword:BEGIN | |
Line Count:10 | |
Variable:x | |
RELOP:= | |
Variable:x | |
ADDOP:+ | |
Number:2 | |
Line Count:11 | |
Keyword:END | |
Line Count:12 | |
Line Count:13 | |
Keyword:PROCEDURE | |
Symbols:( | |
Number:3.3E+9 | |
Symbols:) | |
Line Count:14 |
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment