Wednesday, August 24, 2016
Creating a Simple Compiler Symbol Table using Hashing C++ and Explanation
August 24, 2016
ascii sum hash
,
code
,
compiler construction
,
cpp
,
explanation
,
hash table
,
hashing
,
linked list insertion diagram
,
symbol table
,
tutorial
Explanations:
Compiler symbol table stores information that is needed in various phases if compiler. It may store information such as the data type, value, memory location, class type(identifier, number, operator), for functions(function name, arguments types, argument number, return type) etc.In the code below it contains only two columns name and class type. Ex: $ a = a + b $. Operators are internal nodes and variables are leaf nodes. No need to store a variable twice. In the example above, a will be stored exactly once.
Symbol Table for, $ a = a + b $:
Required Functions:
Functions Names:1. insert 2. delete 3. update 4. search 5. show
Efficient Data Structure:
A 2D array could be used to store the data, but it may have underutilized space. So, linked list is better. Now using linked list if we keep inserting elements then a single chain gets longer and longer. We may have do a linear search on the whole thing to find a necessary element. It is neither good in time and space complexity.We can get rid of this problem using hashing. Through hashing we can insert elements in various chains. Using this method we can skip searching a lot of unnecessary elements by skipping many chains. Collision resolution is done through separate chaining as new elements are appended to the end of chain when an element present.
Worst case time complexity arises when all the elements are present in the single chain. It is also better in terms of space complexity as space is better utilized when using multiple chains and there is less fragmented space.
Selecting Hashing Method:
If hashing method was chosen based on class type some chains may be completely empty and some have all elements. Ex:
This leaves the name to create chain with. The hashing is done adding the corresponding ASCII characters of a string then positioning it inside the hash table by modulo with table size or, chain size.
Hashing Example:
If the input was $ \text{<} abc, identifier \text{>} $ then, the chain or, bucket is calculated using below method,$ idx \xrightarrow[ASCII]{SUM} ( 'i' + 'd' + 'x' ) % CHAIN_LENGTH \rightarrow ( 105 + 100 + 120 ) % 53 \rightarrow ( 325 ) % 53 \rightarrow 7 $
How to use the Code:
Insertion:
Insertion and Collision Resolution Diagram:

Showing the Symbol Table:
Code:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Author: Asif Ahmed | |
* Description: Compiler Symbol table creation. | |
* Site: https://quickgrid.blogspot.com | |
* If there is any problem first try the solution below: | |
* 1. Change char pointers inside to fixed size array. | |
* 2. In the insert function use strcpy() function instead of -> | |
*/ | |
#include<bits/stdc++.h> | |
using namespace std; | |
// It has been seen if chain length is < 50 then some space is underutilized, | |
// Theoretically seen if > 50, prime number is taken then chain takes moderate space. | |
// So thats why 53 is used. | |
#define CHAIN_LENGTH 53 | |
#define M 128 | |
// Two columns of the symbol table with name and class type. | |
struct symbol_info{ | |
char *name; | |
char *classtype; | |
struct symbol_info *next; | |
} *block[CHAIN_LENGTH]; | |
// Hashing position is calculated using sum of all character ascii values | |
// Then performing Modulo operation to go to any bucket from 0 to CHAIN_LENGTH | |
int cHash(char* name){ | |
int idx = 0; | |
for(int i = 0; name[i]; ++i){ | |
idx = idx + name[i]; | |
} | |
return (idx % CHAIN_LENGTH); | |
} | |
// If there is no element in the chain then new element is added in front, | |
// otherwise through hashing if we reach a chain or, bucket that contains an | |
// element then we insert the new element at the beginning of the chain and | |
// the rest of the elements is lniked to the end of new node. | |
void cInsert(char* name, char* classtype){ | |
int pos = cHash(name); | |
if( block[pos] == NULL ){ | |
block[pos] = new symbol_info(); | |
block[pos]->name = name; | |
block[pos]->classtype = classtype; | |
block[pos]->next = NULL; | |
} | |
else{ | |
symbol_info* newNode = new symbol_info(); | |
newNode->name = name; | |
newNode->classtype = classtype; | |
// pointer swap | |
symbol_info* nextNode = block[pos]; | |
block[pos] = newNode; | |
newNode->next = nextNode; | |
} | |
} | |
// Go to certain chain through hashing since we know others will not contain the searched value. | |
// Next in that chain do a linear search on all element to see if it is there. | |
bool cSearch(char* name, char* classtype){ | |
// Implement | |
int pos = cHash(name); | |
symbol_info* temp = block[pos]; | |
while( temp != NULL ){ | |
if( !strcmp( temp->name, name ) && !strcmp( temp->classtype, classtype ) ){ | |
return true; | |
} | |
temp = temp->next; | |
} | |
return false; | |
} | |
// If the name and class type both matches then delete the element. | |
void cDelete(char* name, char* classtype){ | |
int pos = cHash(name); | |
symbol_info* temp = block[pos]; | |
if(temp == NULL) return; | |
// At head but no one to follow | |
if( temp->next == NULL && !strcmp( temp->name, name ) && !strcmp( temp->classtype, classtype ) ){ | |
block[pos] = NULL; | |
} | |
// At head has followers | |
else if( !strcmp( temp->name, name ) && !strcmp( temp->classtype, classtype ) ){ | |
block[pos] = temp->next; | |
} | |
else{ | |
while( temp->next != NULL ){ | |
if ( !strcmp( temp->next->name, name ) && !strcmp( temp->next->classtype, classtype ) ){ | |
printf("FOUND IT %s : %s\n", temp->name, temp->classtype ); | |
break; | |
} | |
temp = temp->next; | |
} | |
if( temp != NULL ){ | |
symbol_info* found = temp->next; | |
temp->next = found->next; | |
delete(found); | |
} | |
} | |
} | |
// Update an old class type with a new one. Change based on your need. | |
void cUpdate(char* name, char* classtype, char* updatedClasstype){ | |
int pos = cHash(name); | |
symbol_info* temp = block[pos]; | |
while( temp != NULL ){ | |
if( !strcmp( temp->name, name ) && !strcmp( temp->classtype, classtype ) ){ | |
temp->classtype = updatedClasstype; | |
return; | |
} | |
temp = temp->next; | |
} | |
} | |
// Print the symbol table chain wise. | |
void showSymbolTable(){ | |
// Implement | |
for(int i = 0; i < CHAIN_LENGTH; ++i){ | |
printf("%d:", i); | |
// Do not modify the head | |
symbol_info* temp = block[i]; | |
while( temp != NULL ){ | |
printf("->[%s|%s]", temp->name, temp->classtype); | |
temp = temp->next; | |
} | |
printf("\n"); | |
} | |
} | |
int showMenu(){ | |
cout << "Menu:\n"; | |
cout << "=====\n"; | |
string message = "Enter 1 to insert(name, class type)\n" | |
"Enter 2 to update(name, class type, new class type)\n" | |
"Enter 3 to search(name, class type)\n" | |
"Enter 4 to delete(name, class type)\n" | |
"Enter 5 to show the symbol table\n"; | |
cout << message << "\n"; | |
cout << "User Choice: "; | |
int choice; | |
scanf("%d", &choice); | |
return choice; | |
} | |
int main(){ | |
//take input from a file | |
//freopen("input.txt", "r", stdin); | |
//freopen("output.txt", "w", stdout); | |
int choice = showMenu(); | |
while( 1 ){ | |
char *name = new char[M]; | |
char *classtype = new char[M]; | |
switch(choice){ | |
case 1: | |
{ | |
cout << "Insert Selected:\n"; | |
scanf("%s%s", name, classtype); | |
// Omit the comma character | |
int pos = strlen(name) - 1; | |
if( name[ pos ] == ',' ){ | |
name[ pos ] = '\0'; | |
} | |
printf("%s %s\n", name, classtype); | |
printf("%d\n", cHash(name) ); | |
cInsert(name, classtype); | |
} | |
break; | |
case 2: | |
{ | |
cout << "Update Selected:\n"; | |
char* updatedClasstype = new char[M]; | |
scanf("%s%s%s", name, classtype, updatedClasstype); | |
// Omit the comma character | |
int pos = strlen(name) - 1; | |
if( name[ pos ] == ',' ){ | |
name[ pos ] = '\0'; | |
} | |
pos = strlen(classtype) - 1; | |
if( classtype[ pos ] == ',' ){ | |
classtype[ pos ] = '\0'; | |
} | |
printf("%s %s\n", name, classtype); | |
printf("%d\n", cHash(name) ); | |
cUpdate(name, classtype, updatedClasstype); | |
} | |
break; | |
case 3: | |
{ | |
cout << "Search Selected:\n"; | |
scanf("%s%s", name, classtype); | |
// Omit the comma character | |
int pos = strlen(name) - 1; | |
if( name[ pos ] == ',' ){ | |
name[ pos ] = '\0'; | |
} | |
printf("%s %s\n", name, classtype); | |
printf("%d\n", cHash(name) ); | |
if( cSearch(name, classtype) ){ | |
printf("FOUND\n"); | |
}else{ | |
printf("NOT FOUND\n"); | |
} | |
} | |
break; | |
case 4: | |
{ | |
cout << "Delete Selected:\n"; | |
scanf("%s%s", name, classtype); | |
// Omit the comma character | |
int pos = strlen(name) - 1; | |
if( name[ pos ] == ',' ){ | |
name[ pos ] = '\0'; | |
} | |
printf("%s %s\n", name, classtype); | |
printf("%d\n", cHash(name) ); | |
cDelete(name, classtype); | |
} | |
break; | |
case 5: | |
cout << "Show Selected\n"; | |
showSymbolTable(); | |
break; | |
default: | |
return 0; | |
}; | |
choice = showMenu(); | |
} | |
return 0; | |
} |
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment