Boyer-Moore algorithm C++ implementation
2013-07-25
Here is the Boyer-Moore algorithm C++ implementation.
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <vector>
#include <string.h>
#include <algorithm>
#define ALPHABET_LENGTH 256
#define MAX(x, y) (x) > (y) ? (x) : (y)
using namespace std;
vector <int> L1;
vector <int> L2;
typedef struct ListNode
{
struct ListNode *Next;
int Index;
} ListNode;
typedef struct List
{
struct ListNode *curr, *start;
} List;
List *ListCreate(void)
{
List *list = new List();
list->start = list->curr = NULL;
return list;
}
void ListAddFirst(List *list, int index)
{
ListNode *node = new ListNode();
node->Index = index;
node->Next = list->start;
list->curr = list->start = node;
}
ListNode *ListMovePtr(List *list)
{
if (list->curr != NULL)
list->curr = list->curr->Next;
return list->curr;
}
void ListReset(List *list)
{
list->curr = list->start;
}
void ExitOnError(const char *msg)
{
printf("Error: %s\r\n", msg);
exit(0);
}
// count z-function for linear time O(n)
vector <int> z_function(string s)
{
int n = (int)s.length();
vector <int> z(n);
for (int i = 1, l = 0, r = 0; i < n; i++)
{
if (i <= r)
z[i] = min(r - i + 1, z[i-l]);
while (i + z[i] < n && s[z[i]] == s[i + z[i]])
z[i]++;
if (i + z[i] - 1 > r)
l = i, r = i + z[i] - 1;
}
return z;
}
vector <int> n_function(string s)
{
reverse(s.begin(), s.end());
vector <int> res = z_function(s);
reverse(res.begin(), res.end());
return res;
}
// calculate max suffix of P[1..i] equal to some suffix of P
void CalculateLFuncs(vector <int> n_fun)
{
int len = (int)n_fun.size();
L1.resize(len);
L2.resize(len);
for (int i = 0; i < len; i++)
if (n_fun[i])
L1[len - n_fun[i]] = i + 1;
L2[len - 1] = n_fun[0];
for (int i = 0; i < len; i++)
{
int q = n_fun[i] == i + 1 ? n_fun[i] : 0;
L2[len - i - 1] = MAX(q, L2[len - i]);
}
}
// build bad symbol shift table
void BuildBadSymShiftTable(string s, List **list)
{
for (int i = 0; i < (int)s.size(); i++)
ListAddFirst(list[s[i]], i);
}
int BoyerMoore(string text, string pattern, List **entries)
{
int s_len = (int)text.size();
int p_len = (int)pattern.size();
int k = p_len - 1;
int last_entry = -1;
int not_occured = 1;
while (k < s_len && not_occured)
{
int i = p_len - 1;
int h = k;
for (; i >= 0 && pattern[i] == text[h]; i--, h--)
ListMovePtr(entries[pattern[i]]);
if (i < 0)
{
last_entry = h + 2;
k += p_len - L2[1];
not_occured = 0;
}
else
{
int stop_symbol_rule = !entries]->curr ? i + 1 : i - entries]->curr->Index;
int good_suff_rule = i == p_len - 1 ? 1 : L1[i + 1] == 0 ? p_len - L2[i + 1] - 1: p_len - L1[i + 1];
k += MAX(stop_symbol_rule, good_suff_rule);
}
for (i = 0; i < ALPHABET_LENGTH; i++)
ListReset(entries[i]);
}
return last_entry;
}
int main(void)
{
FILE *fin = freopen("input.txt", "r", stdin);
FILE *fout = freopen("output.txt", "w", stdout);
if (!fin)
ExitOnError("cannot open input file");
if (!fout)
ExitOnError("cannot open output file");
string text, pattern;
getline(cin, text);
getline(cin, pattern);
List *entries[ALPHABET_LENGTH];
for (int i = 0; i < ALPHABET_LENGTH; i++)
entries[i] = ListCreate();
BuildBadSymShiftTable(pattern, entries);
CalculateLFuncs(n_function(pattern));
cout << BoyerMoore(text, pattern, entries);
fclose(fin);
fclose(fout);
}