// MultiMatch.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <Windows.h>
#include <assert.h>
#include "MultiMatch.h"
//we assume that, the characters in patterns are lower letters.
//so there are only 26 letters.
#define ALL_LOWER_CASE 26
#define END_FLAG 0x80
typedef byte (*JMPTABLE)[ALL_LOWER_CASE];
//we assume that, the characters in patterns are lower letters.
MULTI_PATTERN g_PatternTable[] =
{
{false, 0, 5, {'o', 'y', 'i', 'm', 'a'} },
{false, 0, 5, {'y', 'i', 'x', 'i', 'n'} },
{false, 0, 6, {'y', 'i', 'd', 'i', 'a', 'n'} },
};
//in fuzzy match, the prefix at fuzzy position can not same as another pattern
//otherwise, it will make a conflict.
//the fuzzy char can not at the begin or end of the pattern, is meaningless
MULTI_PATTERN g_FuzzyPatternTable[] =
{
{false, 0, 5, {'o', 'y', 'i', 'm', 'a'} },
{true, '*', 5, {'y', 'i', 'x', '*', 'n'} },
{true, '?', 6, {'y', 'i', 'd', 'i', '?', 'n'} },
};
void FillTable(byte *p, byte n)
{
for (int i = 0; i < ALL_LOWER_CASE; i++)
{
assert(p[i] == 0);
p[i] = n;
}
}
JMPTABLE CreateJumpTable(MULTI_PATTERN patternTable[], int nLen)
{
int nTotalLen = 0;
byte byNextRow = 0;
JMPTABLE pJmpTab;
for (int i = 0; i < nLen; i++)
{
nTotalLen += patternTable[i].nLen;
}
//use byte array only for study, maybe you need use integer array in project
pJmpTab = (JMPTABLE)malloc(nTotalLen*ALL_LOWER_CASE);
if (!pJmpTab)
{
return pJmpTab;
}
memset(pJmpTab, 0, nTotalLen*ALL_LOWER_CASE);
for (int i = 0; i < nLen; i++)
{
byte byCurRow = 0;
for (int j = 0; j < patternTable[i].nLen; j++)
{
byte cSign = patternTable[i].SigData[j] - 'a';
byte cNext = pJmpTab[byCurRow][cSign];
byNextRow++;
if (cNext == 0)
{
if (j == patternTable[i].nLen-1)//is the last character of the pattern, and the inner loop will over
{
pJmpTab[byCurRow][cSign] = i | END_FLAG;// 'i' indicate the pattern number
}
else
{
pJmpTab[byCurRow][cSign] = byNextRow;
}
byCurRow = byNextRow;
}
else
{
assert(0 == (cNext & END_FLAG));//if reach a end position, the prefix of current pattern is another pattern
byCurRow = cNext;
}
}
}
return pJmpTab;
}
JMPTABLE CreateFuzzyJumpTable(MULTI_PATTERN patternTable[], int nLen)
{
int nTotalLen = 0;
byte byNextRow = 0;
JMPTABLE pJmpTab;
//we can reduce some rows
//1 every pattern's head char mapping in first row,
// so we can save one row for every pattern begin at second pattern
//2 if a pattern's prefix is same as other pattern's prefix, the repeat part don't need new rows
// but, if the rows reduce, the code must modify for it
for (int i = 0; i < nLen; i++)
{
nTotalLen += patternTable[i].nLen;
}
//use byte array only for study, maybe you need use integer array in project
pJmpTab = (JMPTABLE)malloc(nTotalLen*ALL_LOWER_CASE);
if (!pJmpTab)
{
return pJmpTab;
}
memset(pJmpTab, 0, nTotalLen*ALL_LOWER_CASE);
for (int i = 0; i < nLen; i++)
{
byte byCurRow = 0;
for (int j = 0; j < patternTable[i].nLen; j++)
{
byte cSign = patternTable[i].SigData[j];
byNextRow++;
if (patternTable[i].bMask && patternTable[i].byMask == cSign)//in fuzzy match, add this branch
{
FillTable(pJmpTab[byCurRow], byNextRow);
byCurRow = byNextRow;
continue;
}
cSign -= 'a';
byte cNext = pJmpTab[byCurRow][cSign];
if (cNext == 0)
{
if (j == patternTable[i].nLen-1)//the last character of the pattern, and the inner loop will over
{
pJmpTab[byCurRow][cSign] = i | END_FLAG;// 'i' indicate the pattern number
}
else
{
pJmpTab[byCurRow][cSign] = byNextRow;
}
byCurRow = byNextRow;
}
else
{
assert(0 == (cNext & END_FLAG));//if reach a end position, the prefix of current pattern is another pattern
byCurRow = cNext;
}
}
}
return pJmpTab;
}
//CreateFuzzyJumpTable2 modify from CreateFuzzyJumpTable
//just save some rows for the reason 1(every pattern's head char mapping in first row)
//for reason 2, we can reduce rows further, but is not easy to calculation the repeat prefix
//so we do not do this, but in code we can let the redundant rows all behind the malloc memory.
JMPTABLE CreateFuzzyJumpTable2(MULTI_PATTERN patternTable[], int nLen)
{
int nTotalLen = 0;
byte byNextRow = 0;
JMPTABLE pJmpTab;
for (int i = 0; i < nLen; i++)
{
nTotalLen += patternTable[i].nLen;
}
nTotalLen -= nLen-1;//for reason 1
//use byte array only for study, maybe you need use integer array in project
pJmpTab = (JMPTABLE)malloc(nTotalLen*ALL_LOWER_CASE);
if (!pJmpTab)
{
return pJmpTab;
}
memset(pJmpTab, 0, nTotalLen*ALL_LOWER_CASE);
for (int i = 0; i < nLen; i++)
{
byte byCurRow = 0;
for (int j = 0; j < patternTable[i].nLen; j++)
{
byte cSign = patternTable[i].SigData[j];
//byNextRow++; only add this value in necessary
if (patternTable[i].bMask && patternTable[i].byMask == cSign)//in fuzzy match, add this branch
{
byNextRow++;//need add
FillTable(pJmpTab[byCurRow], byNextRow);
byCurRow = byNextRow;
continue;
}
cSign -= 'a';
byte cNext = pJmpTab[byCurRow][cSign];
if (cNext == 0)
{
if (j == patternTable[i].nLen-1)//the last character of the pattern, and the inner loop will over
{
pJmpTab[byCurRow][cSign] = i | END_FLAG;// 'i' indicate the pattern number
}
else
{
byNextRow++;//need add
pJmpTab[byCurRow][cSign] = byNextRow;
}
byCurRow = byNextRow;
}
else
{
assert(0 == (cNext & END_FLAG));//if reach a end position, the prefix of current pattern is another pattern
byCurRow = cNext;
}
}
}
return pJmpTab;
}
char* MultiSearch2(char *p, int nLen, JMPTABLE pJmpTable, int &nIdx)
{
for (int nStart = 0; nStart < nLen; nStart++)
{
for (int i = nStart, nRow = 0; i < nLen; i++)
{
byte byNextRow = pJmpTable[nRow][p[i]-'a'];
if (0 == byNextRow)
{
break;
}
//check whether is end of pattern or not
if (byNextRow & END_FLAG)
{
nIdx = byNextRow & (~END_FLAG);
return &p[i];
}
nRow = byNextRow;
}
}
return NULL;
}
char* MultiSearch(char *p, int nLen, JMPTABLE pJmpTable, int &nIdx)
{
int nRow = 0;
int nMatchMaybe = 0;
bool bSetMatchMaybe = false;
for (int nStart = 0; nStart < nLen; nStart++)
{
byte byNe
【模式匹配】之——多模匹配 上篇(AC算法)
需积分: 44 6 浏览量
2013-04-19
13:43:30
上传
评论
收藏 20KB ZIP 举报
超然_烟火
- 粉丝: 87
- 资源: 17
最新资源
- 基于Python的图像阴影检测与去除源码(高分期末大作业项目).zip
- 基于C++/Qt实现的井字棋游戏
- 基于 Python 编程语言的 Web 框架Django
- Python和Flask实现的基于体检数据的城市公共健康可视分析系统源码+使用说明.zip
- 基于python实现的华为智慧工地-安全帽检测
- buck-boost_2023-12-16_12-12-13.eprj
- 后端开发关于数据库和API开发的介绍
- 机器学习和数据挖掘课程设计-米其林餐厅数据挖掘管理系统源码+使用文档说明.zip
- html html html展示我与ai的对化
- 数据结构课程设计-全国交通出行咨询模拟系统C语言实现源码.zip
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈