/*
* Copyright (C) 2009 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
#include <new>
#include "lpicache.h"
#include "matrixsearch.h"
#include "mystdlib.h"
#include "ngram.h"
#include "userdict.h"
namespace ime_pinyin {
// Score constant with the value 8000.0. Its uses are outside this part of the
// file. NOTE(review): the name looks like a misspelling of "PRUNING_SCORE";
// confirm against the users of the macro before renaming it.
#define PRUMING_SCORE 8000.0
// Creates a search object in the "not initialized" state. No dictionary or
// buffer is allocated here; every owned pointer starts out as NULL.
MatrixSearch::MatrixSearch() {
  // Nothing is allocated yet, so all resource pointers are cleared.
  reset_pointers_to_null();
  spl_trie_ = SpellingTrie::get_cpinstance();

  // State flags.
  inited_ = false;
  xi_an_enabled_ = false;
  dmi_c_phrase_ = false;

  // Decoding progress and pool usage counters.
  pys_decoded_len_ = 0;
  mtrx_nd_pool_used_ = 0;
  dmi_pool_used_ = 0;

  // The default length limits are derived from kMaxSearchSteps, which has to
  // be positive for the subtraction below to make sense.
  assert(kMaxSearchSteps > 0);
  max_hzs_len_ = kMaxSearchSteps;
  max_sps_len_ = kMaxSearchSteps - 1;
}
// Releases the dictionaries, the spelling parser and the shared buffer owned
// by this object.
MatrixSearch::~MatrixSearch() {
  this->free_resource();
}
// Sets every resource pointer of this object to NULL. Nothing is freed here;
// callers release the owned objects first (see free_resource()).
void MatrixSearch::reset_pointers_to_null() {
  // Pointers to separately allocated objects.
  spl_parser_ = NULL;
  user_dict_ = NULL;
  dict_trie_ = NULL;
  share_buf_ = NULL;

  // Decoding buffers. They are carved out of share_buf_, so they are never
  // deleted on their own.
  dep_ = NULL;
  matrix_ = NULL;
  dmi_pool_ = NULL;
  mtrx_nd_pool_ = NULL;

  // The prediction buffer is also a view onto share_buf_.
  npre_items_ = NULL;
}
// Allocates the dictionaries, the spelling parser and the shared work buffer.
//
// Any previously held resources are released first. The shared buffer is one
// array of size_t that is partitioned, in this order, into the MatrixNode
// pool, the DictMatchInfo pool, the matrix rows and one DictExtPara. The
// prediction item buffer reuses the same memory from its beginning.
//
// Returns true when every allocation succeeded, false otherwise. On failure
// the objects that were allocated stay attached to this object and are
// released by the next free_resource() call.
bool MatrixSearch::alloc_resource() {
  free_resource();

  // The non-throwing form of new is used so that an allocation failure
  // actually reaches the NULL check below; a plain new expression reports
  // failure by throwing and would make that check dead code.
  dict_trie_ = new (std::nothrow) DictTrie();
  user_dict_ = static_cast<AtomDictBase*>(new (std::nothrow) UserDict());
  spl_parser_ = new (std::nothrow) SpellingParser();

  // Size of each partition, rounded up and expressed in size_t units.
  size_t mtrx_nd_size = sizeof(MatrixNode) * kMtrxNdPoolSize;
  mtrx_nd_size = align_to_size_t(mtrx_nd_size) / sizeof(size_t);

  size_t dmi_size = sizeof(DictMatchInfo) * kDmiPoolSize;
  dmi_size = align_to_size_t(dmi_size) / sizeof(size_t);

  size_t matrix_size = sizeof(MatrixRow) * kMaxRowNum;
  matrix_size = align_to_size_t(matrix_size) / sizeof(size_t);

  size_t dep_size = sizeof(DictExtPara);
  dep_size = align_to_size_t(dep_size) / sizeof(size_t);

  // share_buf's size is determined by the buffers for search.
  const size_t total_size = mtrx_nd_size + dmi_size + matrix_size + dep_size;
  share_buf_ = new (std::nothrow) size_t[total_size];

  if (NULL == dict_trie_ || NULL == user_dict_ || NULL == spl_parser_ ||
      NULL == share_buf_)
    return false;

  // The buffers for search are based on the share buffer.
  mtrx_nd_pool_ = reinterpret_cast<MatrixNode*>(share_buf_);
  dmi_pool_ = reinterpret_cast<DictMatchInfo*>(share_buf_ + mtrx_nd_size);
  matrix_ = reinterpret_cast<MatrixRow*>(share_buf_ + mtrx_nd_size + dmi_size);
  dep_ = reinterpret_cast<DictExtPara*>
      (share_buf_ + mtrx_nd_size + dmi_size + matrix_size);

  // The prediction buffer is also based on the share buffer.
  npre_items_ = reinterpret_cast<NPredictItem*>(share_buf_);
  npre_items_len_ = total_size * sizeof(size_t) / sizeof(NPredictItem);
  return true;
}
// Destroys the objects owned by this instance and clears all resource
// pointers, including the ones that only point into share_buf_.
void MatrixSearch::free_resource() {
  // Applying delete / delete[] to a NULL pointer does nothing, so the
  // pointers can be passed along without checking them first.
  delete dict_trie_;
  delete user_dict_;
  delete spl_parser_;
  delete [] share_buf_;

  reset_pointers_to_null();
}
// Initializes the engine from a system dictionary file and a user dictionary
// file.
//
// fn_sys_dict: file name of the system dictionary; must not be NULL.
// fn_usr_dict: file name of the user dictionary; must not be NULL.
//
// Returns false when an argument is NULL, when the resources cannot be
// allocated, or when the system dictionary fails to load. A user dictionary
// that fails to load is not an error: the engine then runs without one.
bool MatrixSearch::init(const char *fn_sys_dict, const char *fn_usr_dict) {
  if (NULL == fn_sys_dict || NULL == fn_usr_dict)
    return false;

  // alloc_resource() releases whatever an earlier initialization created, so
  // from here on the object must not report itself as initialized until the
  // new dictionaries have been loaded.
  inited_ = false;

  if (!alloc_resource())
    return false;

  if (!dict_trie_->load_dict(fn_sys_dict, 1, kSysDictIdEnd))
    return false;

  // If engine fails to load the user dictionary, reset the user dictionary
  // to NULL.
  if (!user_dict_->load_dict(fn_usr_dict, kUserDictIdStart, kUserDictIdEnd)) {
    delete user_dict_;
    user_dict_ = NULL;
  } else {
    user_dict_->set_total_lemma_count_of_others(NGram::kSysDictTotalFreq);
  }

  // reset_search0() returns early while inited_ is false, so the flag has to
  // be raised before the call; otherwise the start node and the milestones
  // would never be set up by a first-time init.
  inited_ = true;
  reset_search0();
  return true;
}
// Initializes the engine from a system dictionary that is read through a file
// descriptor, plus a user dictionary file.
//
// sys_fd, start_offset, length: passed unchanged to DictTrie::load_dict_fd()
//     to locate the system dictionary.
// fn_usr_dict: file name of the user dictionary; must not be NULL.
//
// Returns false when fn_usr_dict is NULL, when the resources cannot be
// allocated, or when the system dictionary fails to load. A user dictionary
// that fails to load is not an error: the engine then runs without one.
bool MatrixSearch::init_fd(int sys_fd, long start_offset, long length,
                           const char *fn_usr_dict) {
  if (NULL == fn_usr_dict)
    return false;

  // alloc_resource() releases whatever an earlier initialization created, so
  // from here on the object must not report itself as initialized until the
  // new dictionaries have been loaded.
  inited_ = false;

  if (!alloc_resource())
    return false;

  if (!dict_trie_->load_dict_fd(sys_fd, start_offset, length, 1, kSysDictIdEnd))
    return false;

  // If the user dictionary cannot be loaded, drop it and continue without it.
  if (!user_dict_->load_dict(fn_usr_dict, kUserDictIdStart, kUserDictIdEnd)) {
    delete user_dict_;
    user_dict_ = NULL;
  } else {
    user_dict_->set_total_lemma_count_of_others(NGram::kSysDictTotalFreq);
  }

  // reset_search0() returns early while inited_ is false, so the flag has to
  // be raised before the call; otherwise the start node and the milestones
  // would never be set up by a first-time init.
  inited_ = true;
  reset_search0();
  return true;
}
void MatrixSearch::init_user_dictionary(const char *fn_usr_dict) {
assert(inited_);
if (NULL != user_dict_) {
delete user_dict_;
user_dict_ = NULL;
}
if (NULL != fn_usr_dict) {
user_dict_ = static_cast<AtomDictBase*>(new UserDict());
if (!user_dict_->load_dict(fn_usr_dict, kUserDictIdStart, kUserDictIdEnd)) {
delete user_dict_;
user_dict_ = NULL;
}
}
reset_search0();
}
bool MatrixSearch::is_user_dictionary_enabled() const {
return NULL != user_dict_;
}
// Updates the maximum spelling-string length and the maximum Hanzi length.
// A value of 0 leaves the corresponding limit unchanged.
void MatrixSearch::set_max_lens(size_t max_sps_len, size_t max_hzs_len) {
  if (max_sps_len > 0) {
    max_sps_len_ = max_sps_len;
  }

  if (max_hzs_len > 0) {
    max_hzs_len_ = max_hzs_len;
  }
}
// Shuts the engine down: flushes the user dictionary cache, releases all
// resources and marks the object as not initialized.
void MatrixSearch::close() {
  // The flush has to come first because free_resource() destroys the user
  // dictionary.
  flush_cache();

  free_resource();

  inited_ = false;
}
// Asks the user dictionary, if there is one, to flush its cache.
void MatrixSearch::flush_cache() {
  if (NULL == user_dict_)
    return;

  user_dict_->flush_cache();
}
void MatrixSearch::set_xi_an_switch(bool xi_an_enabled) {
xi_an_enabled_ = xi_an_enabled;
}
bool MatrixSearch::get_xi_an_switch() {
return xi_an_enabled_;
}
// Resets the whole search state. Returns false when the engine has not been
// initialized, otherwise the result of reset_search0().
bool MatrixSearch::reset_search() {
  // Short-circuit evaluation keeps reset_search0() from being called on an
  // uninitialized engine.
  return inited_ && reset_search0();
}
bool MatrixSearch::reset_search0() {
if (!inited_)
return false;
pys_decoded_len_ = 0;
mtrx_nd_pool_used_ = 0;
dmi_pool_used_ = 0;
// Get a MatrixNode from the pool
matrix_[0].mtrx_nd_pos = mtrx_nd_pool_used_;
matrix_[0].mtrx_nd_num = 1;
mtrx_nd_pool_used_ += 1;
// Update the node, and make it to be a starting node
MatrixNode *node = mtrx_nd_pool_ + matrix_[0].mtrx_nd_pos;
node->id = 0;
node->score = 0;
node->from = NULL;
node->step = 0;
node->dmi_fr = (PoolPosType)-1;
matrix_[0].dmi_pos = 0;
matrix_[0].dmi_num = 0;
matrix_[0].dmi_has_full_id = 1;
matrix_[0].mtrx_nd_fixed = node;
lma_start_[0] = 0;
fixed_lmas_ = 0;
spl_start_[0] = 0;
fixed_hzs_ = 0;
dict_trie_->reset_milestones(0, 0);
if (NULL != user_dict_)
user_dict_->reset_milestones(0, 0);
return true;
}
bool MatrixSearch::reset_search(size_t ch_pos, bool clear_fixed_this_step,
bool clear_dmi_this_step,
bool clear_mtrx_this_step) {
if (!inited_ || ch_pos > pys_decoded_len_ || ch_pos >= kMaxRowNum)
return false;
if (0 == ch_pos) {
reset_search0();
} else {
// Prepare mile stones of this step to clear.
MileStoneHandle *dict_handles_to_clear = NULL;
if (clear_dmi_this_step && matrix_[ch_pos].dmi_num > 0) {
dict_handles_to_clear = dmi_pool_[matrix_[ch_pos].dmi_pos].dict_handles;
}
// If there are more steps, and this step is not allowed to clear, find
// milestones of next step.
if (pys_decoded_len_ > ch_pos && !clear_dmi_this_step) {
dict_handles_to_clear = NULL;
if (matrix_[ch_pos + 1].dmi_num > 0) {
dict_handles_to_clear =
dmi_pool_[matrix_[ch_pos + 1].dmi_pos].dict_handles;
}
}
if (NULL != dict_handles_to_clear) {
dict_trie_->reset_milestones(ch_pos, dict_handles_to_clear[0]);
if (NULL != user_dict_)
user_dict_->reset_milestones(ch_pos, dict_handles_to_clear[1]);
}
pys_decoded_len_ = ch_pos;
if (clear_dmi_this_step) {
dmi_pool_used_ = matrix
- 1
- 2
- 3
- 4
- 5
- 6
前往页