mirror of
https://github.com/fanlumaster/googlepinyinime-rev.git
synced 2025-07-18 17:08:00 +08:00
add all necessary configs
This commit is contained in:
708
src/share/spellingtrie.cpp
Normal file
708
src/share/spellingtrie.cpp
Normal file
@ -0,0 +1,708 @@
|
||||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include "../include/dictdef.h"
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
#include "../include/spellingtable.h"
|
||||
#endif
|
||||
|
||||
#include "../include/spellingtrie.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
SpellingTrie *SpellingTrie::instance_ = NULL;
|
||||
|
||||
// z/c/s is for Zh/Ch/Sh
|
||||
const char SpellingTrie::kHalfId2Sc_[kFullSplIdStart + 1] = "0ABCcDEFGHIJKLMNOPQRSsTUVWXYZz";
|
||||
|
||||
// Bit 0 : is it a Shengmu char?
|
||||
// Bit 1 : is it a Yunmu char? (one char is a Yunmu)
|
||||
// Bit 2 : is it enabled in ShouZiMu(first char) mode?
|
||||
unsigned char SpellingTrie::char_flags_[] = {
|
||||
// a b c d e f g
|
||||
0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01,
|
||||
// h i j k l m n
|
||||
0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01,
|
||||
// o p q r s t
|
||||
0x02, 0x01, 0x01, 0x01, 0x01, 0x01,
|
||||
// u v w x y z
|
||||
0x00, 0x00, 0x01, 0x01, 0x01, 0x01};
|
||||
|
||||
int compare_spl(const void *p1, const void *p2) { return strcmp((const char *)(p1), (const char *)(p2)); }
|
||||
|
||||
SpellingTrie::SpellingTrie() {
|
||||
spelling_buf_ = NULL;
|
||||
spelling_size_ = 0;
|
||||
spelling_num_ = 0;
|
||||
spl_ym_ids_ = NULL;
|
||||
splstr_queried_ = NULL;
|
||||
splstr16_queried_ = NULL;
|
||||
root_ = NULL;
|
||||
dumb_node_ = NULL;
|
||||
splitter_node_ = NULL;
|
||||
instance_ = NULL;
|
||||
ym_buf_ = NULL;
|
||||
f2h_ = NULL;
|
||||
|
||||
szm_enable_shm(true);
|
||||
szm_enable_ym(true);
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
node_num_ = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
SpellingTrie::~SpellingTrie() {
|
||||
if (NULL != spelling_buf_) delete[] spelling_buf_;
|
||||
|
||||
if (NULL != splstr_queried_) delete[] splstr_queried_;
|
||||
|
||||
if (NULL != splstr16_queried_) delete[] splstr16_queried_;
|
||||
|
||||
if (NULL != spl_ym_ids_) delete[] spl_ym_ids_;
|
||||
|
||||
if (NULL != root_) {
|
||||
free_son_trie(root_);
|
||||
delete root_;
|
||||
}
|
||||
|
||||
if (NULL != dumb_node_) {
|
||||
delete[] dumb_node_;
|
||||
}
|
||||
|
||||
if (NULL != splitter_node_) {
|
||||
delete[] splitter_node_;
|
||||
}
|
||||
|
||||
if (NULL != instance_) {
|
||||
delete instance_;
|
||||
instance_ = NULL;
|
||||
}
|
||||
|
||||
if (NULL != ym_buf_) delete[] ym_buf_;
|
||||
|
||||
if (NULL != f2h_) delete[] f2h_;
|
||||
}
|
||||
|
||||
bool SpellingTrie::if_valid_id_update(uint16 *splid) const {
|
||||
if (NULL == splid || 0 == *splid) return false;
|
||||
|
||||
if (*splid >= kFullSplIdStart) return true;
|
||||
if (*splid < kFullSplIdStart) {
|
||||
char ch = kHalfId2Sc_[*splid];
|
||||
if (ch > 'Z') {
|
||||
return true;
|
||||
} else {
|
||||
if (szm_is_enabled(ch)) {
|
||||
return true;
|
||||
} else if (is_yunmu_char(ch)) {
|
||||
assert(h2f_num_[*splid] > 0);
|
||||
*splid = h2f_start_[*splid];
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool SpellingTrie::is_half_id(uint16 splid) const {
|
||||
if (0 == splid || splid >= kFullSplIdStart) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SpellingTrie::is_full_id(uint16 splid) const {
|
||||
if (splid < kFullSplIdStart || splid >= kFullSplIdStart + spelling_num_) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SpellingTrie::half_full_compatible(uint16 half_id, uint16 full_id) const {
|
||||
uint16 half_fr_full = full_to_half(full_id);
|
||||
|
||||
if (half_fr_full == half_id) return true;
|
||||
|
||||
// &~0x20 is used to conver the char to upper case.
|
||||
// So that Zh/Ch/Sh(whose char is z/c/s) can be matched with Z/C/S.
|
||||
char ch_f = (kHalfId2Sc_[half_fr_full] & (~0x20));
|
||||
char ch_h = kHalfId2Sc_[half_id];
|
||||
if (ch_f == ch_h) return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool SpellingTrie::is_half_id_yunmu(uint16 splid) const {
|
||||
if (0 == splid || splid >= kFullSplIdStart) return false;
|
||||
|
||||
char ch = kHalfId2Sc_[splid];
|
||||
// If ch >= 'a', that means the half id is one of Zh/Ch/Sh
|
||||
if (ch >= 'a') {
|
||||
return false;
|
||||
}
|
||||
|
||||
return char_flags_[ch - 'A'] & kHalfIdYunmuMask;
|
||||
}
|
||||
|
||||
bool SpellingTrie::is_shengmu_char(char ch) const { return char_flags_[ch - 'A'] & kHalfIdShengmuMask; }
|
||||
|
||||
bool SpellingTrie::is_yunmu_char(char ch) const { return char_flags_[ch - 'A'] & kHalfIdYunmuMask; }
|
||||
|
||||
bool SpellingTrie::is_szm_char(char ch) const { return is_shengmu_char(ch) || is_yunmu_char(ch); }
|
||||
|
||||
bool SpellingTrie::szm_is_enabled(char ch) const { return char_flags_[ch - 'A'] & kHalfIdSzmMask; }
|
||||
|
||||
void SpellingTrie::szm_enable_shm(bool enable) {
|
||||
if (enable) {
|
||||
for (char ch = 'A'; ch <= 'Z'; ch++) {
|
||||
if (is_shengmu_char(ch)) char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask;
|
||||
}
|
||||
} else {
|
||||
for (char ch = 'A'; ch <= 'Z'; ch++) {
|
||||
if (is_shengmu_char(ch)) char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void SpellingTrie::szm_enable_ym(bool enable) {
|
||||
if (enable) {
|
||||
for (char ch = 'A'; ch <= 'Z'; ch++) {
|
||||
if (is_yunmu_char(ch)) char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask;
|
||||
}
|
||||
} else {
|
||||
for (char ch = 'A'; ch <= 'Z'; ch++) {
|
||||
if (is_yunmu_char(ch)) char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool SpellingTrie::is_szm_enabled(char ch) const { return char_flags_[ch - 'A'] & kHalfIdSzmMask; }
|
||||
|
||||
const SpellingTrie *SpellingTrie::get_cpinstance() { return &get_instance(); }
|
||||
|
||||
SpellingTrie &SpellingTrie::get_instance() {
|
||||
if (NULL == instance_) instance_ = new SpellingTrie();
|
||||
|
||||
return *instance_;
|
||||
}
|
||||
|
||||
uint16 SpellingTrie::half2full_num(uint16 half_id) const {
|
||||
if (NULL == root_ || half_id >= kFullSplIdStart) return 0;
|
||||
return h2f_num_[half_id];
|
||||
}
|
||||
|
||||
uint16 SpellingTrie::half_to_full(uint16 half_id, uint16 *spl_id_start) const {
|
||||
if (NULL == spl_id_start || NULL == root_ || half_id >= kFullSplIdStart) return 0;
|
||||
|
||||
*spl_id_start = h2f_start_[half_id];
|
||||
return h2f_num_[half_id];
|
||||
}
|
||||
|
||||
uint16 SpellingTrie::full_to_half(uint16 full_id) const {
|
||||
if (NULL == root_ || full_id < kFullSplIdStart || full_id > spelling_num_ + kFullSplIdStart) return 0;
|
||||
|
||||
return f2h_[full_id - kFullSplIdStart];
|
||||
}
|
||||
|
||||
void SpellingTrie::free_son_trie(SpellingNode *node) {
|
||||
if (NULL == node) return;
|
||||
|
||||
for (size_t pos = 0; pos < node->num_of_son; pos++) {
|
||||
free_son_trie(node->first_son + pos);
|
||||
}
|
||||
|
||||
if (NULL != node->first_son) delete[] node->first_son;
|
||||
}
|
||||
|
||||
bool SpellingTrie::construct(const char *spelling_arr, size_t item_size, size_t item_num, float score_amplifier, unsigned char average_score) {
|
||||
if (spelling_arr == NULL) return false;
|
||||
|
||||
memset(h2f_start_, 0, sizeof(uint16) * kFullSplIdStart);
|
||||
memset(h2f_num_, 0, sizeof(uint16) * kFullSplIdStart);
|
||||
|
||||
// If the arr is the same as the buf, means this function is called by
|
||||
// load_table(), the table data are ready; otherwise the array should be
|
||||
// saved.
|
||||
if (spelling_arr != spelling_buf_) {
|
||||
if (NULL != spelling_buf_) delete[] spelling_buf_;
|
||||
spelling_buf_ = new char[item_size * item_num];
|
||||
if (NULL == spelling_buf_) return false;
|
||||
memcpy(spelling_buf_, spelling_arr, sizeof(char) * item_size * item_num);
|
||||
}
|
||||
|
||||
spelling_size_ = item_size;
|
||||
spelling_num_ = item_num;
|
||||
|
||||
score_amplifier_ = score_amplifier;
|
||||
average_score_ = average_score;
|
||||
|
||||
if (NULL != splstr_queried_) delete[] splstr_queried_;
|
||||
splstr_queried_ = new char[spelling_size_];
|
||||
if (NULL == splstr_queried_) return false;
|
||||
|
||||
if (NULL != splstr16_queried_) delete[] splstr16_queried_;
|
||||
splstr16_queried_ = new char16[spelling_size_];
|
||||
if (NULL == splstr16_queried_) return false;
|
||||
|
||||
// First, sort the buf to ensure they are in ascendant order
|
||||
qsort(spelling_buf_, spelling_num_, spelling_size_, compare_spl);
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
node_num_ = 1;
|
||||
#endif
|
||||
|
||||
root_ = new SpellingNode();
|
||||
memset(root_, 0, sizeof(SpellingNode));
|
||||
|
||||
dumb_node_ = new SpellingNode();
|
||||
memset(dumb_node_, 0, sizeof(SpellingNode));
|
||||
dumb_node_->score = average_score_;
|
||||
|
||||
splitter_node_ = new SpellingNode();
|
||||
memset(splitter_node_, 0, sizeof(SpellingNode));
|
||||
splitter_node_->score = average_score_;
|
||||
|
||||
memset(level1_sons_, 0, sizeof(SpellingNode *) * kValidSplCharNum);
|
||||
|
||||
root_->first_son = construct_spellings_subset(0, spelling_num_, 0, root_);
|
||||
|
||||
// Root's score should be cleared.
|
||||
root_->score = 0;
|
||||
|
||||
if (NULL == root_->first_son) return false;
|
||||
|
||||
h2f_start_[0] = h2f_num_[0] = 0;
|
||||
|
||||
if (!build_f2h()) return false;
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
if (kPrintDebug0) {
|
||||
printf("---SpellingTrie Nodes: %d\n", node_num_);
|
||||
}
|
||||
return build_ym_info();
|
||||
#else
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
const char *SpellingTrie::get_ym_str(const char *spl_str) {
|
||||
bool start_ZCS = false;
|
||||
if (is_shengmu_char(*spl_str)) {
|
||||
if ('Z' == *spl_str || 'C' == *spl_str || 'S' == *spl_str) start_ZCS = true;
|
||||
spl_str += 1;
|
||||
if (start_ZCS && 'h' == *spl_str) spl_str += 1;
|
||||
}
|
||||
return spl_str;
|
||||
}
|
||||
|
||||
bool SpellingTrie::build_ym_info() {
|
||||
bool sucess;
|
||||
SpellingTable *spl_table = new SpellingTable();
|
||||
|
||||
sucess = spl_table->init_table(kMaxPinyinSize - 1, 2 * kMaxYmNum, false);
|
||||
assert(sucess);
|
||||
|
||||
for (uint16 pos = 0; pos < spelling_num_; pos++) {
|
||||
const char *spl_str = spelling_buf_ + spelling_size_ * pos;
|
||||
spl_str = get_ym_str(spl_str);
|
||||
if ('\0' != spl_str[0]) {
|
||||
sucess = spl_table->put_spelling(spl_str, 0);
|
||||
assert(sucess);
|
||||
}
|
||||
}
|
||||
|
||||
size_t ym_item_size; // '\0' is included
|
||||
size_t ym_num;
|
||||
const char *ym_buf;
|
||||
ym_buf = spl_table->arrange(&ym_item_size, &ym_num);
|
||||
|
||||
if (NULL != ym_buf_) delete[] ym_buf_;
|
||||
ym_buf_ = new char[ym_item_size * ym_num];
|
||||
if (NULL == ym_buf_) {
|
||||
delete spl_table;
|
||||
return false;
|
||||
}
|
||||
|
||||
memcpy(ym_buf_, ym_buf, sizeof(char) * ym_item_size * ym_num);
|
||||
ym_size_ = ym_item_size;
|
||||
ym_num_ = ym_num;
|
||||
|
||||
delete spl_table;
|
||||
|
||||
// Generate the maping from the spelling ids to the Yunmu ids.
|
||||
if (spl_ym_ids_) delete spl_ym_ids_;
|
||||
spl_ym_ids_ = new uint8[spelling_num_ + kFullSplIdStart];
|
||||
if (NULL == spl_ym_ids_) return false;
|
||||
|
||||
memset(spl_ym_ids_, 0, sizeof(uint8) * (spelling_num_ + kFullSplIdStart));
|
||||
|
||||
for (uint16 id = 1; id < spelling_num_ + kFullSplIdStart; id++) {
|
||||
const char *str = get_spelling_str(id);
|
||||
|
||||
str = get_ym_str(str);
|
||||
if ('\0' != str[0]) {
|
||||
uint8 ym_id = get_ym_id(str);
|
||||
spl_ym_ids_[id] = ym_id;
|
||||
assert(ym_id > 0);
|
||||
} else {
|
||||
spl_ym_ids_[id] = 0;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
SpellingNode *SpellingTrie::construct_spellings_subset(size_t item_start, size_t item_end, size_t level, SpellingNode *parent) {
|
||||
if (level >= spelling_size_ || item_end <= item_start || NULL == parent) return NULL;
|
||||
|
||||
SpellingNode *first_son = NULL;
|
||||
uint16 num_of_son = 0;
|
||||
unsigned char min_son_score = 255;
|
||||
|
||||
const char *spelling_last_start = spelling_buf_ + spelling_size_ * item_start;
|
||||
char char_for_node = spelling_last_start[level];
|
||||
assert(char_for_node >= 'A' && char_for_node <= 'Z' || 'h' == char_for_node);
|
||||
|
||||
// Scan the array to find how many sons
|
||||
for (size_t i = item_start + 1; i < item_end; i++) {
|
||||
const char *spelling_current = spelling_buf_ + spelling_size_ * i;
|
||||
char char_current = spelling_current[level];
|
||||
if (char_current != char_for_node) {
|
||||
num_of_son++;
|
||||
char_for_node = char_current;
|
||||
}
|
||||
}
|
||||
num_of_son++;
|
||||
|
||||
// Allocate memory
|
||||
#ifdef ___BUILD_MODEL___
|
||||
node_num_ += num_of_son;
|
||||
#endif
|
||||
first_son = new SpellingNode[num_of_son];
|
||||
memset(first_son, 0, sizeof(SpellingNode) * num_of_son);
|
||||
|
||||
// Now begin construct tree
|
||||
size_t son_pos = 0;
|
||||
|
||||
spelling_last_start = spelling_buf_ + spelling_size_ * item_start;
|
||||
char_for_node = spelling_last_start[level];
|
||||
|
||||
bool spelling_endable = true;
|
||||
if (spelling_last_start[level + 1] != '\0') spelling_endable = false;
|
||||
|
||||
size_t item_start_next = item_start;
|
||||
|
||||
for (size_t i = item_start + 1; i < item_end; i++) {
|
||||
const char *spelling_current = spelling_buf_ + spelling_size_ * i;
|
||||
char char_current = spelling_current[level];
|
||||
assert(is_valid_spl_char(char_current));
|
||||
|
||||
if (char_current != char_for_node) {
|
||||
// Construct a node
|
||||
SpellingNode *node_current = first_son + son_pos;
|
||||
node_current->char_this_node = char_for_node;
|
||||
|
||||
// For quick search in the first level
|
||||
if (0 == level) level1_sons_[char_for_node - 'A'] = node_current;
|
||||
|
||||
if (spelling_endable) {
|
||||
node_current->spelling_idx = kFullSplIdStart + item_start_next;
|
||||
}
|
||||
|
||||
if (spelling_last_start[level + 1] != '\0' || i - item_start_next > 1) {
|
||||
size_t real_start = item_start_next;
|
||||
if (spelling_last_start[level + 1] == '\0') real_start++;
|
||||
|
||||
node_current->first_son = construct_spellings_subset(real_start, i, level + 1, node_current);
|
||||
|
||||
if (real_start == item_start_next + 1) {
|
||||
uint16 score_this = static_cast<unsigned char>(spelling_last_start[spelling_size_ - 1]);
|
||||
if (score_this < node_current->score) node_current->score = score_this;
|
||||
}
|
||||
} else {
|
||||
node_current->first_son = NULL;
|
||||
node_current->score = static_cast<unsigned char>(spelling_last_start[spelling_size_ - 1]);
|
||||
}
|
||||
|
||||
if (node_current->score < min_son_score) min_son_score = node_current->score;
|
||||
|
||||
bool is_half = false;
|
||||
if (level == 0 && is_szm_char(char_for_node)) {
|
||||
node_current->spelling_idx = static_cast<uint16>(char_for_node - 'A' + 1);
|
||||
|
||||
if (char_for_node > 'C') node_current->spelling_idx++;
|
||||
if (char_for_node > 'S') node_current->spelling_idx++;
|
||||
|
||||
h2f_num_[node_current->spelling_idx] = i - item_start_next;
|
||||
is_half = true;
|
||||
} else if (level == 1 && char_for_node == 'h') {
|
||||
char ch_level0 = spelling_last_start[0];
|
||||
uint16 part_id = 0;
|
||||
if (ch_level0 == 'C')
|
||||
part_id = 'C' - 'A' + 1 + 1;
|
||||
else if (ch_level0 == 'S')
|
||||
part_id = 'S' - 'A' + 1 + 2;
|
||||
else if (ch_level0 == 'Z')
|
||||
part_id = 'Z' - 'A' + 1 + 3;
|
||||
if (0 != part_id) {
|
||||
node_current->spelling_idx = part_id;
|
||||
h2f_num_[node_current->spelling_idx] = i - item_start_next;
|
||||
is_half = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_half) {
|
||||
if (h2f_num_[node_current->spelling_idx] > 0)
|
||||
h2f_start_[node_current->spelling_idx] = item_start_next + kFullSplIdStart;
|
||||
else
|
||||
h2f_start_[node_current->spelling_idx] = 0;
|
||||
}
|
||||
|
||||
// for next sibling
|
||||
spelling_last_start = spelling_current;
|
||||
char_for_node = char_current;
|
||||
item_start_next = i;
|
||||
spelling_endable = true;
|
||||
if (spelling_current[level + 1] != '\0') spelling_endable = false;
|
||||
|
||||
son_pos++;
|
||||
}
|
||||
}
|
||||
|
||||
// the last one
|
||||
SpellingNode *node_current = first_son + son_pos;
|
||||
node_current->char_this_node = char_for_node;
|
||||
|
||||
// For quick search in the first level
|
||||
if (0 == level) level1_sons_[char_for_node - 'A'] = node_current;
|
||||
|
||||
if (spelling_endable) {
|
||||
node_current->spelling_idx = kFullSplIdStart + item_start_next;
|
||||
}
|
||||
|
||||
if (spelling_last_start[level + 1] != '\0' || item_end - item_start_next > 1) {
|
||||
size_t real_start = item_start_next;
|
||||
if (spelling_last_start[level + 1] == '\0') real_start++;
|
||||
|
||||
node_current->first_son = construct_spellings_subset(real_start, item_end, level + 1, node_current);
|
||||
|
||||
if (real_start == item_start_next + 1) {
|
||||
uint16 score_this = static_cast<unsigned char>(spelling_last_start[spelling_size_ - 1]);
|
||||
if (score_this < node_current->score) node_current->score = score_this;
|
||||
}
|
||||
} else {
|
||||
node_current->first_son = NULL;
|
||||
node_current->score = static_cast<unsigned char>(spelling_last_start[spelling_size_ - 1]);
|
||||
}
|
||||
|
||||
if (node_current->score < min_son_score) min_son_score = node_current->score;
|
||||
|
||||
assert(son_pos + 1 == num_of_son);
|
||||
|
||||
bool is_half = false;
|
||||
if (level == 0 && szm_is_enabled(char_for_node)) {
|
||||
node_current->spelling_idx = static_cast<uint16>(char_for_node - 'A' + 1);
|
||||
|
||||
if (char_for_node > 'C') node_current->spelling_idx++;
|
||||
if (char_for_node > 'S') node_current->spelling_idx++;
|
||||
|
||||
h2f_num_[node_current->spelling_idx] = item_end - item_start_next;
|
||||
is_half = true;
|
||||
} else if (level == 1 && char_for_node == 'h') {
|
||||
char ch_level0 = spelling_last_start[0];
|
||||
uint16 part_id = 0;
|
||||
if (ch_level0 == 'C')
|
||||
part_id = 'C' - 'A' + 1 + 1;
|
||||
else if (ch_level0 == 'S')
|
||||
part_id = 'S' - 'A' + 1 + 2;
|
||||
else if (ch_level0 == 'Z')
|
||||
part_id = 'Z' - 'A' + 1 + 3;
|
||||
if (0 != part_id) {
|
||||
node_current->spelling_idx = part_id;
|
||||
h2f_num_[node_current->spelling_idx] = item_end - item_start_next;
|
||||
is_half = true;
|
||||
}
|
||||
}
|
||||
if (is_half) {
|
||||
if (h2f_num_[node_current->spelling_idx] > 0)
|
||||
h2f_start_[node_current->spelling_idx] = item_start_next + kFullSplIdStart;
|
||||
else
|
||||
h2f_start_[node_current->spelling_idx] = 0;
|
||||
}
|
||||
|
||||
parent->num_of_son = num_of_son;
|
||||
parent->score = min_son_score;
|
||||
return first_son;
|
||||
}
|
||||
|
||||
bool SpellingTrie::save_spl_trie(FILE *fp) {
|
||||
if (NULL == fp || NULL == spelling_buf_) return false;
|
||||
|
||||
if (fwrite(&spelling_size_, sizeof(uint32), 1, fp) != 1) return false;
|
||||
|
||||
if (fwrite(&spelling_num_, sizeof(uint32), 1, fp) != 1) return false;
|
||||
|
||||
if (fwrite(&score_amplifier_, sizeof(float), 1, fp) != 1) return false;
|
||||
|
||||
if (fwrite(&average_score_, sizeof(unsigned char), 1, fp) != 1) return false;
|
||||
|
||||
if (fwrite(spelling_buf_, sizeof(char) * spelling_size_, spelling_num_, fp) != spelling_num_) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SpellingTrie::load_spl_trie(FILE *fp) {
|
||||
if (NULL == fp) return false;
|
||||
|
||||
if (fread(&spelling_size_, sizeof(uint32), 1, fp) != 1) return false;
|
||||
|
||||
if (fread(&spelling_num_, sizeof(uint32), 1, fp) != 1) return false;
|
||||
|
||||
if (fread(&score_amplifier_, sizeof(float), 1, fp) != 1) return false;
|
||||
|
||||
if (fread(&average_score_, sizeof(unsigned char), 1, fp) != 1) return false;
|
||||
|
||||
if (NULL != spelling_buf_) delete[] spelling_buf_;
|
||||
|
||||
spelling_buf_ = new char[spelling_size_ * spelling_num_];
|
||||
if (NULL == spelling_buf_) return false;
|
||||
|
||||
if (fread(spelling_buf_, sizeof(char) * spelling_size_, spelling_num_, fp) != spelling_num_) return false;
|
||||
|
||||
return construct(spelling_buf_, spelling_size_, spelling_num_, score_amplifier_, average_score_);
|
||||
}
|
||||
|
||||
bool SpellingTrie::build_f2h() {
|
||||
if (NULL != f2h_) delete[] f2h_;
|
||||
f2h_ = new uint16[spelling_num_];
|
||||
if (NULL == f2h_) return false;
|
||||
|
||||
for (uint16 hid = 0; hid < kFullSplIdStart; hid++) {
|
||||
for (uint16 fid = h2f_start_[hid]; fid < h2f_start_[hid] + h2f_num_[hid]; fid++) f2h_[fid - kFullSplIdStart] = hid;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t SpellingTrie::get_spelling_num() { return spelling_num_; }
|
||||
|
||||
uint8 SpellingTrie::get_ym_id(const char *ym_str) {
|
||||
if (NULL == ym_str || NULL == ym_buf_) return 0;
|
||||
|
||||
for (uint8 pos = 0; pos < ym_num_; pos++)
|
||||
if (strcmp(ym_buf_ + ym_size_ * pos, ym_str) == 0) return pos + 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
const char *SpellingTrie::get_spelling_str(uint16 splid) {
|
||||
splstr_queried_[0] = '\0';
|
||||
|
||||
if (splid >= kFullSplIdStart) {
|
||||
splid -= kFullSplIdStart;
|
||||
snprintf(splstr_queried_, spelling_size_, "%s", spelling_buf_ + splid * spelling_size_);
|
||||
} else {
|
||||
if (splid == 'C' - 'A' + 1 + 1) {
|
||||
snprintf(splstr_queried_, spelling_size_, "%s", "Ch");
|
||||
} else if (splid == 'S' - 'A' + 1 + 2) {
|
||||
snprintf(splstr_queried_, spelling_size_, "%s", "Sh");
|
||||
} else if (splid == 'Z' - 'A' + 1 + 3) {
|
||||
snprintf(splstr_queried_, spelling_size_, "%s", "Zh");
|
||||
} else {
|
||||
if (splid > 'C' - 'A' + 1) splid--;
|
||||
if (splid > 'S' - 'A' + 1) splid--;
|
||||
splstr_queried_[0] = 'A' + splid - 1;
|
||||
splstr_queried_[1] = '\0';
|
||||
}
|
||||
}
|
||||
return splstr_queried_;
|
||||
}
|
||||
|
||||
const char16 *SpellingTrie::get_spelling_str16(uint16 splid) {
|
||||
splstr16_queried_[0] = '\0';
|
||||
|
||||
if (splid >= kFullSplIdStart) {
|
||||
splid -= kFullSplIdStart;
|
||||
for (size_t pos = 0; pos < spelling_size_; pos++) {
|
||||
splstr16_queried_[pos] = static_cast<char16>(spelling_buf_[splid * spelling_size_ + pos]);
|
||||
}
|
||||
} else {
|
||||
if (splid == 'C' - 'A' + 1 + 1) {
|
||||
splstr16_queried_[0] = static_cast<char16>('C');
|
||||
splstr16_queried_[1] = static_cast<char16>('h');
|
||||
splstr16_queried_[2] = static_cast<char16>('\0');
|
||||
} else if (splid == 'S' - 'A' + 1 + 2) {
|
||||
splstr16_queried_[0] = static_cast<char16>('S');
|
||||
splstr16_queried_[1] = static_cast<char16>('h');
|
||||
splstr16_queried_[2] = static_cast<char16>('\0');
|
||||
} else if (splid == 'Z' - 'A' + 1 + 3) {
|
||||
splstr16_queried_[0] = static_cast<char16>('Z');
|
||||
splstr16_queried_[1] = static_cast<char16>('h');
|
||||
splstr16_queried_[2] = static_cast<char16>('\0');
|
||||
} else {
|
||||
if (splid > 'C' - 'A' + 1) splid--;
|
||||
if (splid > 'S' - 'A' + 1) splid--;
|
||||
splstr16_queried_[0] = 'A' + splid - 1;
|
||||
splstr16_queried_[1] = '\0';
|
||||
}
|
||||
}
|
||||
return splstr16_queried_;
|
||||
}
|
||||
|
||||
size_t SpellingTrie::get_spelling_str16(uint16 splid, char16 *splstr16, size_t splstr16_len) {
|
||||
if (NULL == splstr16 || splstr16_len < kMaxPinyinSize + 1) return 0;
|
||||
|
||||
if (splid >= kFullSplIdStart) {
|
||||
splid -= kFullSplIdStart;
|
||||
for (size_t pos = 0; pos <= kMaxPinyinSize; pos++) {
|
||||
splstr16[pos] = static_cast<char16>(spelling_buf_[splid * spelling_size_ + pos]);
|
||||
if (static_cast<char16>('\0') == splstr16[pos]) {
|
||||
return pos;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (splid == 'C' - 'A' + 1 + 1) {
|
||||
splstr16[0] = static_cast<char16>('C');
|
||||
splstr16[1] = static_cast<char16>('h');
|
||||
splstr16[2] = static_cast<char16>('\0');
|
||||
return 2;
|
||||
} else if (splid == 'S' - 'A' + 1 + 2) {
|
||||
splstr16[0] = static_cast<char16>('S');
|
||||
splstr16[1] = static_cast<char16>('h');
|
||||
splstr16[2] = static_cast<char16>('\0');
|
||||
return 2;
|
||||
} else if (splid == 'Z' - 'A' + 1 + 3) {
|
||||
splstr16[0] = static_cast<char16>('Z');
|
||||
splstr16[1] = static_cast<char16>('h');
|
||||
splstr16[2] = static_cast<char16>('\0');
|
||||
return 2;
|
||||
} else {
|
||||
if (splid > 'C' - 'A' + 1) splid--;
|
||||
if (splid > 'S' - 'A' + 1) splid--;
|
||||
splstr16[0] = 'A' + splid - 1;
|
||||
splstr16[1] = '\0';
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Not reachable.
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace ime_pinyin
|
Reference in New Issue
Block a user