mirror of
https://github.com/fanlumaster/googlepinyinime-rev.git
synced 2025-07-18 17:08:00 +08:00
update
This commit is contained in:
@ -1,35 +1,77 @@
|
||||
#include "../src/include/pinyinime.h"
|
||||
#include <codecvt>
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
#include <locale>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
/// Converts a buffer of UTF-16 code units into a UTF-8 encoded std::string.
///
/// @param buf  Pointer to the first code unit. A null pointer yields an empty
///             string instead of being dereferenced.
/// @param len  Number of code units (not bytes) to read from `buf`.
/// @return     UTF-8 encoding of the first `len` code units of `buf`.
/// @throws std::range_error  propagated from std::wstring_convert::to_bytes
///                           when the input cannot be converted.
std::string fromUtf16(const ime_pinyin::char16 *buf, size_t len) {
    if (buf == nullptr || len == 0) {
        return {};
    }

    // Convert to standard char16_t by copying each code unit. The range
    // constructor performs a value conversion per element, so the buffer is
    // never read through a pointer of an unrelated type (which the previous
    // reinterpret_cast did).
    const std::u16string utf16Str(buf, buf + len);

    std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
    return convert.to_bytes(utf16Str);
}
|
||||
|
||||
int main() {
|
||||
if (!ime_pinyin::im_open_decoder("./data/dict_pinyin.dat", "./data/user_dict.dat")) {
|
||||
std::cout << "fany bug.\n";
|
||||
return 0;
|
||||
// Runs one pinyin query and prints what the engine reports for it:
//   1. calls im_search on `user_pinyin`,
//   2. fetches the syllable start offsets with im_get_spl_start_pos and prints
//      the input with "'" inserted between consecutive segments (or a failure
//      message when no offsets are available),
//   3. reads each candidate with im_get_candidate, converts it with fromUtf16
//      and prints the candidate count and the space-separated candidates,
//   4. prints the elapsed wall time in microseconds.
//
// NOTE(review): this span looks like a rendered diff in which removed and added
// lines are interleaved without markers -- `pinyin` and `cand_cnt` are declared
// twice and `return 0;` appears inside a void function. Confirm the real body
// against the repository before relying on this text.
// NOTE(review): strlen is used below but <cstring> is not among the includes
// visible in this file; presumably it arrives transitively -- confirm.
void test_pinyin_search_and_segment(const std::string &user_pinyin) {
|
||||
auto start_time = std::chrono::high_resolution_clock::now(); // start timing
|
||||
|
||||
std::string pinyin_str = user_pinyin;
|
||||
|
||||
const char *pinyin = pinyin_str.c_str();
|
||||
size_t cand_cnt = ime_pinyin::im_search(pinyin, strlen(pinyin));
|
||||
const ime_pinyin::uint16 *spl_start = nullptr;
|
||||
// spl_start is passed by name and tested against nullptr afterwards, so the
// call is expected to point it at the engine's offset table.
size_t segment_count = ime_pinyin::im_get_spl_start_pos(spl_start);
|
||||
|
||||
if (spl_start != nullptr && segment_count > 0) {
|
||||
std::string segmented_pinyin;
|
||||
for (size_t i = 0; i < segment_count; ++i) {
|
||||
size_t start = spl_start[i];
|
||||
// NOTE(review): reads index segment_count on the last iteration, so this
// assumes the table holds segment_count + 1 entries -- confirm.
size_t end = spl_start[i + 1];
|
||||
std::string segment(pinyin + start, end - start);
|
||||
segmented_pinyin += segment;
|
||||
if (i < segment_count - 1) {
|
||||
segmented_pinyin += "'"; // insert the separator between segments
|
||||
}
|
||||
}
|
||||
std::cout << "拼音分词:" << segmented_pinyin << "\n";
|
||||
} else {
|
||||
std::cout << "Failed to get segments or no segments found!\n";
|
||||
}
|
||||
|
||||
// NOTE(review): the hard-coded `pinyin` assignments and the second im_search
// call below redeclare names already defined above; they look like lines the
// commit removed -- confirm.
std::string pinyin = "ni'ma'si'le";
|
||||
pinyin = "ni'ma'mei'si";
|
||||
|
||||
pinyin = "ni'shuo'ni'ma'ne";
|
||||
size_t cand_cnt = ime_pinyin::im_search(pinyin.c_str(), pinyin.size());
|
||||
ime_pinyin::char16 buf[256] = {0};
|
||||
std::string msg;
|
||||
std::vector<std::string> candidateList;
|
||||
for (size_t i = 0; i < cand_cnt; ++i) {
|
||||
ime_pinyin::char16 buf[256] = {0};
|
||||
ime_pinyin::im_get_candidate(i, buf, 255);
|
||||
size_t len = 0;
|
||||
// Measure the candidate: count code units up to the first zero, capped at 255.
while (buf[len] != 0 && len < 255) ++len;
|
||||
msg.append(fromUtf16(buf, len) + " ");
|
||||
candidateList.push_back(fromUtf16(buf, len));
|
||||
}
|
||||
|
||||
std::cout << "候选项数量: " << cand_cnt << std::endl;
|
||||
std::cout << "候选项本体: " << msg << std::endl;
|
||||
return 0;
|
||||
|
||||
auto end_time = std::chrono::high_resolution_clock::now(); // stop timing
|
||||
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
|
||||
|
||||
std::cout << "函数执行时间: " << duration.count() << " 微秒\n";
|
||||
}
|
||||
|
||||
/// Test driver: opens the pinyin decoder and runs a fixed list of queries
/// through test_pinyin_search_and_segment.
///
/// @return 0 when every query was run, 1 when the decoder could not be opened.
int main() {
    ime_pinyin::im_set_max_lens(80, 30);
    if (!ime_pinyin::im_open_decoder("./data/dict_pinyin.dat", "./data/user_dict.dat")) {
        std::cout << "fany bug.\n";
        // Signal the failure through the exit status; returning 0 here made a
        // missing or unreadable dictionary look like a successful run.
        return 1;
    }

    // Queries run with the limits set before the decoder was opened (80, 30).
    const char *const first_batch[] = {
        "qusiba",
        "shuoshenmene",
        "yiwushichushima",
        "zhonghuarenmingongheguo",
        "meilijianhezhongguo",
    };
    for (const char *query : first_batch) {
        test_pinyin_search_and_segment(query);
    }

    // Change the limits and reset the search state before the longer queries.
    ime_pinyin::im_set_max_lens(64, 64);
    ime_pinyin::im_reset_search();

    const char *const second_batch[] = {
        "qunimadegouridequsibawonengzenmeban",
        "kanbuchulaishizenmexianzhichangdude",
    };
    for (const char *query : second_batch) {
        test_pinyin_search_and_segment(query);
    }

    // NOTE(review): the decoder is never explicitly closed before exit; confirm
    // whether pinyinime.h provides a close call that should run here.
    return 0;
}
|
Reference in New Issue
Block a user