Files
googlepinyinime-rev/tests/main.cpp

121 lines
4.9 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "../src/include/pinyinime.h"

#include <chrono>
#include <cstring>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

#include <utf8cpp/utf8.h>

// Only needed for SetConsoleOutputCP, which main() calls under the same guard.
#ifdef _WIN32
#include <Windows.h>
#endif
/// Converts a buffer of UTF-16 code units into a UTF-8 encoded std::string.
///
/// @param buf Pointer to the first UTF-16 code unit. May be null, in which
///            case an empty string is returned.
/// @param len Number of code units (not bytes) to convert; no terminator is
///            expected or consumed.
/// @return UTF-8 encoding of the first @p len code units of @p buf, or an
///         empty string when @p buf is null or @p len is 0.
///
/// NOTE(review): malformed UTF-16 (e.g. an unpaired surrogate) is handled by
/// utf8::utf16to8 itself; presumably it throws — confirm against the utfcpp
/// version in use.
std::string fromUtf16(const ime_pinyin::char16 *buf, size_t len) {
    std::string utf8Result;
    if (buf == nullptr || len == 0) {
        return utf8Result;
    }
    // Hand the code units to utfcpp through the original pointer type instead
    // of reinterpret_cast-ing to char16_t*: utf16to8 is templated on the
    // iterator, so no aliasing cast and no intermediate std::u16string copy
    // are required.
    // Assumes ime_pinyin::char16 is a 16-bit integer type — TODO confirm.
    utf8::utf16to8(buf, buf + len, std::back_inserter(utf8Result));
    return utf8Result;
}
/// Runs one pinyin query through the engine and prints, to stdout, the
/// segmented pinyin, every candidate and the elapsed time in microseconds.
///
/// The timer spans the whole function, so the reported duration includes the
/// console output as well as the engine calls.
///
/// @param user_pinyin Pinyin input handed to ime_pinyin::im_search unchanged.
void test_pinyin_search_and_segment(const std::string &user_pinyin) {
    const auto start_time = std::chrono::high_resolution_clock::now(); // start timing

    const char *pinyin = user_pinyin.c_str();
    // strlen rather than size(): the length given to the engine stops at the
    // first NUL byte, exactly as it did before.
    const size_t pinyin_len = strlen(pinyin);
    const size_t cand_cnt = ime_pinyin::im_search(pinyin, pinyin_len);

    const ime_pinyin::uint16 *spl_start = nullptr;
    const size_t segment_count = ime_pinyin::im_get_spl_start_pos(spl_start);
    if (spl_start != nullptr && segment_count > 0) {
        std::string segmented_pinyin;
        for (size_t i = 0; i < segment_count; ++i) {
            // Segment i is the half-open range [spl_start[i], spl_start[i + 1]),
            // so the array is read up to index segment_count.
            // NOTE(review): relies on the engine providing segment_count + 1
            // offsets — confirm against pinyinime.h.
            size_t start = spl_start[i];
            size_t end = spl_start[i + 1];
            // Clamp the offsets to the input so that an out-of-range or
            // out-of-order pair yields an empty/truncated segment instead of
            // an out-of-bounds read or a size_t underflow in `end - start`.
            if (end > pinyin_len) {
                end = pinyin_len;
            }
            if (start > end) {
                start = end;
            }
            if (i > 0) {
                segmented_pinyin += "'"; // separator between adjacent segments
            }
            segmented_pinyin.append(pinyin + start, end - start);
        }
        std::cout << "拼音分词:" << segmented_pinyin << "\n";
    } else {
        std::cout << "Failed to get segments or no segments found!\n";
    }

    // Room for 255 code units plus a guaranteed terminating zero.
    constexpr size_t kCandBufLen = 256;
    std::string msg;
    for (size_t i = 0; i < cand_cnt; ++i) {
        ime_pinyin::char16 buf[kCandBufLen] = {0};
        ime_pinyin::im_get_candidate(i, buf, kCandBufLen - 1);
        // Measure the zero-terminated candidate; the bound is tested first so
        // the scan can never index past the buffer.
        size_t len = 0;
        while (len < kCandBufLen - 1 && buf[len] != 0) {
            ++len;
        }
        // Convert once and append in place (no temporary concatenation).
        msg += fromUtf16(buf, len);
        msg += " ";
    }
    std::cout << "候选项数量: " << cand_cnt << std::endl;
    std::cout << "候选项本体: " << msg << std::endl;

    const auto end_time = std::chrono::high_resolution_clock::now(); // stop timing
    const auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
    std::cout << "函数执行时间: " << duration.count() << " 微秒\n";
}
/// Runs one pinyin query through the engine but fetches only the first
/// candidate, then prints it together with the elapsed time in microseconds.
///
/// The "candidate count" line prints the count after it has been capped to at
/// most 1, i.e. the number of candidates actually retrieved (0 or 1), not the
/// total reported by the search. The timer covers the console output too.
///
/// @param user_pinyin Pinyin input handed to ime_pinyin::im_search unchanged.
void test_pinyin_search_when_retriving_first_element(const std::string &user_pinyin) {
    const auto start_time = std::chrono::high_resolution_clock::now(); // start timing

    const char *pinyin = user_pinyin.c_str();
    // strlen rather than size(): the length given to the engine stops at the
    // first NUL byte, exactly as it did before.
    size_t cand_cnt = ime_pinyin::im_search(pinyin, strlen(pinyin));

    // Only the top candidate is of interest here.
    if (cand_cnt > 1) {
        cand_cnt = 1;
    }

    // Room for 255 code units plus a guaranteed terminating zero.
    constexpr size_t kCandBufLen = 256;
    std::string msg;
    if (cand_cnt > 0) {
        ime_pinyin::char16 buf[kCandBufLen] = {0};
        ime_pinyin::im_get_candidate(0, buf, kCandBufLen - 1);
        // Measure the zero-terminated candidate; the bound is tested first so
        // the scan can never index past the buffer.
        size_t len = 0;
        while (len < kCandBufLen - 1 && buf[len] != 0) {
            ++len;
        }
        // Convert once and append in place (no temporary concatenation).
        msg += fromUtf16(buf, len);
        msg += " ";
    }
    std::cout << "候选项数量: " << cand_cnt << std::endl;
    std::cout << "候选项本体: " << msg << std::endl;

    const auto end_time = std::chrono::high_resolution_clock::now(); // stop timing
    const auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
    std::cout << "函数执行时间: " << duration.count() << " 微秒\n";
}
/// Test driver: opens the pinyin decoder, runs a fixed list of sample inputs
/// through both test routines, and closes the decoder again.
///
/// @return 0 when the decoder was opened and all samples were run, 1 when the
///         decoder could not be opened.
int main() {
#ifdef _WIN32
    // Switch the console to UTF-8 so the Chinese output is rendered correctly.
    SetConsoleOutputCP(CP_UTF8);
#endif
    ime_pinyin::im_set_max_lens(64, 32);

    const char *const sys_dict_path = "./data/dict_pinyin.dat";
    const char *const user_dict_path = "./data/user_dict.dat";
    if (!ime_pinyin::im_open_decoder(sys_dict_path, user_dict_path)) {
        // Report on stderr and exit non-zero so scripts/CI notice the failure
        // (previously this returned 0, indistinguishable from success).
        std::cerr << "Failed to open pinyin decoder (system dict: " << sys_dict_path
                  << ", user dict: " << user_dict_path << ")\n";
        return 1;
    }

    // Inputs for the full search + segmentation test.
    const char *const segment_inputs[] = {
        "qusiba",
        "shuoshenmene",
        "yiwushichushima",
        "zhonghuarenmingongheguo",
        "meilijianhezhongguo",
        "qunimadegouridequsibawonengzenmeban",
        "kanbuchulaishizenmexianzhichangdude",
        "ninininininininininininininininini",
        "jingjiandao",
        "zhen'ta'ma'an'jing",
        "zh'ta'ma'an'jing",
        "ni'shuo'ni'ma'ne",
    };
    for (const char *input : segment_inputs) {
        test_pinyin_search_and_segment(input);
    }

    // Inputs for the "first candidate only" test.
    const char *const first_candidate_inputs[] = {
        "qunimadegouridequsibawonengzenmeban",
        "zh'ta'ma'an'jing",
        "ni'shuo'ne'ma'de",
    };
    for (const char *input : first_candidate_inputs) {
        test_pinyin_search_when_retriving_first_element(input);
    }

    // Counterpart of im_open_decoder above, so the engine is shut down
    // explicitly rather than abandoned at process exit.
    // NOTE(review): assumes pinyinime.h declares im_close_decoder() as in the
    // upstream engine — confirm for this revision.
    ime_pinyin::im_close_decoder();
    return 0;
}