adapt to win32

2025-09-19 04:06:11 +08:00 · 2025-01-02 06:19:47 +08:00
parent 155d6eb410
commit 12abe936bf
13 changed files with 2864 additions and 2714 deletions
--- a/command/pinyinime_dictbuilder.cpp
+++ b/command/pinyinime_dictbuilder.cpp
@ -1,55 +1,59 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <assert.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <time.h>
-#include <unistd.h>
-#include "../src/include/dicttrie.h"
-
-using namespace ime_pinyin;
-
-/**
- * Build binary dictionary model. Make sure that ___BUILD_MODEL___ is defined
- * in dictdef.h.
- */
-int main(int argc, char* argv[]) {
-    DictTrie* dict_trie = new DictTrie();
-    bool success;
-    if (argc >= 3)
-        success = dict_trie->build_dict(argv[1], argv[2]);
-    else
-        success = dict_trie->build_dict("../data/rawdict_utf16_65105_freq.txt", "../data/valid_utf16.txt");
-
-    if (success) {
-        printf("Build dictionary successfully.\n");
-    } else {
-        printf("Build dictionary unsuccessfully.\n");
-        return -1;
-    }
-
-    success = dict_trie->save_dict("./dict/dict_pinyin.dat");
-
-    if (success) {
-        printf("Save dictionary successfully.\n");
-    } else {
-        printf("Save dictionary unsuccessfully.\n");
-        return -1;
-    }
-
-    return 0;
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+#ifdef _WIN32
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+#include "../src/include/dicttrie.h"
+
+using namespace ime_pinyin;
+
+/**
+ * Build binary dictionary model. Make sure that ___BUILD_MODEL___ is defined
+ * in dictdef.h.
+ */
+int main(int argc, char* argv[]) {
+    DictTrie* dict_trie = new DictTrie();
+    bool success;
+    if (argc >= 3)
+        success = dict_trie->build_dict(argv[1], argv[2]);
+    else
+        success = dict_trie->build_dict("../data/rawdict_utf16_65105_freq.txt", "../data/valid_utf16.txt");
+
+    if (success) {
+        printf("Build dictionary successfully.\n");
+    } else {
+        printf("Build dictionary unsuccessfully.\n");
+        return -1;
+    }
+
+    success = dict_trie->save_dict("./dict/dict_pinyin.dat");
+
+    if (success) {
+        printf("Save dictionary successfully.\n");
+    } else {
+        printf("Save dictionary unsuccessfully.\n");
+        return -1;
+    }
+
+    return 0;
 }
--- a/command/scripts/lcompile.sh
+++ b/command/scripts/lcompile.sh
@ -1,23 +1,23 @@
-#!/bin/bash
-currentDirectory=$(pwd)
-cmakeListsPath="${currentDirectory}/CMakeLists.txt"
-
-if [ ! -f "$cmakeListsPath" ]; then
-  echo "No CMakeLists.txt in current directory, please check."
-  exit 1
-fi
-
-echo "Start generating and compiling..."
-
-buildFolderPath="./build"
-
-if [ ! -d "$buildFolderPath" ]; then
-  mkdir -p "$buildFolderPath"
-  echo "build folder created."
-fi
-
-cmake -G "Unix Makefiles" -D CMAKE_CXX_COMPILER=/usr/bin/g++ -S . -B ./build/
-
-if [ $? -eq 0 ]; then
-  cmake --build ./build/ --config DEBUG
-fi
+#!/bin/bash
+currentDirectory=$(pwd)
+cmakeListsPath="${currentDirectory}/CMakeLists.txt"
+
+if [ ! -f "$cmakeListsPath" ]; then
+  echo "No CMakeLists.txt in current directory, please check."
+  exit 1
+fi
+
+echo "Start generating and compiling..."
+
+buildFolderPath="./build"
+
+if [ ! -d "$buildFolderPath" ]; then
+  mkdir -p "$buildFolderPath"
+  echo "build folder created."
+fi
+
+cmake -G "Unix Makefiles" -D CMAKE_CXX_COMPILER=/usr/bin/g++ -S . -B ./build/
+
+if [ $? -eq 0 ]; then
+  cmake --build ./build/ --config DEBUG
+fi
--- a/command/scripts/llaunch.sh
+++ b/command/scripts/llaunch.sh
@ -1,45 +1,45 @@
-#!/bin/bash
-currentDirectory=$(pwd)
-cmakeListsPath="${currentDirectory}/CMakeLists.txt"
-
-if [ ! -f "$cmakeListsPath" ]; then
-  echo "No CMakeLists.txt in current directory, please check."
-  exit 1
-fi
-
-echo "Start generating and compiling..."
-
-buildFolderPath="./build"
-
-if [ ! -d "$buildFolderPath" ]; then
-  mkdir -p "$buildFolderPath"
-  echo "build folder created."
-fi
-
-cmake -G "Unix Makefiles" -D CMAKE_CXX_COMPILER=/usr/bin/g++ -S . -B ./build/
-
-if [ $? -eq 0 ]; then
-  cmake --build ./build/ --config DEBUG
-  if [ $? -eq 0 ]; then
-    content=$(<"./CMakeLists.txt")
-    exePath=""
-    while IFS= read -r line; do
-      if [[ $line == "set(MY_EXECUTABLE_NAME"* ]]; then
-        pattern="\"([^\"]+)\""
-        if [[ $line =~ $pattern ]]; then
-          contentInParentheses="${BASH_REMATCH[1]}"
-          result=($contentInParentheses)
-          exePath="./build/bin/${result[0]}"
-          echo "start running as follows..."
-          echo "=================================================="
-        fi
-      fi
-    done <<<"$content"
-    # execute the binary file
-    if [ -n "$exePath" ]; then
-      $exePath
-    else
-      echo "cannot find executable file path"
-    fi
-  fi
-fi
+#!/bin/bash
+currentDirectory=$(pwd)
+cmakeListsPath="${currentDirectory}/CMakeLists.txt"
+
+if [ ! -f "$cmakeListsPath" ]; then
+  echo "No CMakeLists.txt in current directory, please check."
+  exit 1
+fi
+
+echo "Start generating and compiling..."
+
+buildFolderPath="./build"
+
+if [ ! -d "$buildFolderPath" ]; then
+  mkdir -p "$buildFolderPath"
+  echo "build folder created."
+fi
+
+cmake -G "Unix Makefiles" -D CMAKE_CXX_COMPILER=/usr/bin/g++ -S . -B ./build/
+
+if [ $? -eq 0 ]; then
+  cmake --build ./build/ --config DEBUG
+  if [ $? -eq 0 ]; then
+    content=$(<"./CMakeLists.txt")
+    exePath=""
+    while IFS= read -r line; do
+      if [[ $line == "set(MY_EXECUTABLE_NAME"* ]]; then
+        pattern="\"([^\"]+)\""
+        if [[ $line =~ $pattern ]]; then
+          contentInParentheses="${BASH_REMATCH[1]}"
+          result=($contentInParentheses)
+          exePath="./build/bin/${result[0]}"
+          echo "start running as follows..."
+          echo "=================================================="
+        fi
+      fi
+    done <<<"$content"
+    # execute the binary file
+    if [ -n "$exePath" ]; then
+      $exePath
+    else
+      echo "cannot find executable file path"
+    fi
+  fi
+fi
--- a/command/scripts/lrun.sh
+++ b/command/scripts/lrun.sh
@ -1,18 +1,18 @@
-content=$(<"./CMakeLists.txt")
-exePath=""
-while IFS= read -r line; do
-  if [[ $line == "set(MY_EXECUTABLE_NAME"* ]]; then
-    pattern="\"([^\"]+)\""
-    if [[ $line =~ $pattern ]]; then
-      contentInParentheses="${BASH_REMATCH[1]}"
-      result=($contentInParentheses)
-      exePath="./build/bin/${result[0]}"
-    fi
-  fi
-done <<<"$content"
-
-if [ -n "$exePath" ]; then
-  $exePath
-else
-  echo "cannot find executable file path"
-fi
+content=$(<"./CMakeLists.txt")
+exePath=""
+while IFS= read -r line; do
+  if [[ $line == "set(MY_EXECUTABLE_NAME"* ]]; then
+    pattern="\"([^\"]+)\""
+    if [[ $line =~ $pattern ]]; then
+      contentInParentheses="${BASH_REMATCH[1]}"
+      result=($contentInParentheses)
+      exePath="./build/bin/${result[0]}"
+    fi
+  fi
+done <<<"$content"
+
+if [ -n "$exePath" ]; then
+  $exePath
+else
+  echo "cannot find executable file path"
+fi
--- a/scripts/lcompile.ps1
+++ b/scripts/lcompile.ps1
@ -0,0 +1,26 @@
+# generate compile to exe files
+$currentDirectory = Get-Location
+$cmakeListsPath = Join-Path -Path $currentDirectory -ChildPath "CMakeLists.txt"
+
+if (-not (Test-Path $cmakeListsPath))
+{
+  Write-Host("No CMakeLists.txt in current directory, please check.")
+  return
+}
+
+Write-Host "Start generating and compiling..."
+
+$buildFolderPath = ".\build"
+
+if (-not (Test-Path $buildFolderPath))
+{
+  New-Item -ItemType Directory -Path $buildFolderPath | Out-Null
+  Write-Host "build folder created."
+}
+
+cmake -G "Visual Studio 17 2022" -A x64 -S . -B ./build/
+
+if ($LASTEXITCODE -eq 0)
+{
+  cmake --build ./build/ --config DEBUG
+}
--- a/scripts/lcompile.sh
+++ b/scripts/lcompile.sh
@ -1,23 +1,23 @@
-#!/bin/bash
-currentDirectory=$(pwd)
-cmakeListsPath="${currentDirectory}/CMakeLists.txt"
-
-if [ ! -f "$cmakeListsPath" ]; then
-  echo "No CMakeLists.txt in current directory, please check."
-  exit 1
-fi
-
-echo "Start generating and compiling..."
-
-buildFolderPath="./build"
-
-if [ ! -d "$buildFolderPath" ]; then
-  mkdir -p "$buildFolderPath"
-  echo "build folder created."
-fi
-
-cmake -G "Unix Makefiles" -D CMAKE_CXX_COMPILER=/usr/bin/g++ -S . -B ./build/
-
-if [ $? -eq 0 ]; then
-  cmake --build ./build/ --config DEBUG
-fi
+#!/bin/bash
+currentDirectory=$(pwd)
+cmakeListsPath="${currentDirectory}/CMakeLists.txt"
+
+if [ ! -f "$cmakeListsPath" ]; then
+  echo "No CMakeLists.txt in current directory, please check."
+  exit 1
+fi
+
+echo "Start generating and compiling..."
+
+buildFolderPath="./build"
+
+if [ ! -d "$buildFolderPath" ]; then
+  mkdir -p "$buildFolderPath"
+  echo "build folder created."
+fi
+
+cmake -G "Unix Makefiles" -D CMAKE_CXX_COMPILER=/usr/bin/g++ -S . -B ./build/
+
+if [ $? -eq 0 ]; then
+  cmake --build ./build/ --config DEBUG
+fi
--- a/scripts/llaunch.ps1
+++ b/scripts/llaunch.ps1
@ -0,0 +1,45 @@
+#
+# generate, compile and run exe files
+#
+function getExePathFromCMakeLists() {
+    $content = Get-Content -Raw -Path "./CMakeLists.txt"
+    $exePath = ""
+    foreach ($line in $content -split "`n") {
+        if ($line -match 'set\(MY_EXECUTABLE_NAME[^\"]*\"([^\"]+)\"') {
+            $exeName = $matches[1]
+            $exePath = "./build/bin/Debug/$exeName" + ".exe"
+            break
+        }
+    }
+    return $exePath
+}
+
+$currentDirectory = Get-Location
+$cmakeListsPath = Join-Path -Path $currentDirectory -ChildPath "CMakeLists.txt"
+
+if (-not (Test-Path $cmakeListsPath)) {
+    Write-Host("No CMakeLists.txt in current directory, please check.")
+    return
+}
+
+Write-Host "Start generating and compiling..."
+
+$buildFolderPath = ".\build"
+
+if (-not (Test-Path $buildFolderPath)) {
+    New-Item -ItemType Directory -Path $buildFolderPath | Out-Null
+    Write-Host "build folder created."
+}
+
+cmake -G "Visual Studio 17 2022" -A x64 -S . -B ./build/
+
+if ($LASTEXITCODE -eq 0) {
+    cmake --build ./build/ --config DEBUG
+    if ($LASTEXITCODE -eq 0) {
+        $exePath = getExePathFromCMakeLists
+        Write-Host "start running as follows..."
+        Write-Host "=================================================="
+        Invoke-Expression $exePath
+    }
+}
+
--- a/scripts/llaunch.sh
+++ b/scripts/llaunch.sh
@ -1,45 +1,45 @@
-#!/bin/bash
-currentDirectory=$(pwd)
-cmakeListsPath="${currentDirectory}/CMakeLists.txt"
-
-if [ ! -f "$cmakeListsPath" ]; then
-  echo "No CMakeLists.txt in current directory, please check."
-  exit 1
-fi
-
-echo "Start generating and compiling..."
-
-buildFolderPath="./build"
-
-if [ ! -d "$buildFolderPath" ]; then
-  mkdir -p "$buildFolderPath"
-  echo "build folder created."
-fi
-
-cmake -G "Unix Makefiles" -D CMAKE_CXX_COMPILER=/usr/bin/g++ -S . -B ./build/
-
-if [ $? -eq 0 ]; then
-  cmake --build ./build/ --config DEBUG
-  if [ $? -eq 0 ]; then
-    content=$(<"./CMakeLists.txt")
-    exePath=""
-    while IFS= read -r line; do
-      if [[ $line == "set(MY_EXECUTABLE_NAME"* ]]; then
-        pattern="\"([^\"]+)\""
-        if [[ $line =~ $pattern ]]; then
-          contentInParentheses="${BASH_REMATCH[1]}"
-          result=($contentInParentheses)
-          exePath="./build/bin/${result[0]}"
-          echo "start running as follows..."
-          echo "=================================================="
-        fi
-      fi
-    done <<<"$content"
-    # execute the binary file
-    if [ -n "$exePath" ]; then
-      $exePath
-    else
-      echo "cannot find executable file path"
-    fi
-  fi
-fi
+#!/bin/bash
+currentDirectory=$(pwd)
+cmakeListsPath="${currentDirectory}/CMakeLists.txt"
+
+if [ ! -f "$cmakeListsPath" ]; then
+  echo "No CMakeLists.txt in current directory, please check."
+  exit 1
+fi
+
+echo "Start generating and compiling..."
+
+buildFolderPath="./build"
+
+if [ ! -d "$buildFolderPath" ]; then
+  mkdir -p "$buildFolderPath"
+  echo "build folder created."
+fi
+
+cmake -G "Unix Makefiles" -D CMAKE_CXX_COMPILER=/usr/bin/g++ -S . -B ./build/
+
+if [ $? -eq 0 ]; then
+  cmake --build ./build/ --config DEBUG
+  if [ $? -eq 0 ]; then
+    content=$(<"./CMakeLists.txt")
+    exePath=""
+    while IFS= read -r line; do
+      if [[ $line == "set(MY_EXECUTABLE_NAME"* ]]; then
+        pattern="\"([^\"]+)\""
+        if [[ $line =~ $pattern ]]; then
+          contentInParentheses="${BASH_REMATCH[1]}"
+          result=($contentInParentheses)
+          exePath="./build/bin/${result[0]}"
+          echo "start running as follows..."
+          echo "=================================================="
+        fi
+      fi
+    done <<<"$content"
+    # execute the binary file
+    if [ -n "$exePath" ]; then
+      $exePath
+    else
+      echo "cannot find executable file path"
+    fi
+  fi
+fi
--- a/scripts/lrun.ps1
+++ b/scripts/lrun.ps1
@ -0,0 +1,20 @@
+#
+# run exe file that has already been compiled before
+#
+function getExePathFromCMakeLists() {
+    $content = Get-Content -Raw -Path "./CMakeLists.txt"
+    $exePath = ""
+    foreach ($line in $content -split "`n") {
+        if ($line -match 'set\(MY_EXECUTABLE_NAME[^\"]*\"([^\"]+)\"') {
+            $exeName = $matches[1]
+            $exePath = "./build/bin/Debug/$exeName" + ".exe"
+            break
+        }
+    }
+    return $exePath
+}
+
+$exePath = getExePathFromCMakeLists
+#Write-Host "start running as follows..."
+#Write-Host "=================================================="
+Invoke-Expression $exePath
--- a/scripts/lrun.sh
+++ b/scripts/lrun.sh
@ -1,18 +1,18 @@
-content=$(<"./CMakeLists.txt")
-exePath=""
-while IFS= read -r line; do
-  if [[ $line == "set(MY_EXECUTABLE_NAME"* ]]; then
-    pattern="\"([^\"]+)\""
-    if [[ $line =~ $pattern ]]; then
-      contentInParentheses="${BASH_REMATCH[1]}"
-      result=($contentInParentheses)
-      exePath="./build/bin/${result[0]}"
-    fi
-  fi
-done <<<"$content"
-
-if [ -n "$exePath" ]; then
-  $exePath
-else
-  echo "cannot find executable file path"
-fi
+content=$(<"./CMakeLists.txt")
+exePath=""
+while IFS= read -r line; do
+  if [[ $line == "set(MY_EXECUTABLE_NAME"* ]]; then
+    pattern="\"([^\"]+)\""
+    if [[ $line =~ $pattern ]]; then
+      contentInParentheses="${BASH_REMATCH[1]}"
+      result=($contentInParentheses)
+      exePath="./build/bin/${result[0]}"
+    fi
+  fi
+done <<<"$content"
+
+if [ -n "$exePath" ]; then
+  $exePath
+else
+  echo "cannot find executable file path"
+fi
--- a/src/include/userdict.h
+++ b/src/include/userdict.h
@ -1,390 +1,396 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef PINYINIME_INCLUDE_USERDICT_H__
-#define PINYINIME_INCLUDE_USERDICT_H__
-
-#define ___CACHE_ENABLED___
-#define ___SYNC_ENABLED___
-#define ___PREDICT_ENABLED___
-
-// Debug performance for operations
-// #define ___DEBUG_PERF___
-
-#include <pthread.h>
-#include "atomdictbase.h"
-
-namespace ime_pinyin {
-
-class UserDict : public AtomDictBase {
-   public:
-    UserDict();
-    ~UserDict();
-
-    bool load_dict(const char *file_name, LemmaIdType start_id, LemmaIdType end_id);
-
-    bool close_dict();
-
-    size_t number_of_lemmas();
-
-    void reset_milestones(uint16 from_step, MileStoneHandle from_handle);
-
-    MileStoneHandle extend_dict(MileStoneHandle from_handle, const DictExtPara *dep, LmaPsbItem *lpi_items, size_t lpi_max, size_t *lpi_num);
-
-    size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len, LmaPsbItem *lpi_items, size_t lpi_max);
-
-    uint16 get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, uint16 str_max);
-
-    uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids, uint16 splids_max, bool arg_valid);
-
-    size_t predict(const char16 last_hzs[], uint16 hzs_len, NPredictItem *npre_items, size_t npre_max, size_t b4_used);
-
-    // Full spelling ids are required
-    LemmaIdType put_lemma(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count);
-
-    LemmaIdType update_lemma(LemmaIdType lemma_id, int16 delta_count, bool selected);
-
-    LemmaIdType get_lemma_id(char16 lemma_str[], uint16 splids[], uint16 lemma_len);
-
-    LmaScoreType get_lemma_score(LemmaIdType lemma_id);
-
-    LmaScoreType get_lemma_score(char16 lemma_str[], uint16 splids[], uint16 lemma_len);
-
-    bool remove_lemma(LemmaIdType lemma_id);
-
-    size_t get_total_lemma_count();
-    void set_total_lemma_count_of_others(size_t count);
-
-    void flush_cache();
-
-    void set_limit(uint32 max_lemma_count, uint32 max_lemma_size, uint32 reclaim_ratio);
-
-    void reclaim();
-
-    void defragment();
-
-#ifdef ___SYNC_ENABLED___
-    void clear_sync_lemmas(unsigned int start, unsigned int end);
-
-    int get_sync_count();
-
-    LemmaIdType put_lemma_no_sync(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count, uint64 lmt);
-    /**
-     * Add lemmas encoded in UTF-16LE into dictionary without adding sync flag.
-     *
-     * @param lemmas in format of 'wo men,WM,0.32;da jia,DJ,0.12'
-     * @param len length of lemmas string in UTF-16LE
-     * @return newly added lemma count
-     */
-    int put_lemmas_no_sync_from_utf16le_string(char16 *lemmas, int len);
-
-    /**
-     * Get lemmas need sync to a UTF-16LE string of above format.
-     * Note: input buffer (str) must not be too small. If str is too small to
-     *       contain single one lemma, there might be a dead loop.
-     *
-     * @param str buffer to write lemmas
-     * @param size buffer size in UTF-16LE
-     * @param count output value of lemma returned
-     * @return UTF-16LE string length
-     */
-    int get_sync_lemmas_in_utf16le_string_from_beginning(char16 *str, int size, int *count);
-
-#endif
-
-    struct UserDictStat {
-        uint32 version;
-        const char *file_name;
-        struct timeval load_time;
-        struct timeval last_update;
-        uint32 disk_size;
-        uint32 lemma_count;
-        uint32 lemma_size;
-        uint32 delete_count;
-        uint32 delete_size;
-#ifdef ___SYNC_ENABLED___
-        uint32 sync_count;
-#endif
-        uint32 reclaim_ratio;
-        uint32 limit_lemma_count;
-        uint32 limit_lemma_size;
-    };
-
-    bool state(UserDictStat *stat);
-
-   private:
-    uint32 total_other_nfreq_;
-    struct timeval load_time_;
-    LemmaIdType start_id_;
-    uint32 version_;
-    uint8 *lemmas_;
-
-    // In-Memory-Only flag for each lemma
-    static const uint8 kUserDictLemmaFlagRemove = 1;
-    // Inuse lemmas' offset
-    uint32 *offsets_;
-    // Highest bit in offset tells whether corresponding lemma is removed
-    static const uint32 kUserDictOffsetFlagRemove = (1 << 31);
-    // Maximum possible for the offset
-    static const uint32 kUserDictOffsetMask = ~(kUserDictOffsetFlagRemove);
-    // Bit width for last modified time, from 1 to 16
-    static const uint32 kUserDictLMTBitWidth = 16;
-    // Granularity for last modified time in second
-    static const uint32 kUserDictLMTGranularity = 60 * 60 * 24 * 7;
-    // Maximum frequency count
-    static const uint16 kUserDictMaxFrequency = 0xFFFF;
-
-#define COARSE_UTC(year, month, day, hour, minute, second) ((year - 1970) * 365 * 24 * 60 * 60 + (month - 1) * 30 * 24 * 60 * 60 + (day - 1) * 24 * 60 * 60 + (hour - 0) * 60 * 60 + (minute - 0) * 60 + (second - 0))
-    static const uint64 kUserDictLMTSince = COARSE_UTC(2009, 1, 1, 0, 0, 0);
-
-    // Correspond to offsets_
-    uint32 *scores_;
-    // Following two fields are only valid in memory
-    uint32 *ids_;
-#ifdef ___PREDICT_ENABLED___
-    uint32 *predicts_;
-#endif
-#ifdef ___SYNC_ENABLED___
-    uint32 *syncs_;
-    size_t sync_count_size_;
-#endif
-    uint32 *offsets_by_id_;
-
-    size_t lemma_count_left_;
-    size_t lemma_size_left_;
-
-    const char *dict_file_;
-
-    // Be sure size is 4xN
-    struct UserDictInfo {
-        // When limitation reached, how much percentage will be reclaimed (1 ~ 100)
-        uint32 reclaim_ratio;
-        // maximum lemma count, 0 means no limitation
-        uint32 limit_lemma_count;
-        // Maximum lemma size, it's different from
-        // whole disk file size or in-mem dict size
-        // 0 means no limitation
-        uint32 limit_lemma_size;
-        // Total lemma count including deleted and inuse
-        // Also indicate offsets_ size
-        uint32 lemma_count;
-        // Total size of lemmas including used and freed
-        uint32 lemma_size;
-        // Freed lemma count
-        uint32 free_count;
-        // Freed lemma size in byte
-        uint32 free_size;
-#ifdef ___SYNC_ENABLED___
-        uint32 sync_count;
-#endif
-        int32 total_nfreq;
-    } dict_info_;
-
-    static const uint32 kUserDictVersion = 0x0ABCDEF0;
-
-    static const uint32 kUserDictPreAlloc = 32;
-    static const uint32 kUserDictAverageNchar = 8;
-
-    enum UserDictState {
-        // Keep in order
-        USER_DICT_NONE = 0,
-        USER_DICT_SYNC,
-#ifdef ___SYNC_ENABLED___
-        USER_DICT_SYNC_DIRTY,
-#endif
-        USER_DICT_SCORE_DIRTY,
-        USER_DICT_OFFSET_DIRTY,
-        USER_DICT_LEMMA_DIRTY,
-
-        USER_DICT_DEFRAGMENTED,
-    } state_;
-
-    struct UserDictSearchable {
-        uint16 splids_len;
-        uint16 splid_start[kMaxLemmaSize];
-        uint16 splid_count[kMaxLemmaSize];
-        // Compact inital letters for both FuzzyCompareSpellId and cache system
-        uint32 signature[kMaxLemmaSize / 4];
-    };
-
-#ifdef ___CACHE_ENABLED___
-    enum UserDictCacheType {
-        USER_DICT_CACHE,
-        USER_DICT_MISS_CACHE,
-    };
-
-    static const int kUserDictCacheSize = 4;
-    static const int kUserDictMissCacheSize = kMaxLemmaSize - 1;
-
-    struct UserDictMissCache {
-        uint32 signatures[kUserDictMissCacheSize][kMaxLemmaSize / 4];
-        uint16 head, tail;
-    } miss_caches_[kMaxLemmaSize];
-
-    struct UserDictCache {
-        uint32 signatures[kUserDictCacheSize][kMaxLemmaSize / 4];
-        uint32 offsets[kUserDictCacheSize];
-        uint32 lengths[kUserDictCacheSize];
-        // Ring buffer
-        uint16 head, tail;
-    } caches_[kMaxLemmaSize];
-
-    void cache_init();
-
-    void cache_push(UserDictCacheType type, UserDictSearchable *searchable, uint32 offset, uint32 length);
-
-    bool cache_hit(UserDictSearchable *searchable, uint32 *offset, uint32 *length);
-
-    bool load_cache(UserDictSearchable *searchable, uint32 *offset, uint32 *length);
-
-    void save_cache(UserDictSearchable *searchable, uint32 offset, uint32 length);
-
-    void reset_cache();
-
-    bool load_miss_cache(UserDictSearchable *searchable);
-
-    void save_miss_cache(UserDictSearchable *searchable);
-
-    void reset_miss_cache();
-#endif
-
-    LmaScoreType translate_score(int f);
-
-    int extract_score_freq(int raw_score);
-
-    uint64 extract_score_lmt(int raw_score);
-
-    inline int build_score(uint64 lmt, int freq);
-
-    inline int64 utf16le_atoll(uint16 *s, int len);
-
-    inline int utf16le_lltoa(int64 v, uint16 *s, int size);
-
-    LemmaIdType _put_lemma(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count, uint64 lmt);
-
-    size_t _get_lpis(const uint16 *splid_str, uint16 splid_str_len, LmaPsbItem *lpi_items, size_t lpi_max, bool *need_extend);
-
-    int _get_lemma_score(char16 lemma_str[], uint16 splids[], uint16 lemma_len);
-
-    int _get_lemma_score(LemmaIdType lemma_id);
-
-    int is_fuzzy_prefix_spell_id(const uint16 *id1, uint16 len1, const UserDictSearchable *searchable);
-
-    bool is_prefix_spell_id(const uint16 *fullids, uint16 fulllen, const UserDictSearchable *searchable);
-
-    uint32 get_dict_file_size(UserDictInfo *info);
-
-    bool reset(const char *file);
-
-    bool validate(const char *file);
-
-    bool load(const char *file, LemmaIdType start_id);
-
-    bool is_valid_state();
-
-    bool is_valid_lemma_id(LemmaIdType id);
-
-    LemmaIdType get_max_lemma_id();
-
-    void set_lemma_flag(uint32 offset, uint8 flag);
-
-    char get_lemma_flag(uint32 offset);
-
-    char get_lemma_nchar(uint32 offset);
-
-    uint16 *get_lemma_spell_ids(uint32 offset);
-
-    uint16 *get_lemma_word(uint32 offset);
-
-    // Prepare searchable to fasten locate process
-    void prepare_locate(UserDictSearchable *searchable, const uint16 *splids, uint16 len);
-
-    // Compare initial letters only
-    int32 fuzzy_compare_spell_id(const uint16 *id1, uint16 len1, const UserDictSearchable *searchable);
-
-    // Compare exactly two spell ids
-    // First argument must be a full id spell id
-    bool equal_spell_id(const uint16 *fullids, uint16 fulllen, const UserDictSearchable *searchable);
-
-    // Find first item by initial letters
-    int32 locate_first_in_offsets(const UserDictSearchable *searchable);
-
-    LemmaIdType append_a_lemma(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count, uint64 lmt);
-
-    // Check if a lemma is in dictionary
-    int32 locate_in_offsets(char16 lemma_str[], uint16 splid_str[], uint16 lemma_len);
-
-    bool remove_lemma_by_offset_index(int offset_index);
-#ifdef ___PREDICT_ENABLED___
-    uint32 locate_where_to_insert_in_predicts(const uint16 *words, int lemma_len);
-
-    int32 locate_first_in_predicts(const uint16 *words, int lemma_len);
-
-    void remove_lemma_from_predict_list(uint32 offset);
-#endif
-#ifdef ___SYNC_ENABLED___
-    void queue_lemma_for_sync(LemmaIdType id);
-
-    void remove_lemma_from_sync_list(uint32 offset);
-
-    void write_back_sync(int fd);
-#endif
-    void write_back_score(int fd);
-    void write_back_offset(int fd);
-    void write_back_lemma(int fd);
-    void write_back_all(int fd);
-    void write_back();
-
-    struct UserDictScoreOffsetPair {
-        int score;
-        uint32 offset_index;
-    };
-
-    inline void swap(UserDictScoreOffsetPair *sop, int i, int j);
-
-    void shift_down(UserDictScoreOffsetPair *sop, int i, int n);
-
-    // On-disk format for each lemma
-    // +-------------+
-    // | Version (4) |
-    // +-------------+
-    // +-----------+-----------+--------------------+-------------------+
-    // | Spare (1) | Nchar (1) | Splids (2 x Nchar) | Lemma (2 x Nchar) |
-    // +-----------+-----------+--------------------+-------------------+
-    // ...
-    // +-----------------------+     +-------------+      <---Offset of offset
-    // | Offset1 by_splids (4) | ... | OffsetN (4) |
-    // +-----------------------+     +-------------+
-#ifdef ___PREDICT_ENABLED___
-    // +----------------------+     +-------------+
-    // | Offset1 by_lemma (4) | ... | OffsetN (4) |
-    // +----------------------+     +-------------+
-#endif
-    // +------------+     +------------+
-    // | Score1 (4) | ... | ScoreN (4) |
-    // +------------+     +------------+
-#ifdef ___SYNC_ENABLED___
-    // +-------------+     +-------------+
-    // | NewAdd1 (4) | ... | NewAddN (4) |
-    // +-------------+     +-------------+
-#endif
-    // +----------------+
-    // | Dict Info (4x) |
-    // +----------------+
-};
-}  // namespace ime_pinyin
-
-#endif
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef PINYINIME_INCLUDE_USERDICT_H__
+#define PINYINIME_INCLUDE_USERDICT_H__
+
+#define ___CACHE_ENABLED___
+#define ___SYNC_ENABLED___
+#define ___PREDICT_ENABLED___
+
+// Debug performance for operations
+// #define ___DEBUG_PERF___
+
+#ifdef _WIN32
+#include <time.h>
+#include <winsock.h> // timeval
+#else
+#include <pthread.h>
+#include <sys/time.h>
+#endif
+#include "atomdictbase.h"
+
+namespace ime_pinyin {
+
+class UserDict : public AtomDictBase {
+   public:
+    UserDict();
+    ~UserDict();
+
+    bool load_dict(const char *file_name, LemmaIdType start_id, LemmaIdType end_id);
+
+    bool close_dict();
+
+    size_t number_of_lemmas();
+
+    void reset_milestones(uint16 from_step, MileStoneHandle from_handle);
+
+    MileStoneHandle extend_dict(MileStoneHandle from_handle, const DictExtPara *dep, LmaPsbItem *lpi_items, size_t lpi_max, size_t *lpi_num);
+
+    size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len, LmaPsbItem *lpi_items, size_t lpi_max);
+
+    uint16 get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, uint16 str_max);
+
+    uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids, uint16 splids_max, bool arg_valid);
+
+    size_t predict(const char16 last_hzs[], uint16 hzs_len, NPredictItem *npre_items, size_t npre_max, size_t b4_used);
+
+    // Full spelling ids are required
+    LemmaIdType put_lemma(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count);
+
+    LemmaIdType update_lemma(LemmaIdType lemma_id, int16 delta_count, bool selected);
+
+    LemmaIdType get_lemma_id(char16 lemma_str[], uint16 splids[], uint16 lemma_len);
+
+    LmaScoreType get_lemma_score(LemmaIdType lemma_id);
+
+    LmaScoreType get_lemma_score(char16 lemma_str[], uint16 splids[], uint16 lemma_len);
+
+    bool remove_lemma(LemmaIdType lemma_id);
+
+    size_t get_total_lemma_count();
+    void set_total_lemma_count_of_others(size_t count);
+
+    void flush_cache();
+
+    void set_limit(uint32 max_lemma_count, uint32 max_lemma_size, uint32 reclaim_ratio);
+
+    void reclaim();
+
+    void defragment();
+
+#ifdef ___SYNC_ENABLED___
+    void clear_sync_lemmas(unsigned int start, unsigned int end);
+
+    int get_sync_count();
+
+    LemmaIdType put_lemma_no_sync(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count, uint64 lmt);
+    /**
+     * Add lemmas encoded in UTF-16LE into dictionary without adding sync flag.
+     *
+     * @param lemmas in format of 'wo men,WM,0.32;da jia,DJ,0.12'
+     * @param len length of lemmas string in UTF-16LE
+     * @return newly added lemma count
+     */
+    int put_lemmas_no_sync_from_utf16le_string(char16 *lemmas, int len);
+
+    /**
+     * Get lemmas need sync to a UTF-16LE string of above format.
+     * Note: input buffer (str) must not be too small. If str is too small to
+     *       contain single one lemma, there might be a dead loop.
+     *
+     * @param str buffer to write lemmas
+     * @param size buffer size in UTF-16LE
+     * @param count output value of lemma returned
+     * @return UTF-16LE string length
+     */
+    int get_sync_lemmas_in_utf16le_string_from_beginning(char16 *str, int size, int *count);
+
+#endif
+
+    struct UserDictStat {
+        uint32 version;
+        const char *file_name;
+        struct timeval load_time;
+        struct timeval last_update;
+        uint32 disk_size;
+        uint32 lemma_count;
+        uint32 lemma_size;
+        uint32 delete_count;
+        uint32 delete_size;
+#ifdef ___SYNC_ENABLED___
+        uint32 sync_count;
+#endif
+        uint32 reclaim_ratio;
+        uint32 limit_lemma_count;
+        uint32 limit_lemma_size;
+    };
+
+    bool state(UserDictStat *stat);
+
+   private:
+    uint32 total_other_nfreq_;
+    struct timeval load_time_;
+    LemmaIdType start_id_;
+    uint32 version_;
+    uint8 *lemmas_;
+
+    // In-Memory-Only flag for each lemma
+    static const uint8 kUserDictLemmaFlagRemove = 1;
+    // Inuse lemmas' offset
+    uint32 *offsets_;
+    // Highest bit in offset tells whether corresponding lemma is removed
+    static const uint32 kUserDictOffsetFlagRemove = (1 << 31);
+    // Maximum possible for the offset
+    static const uint32 kUserDictOffsetMask = ~(kUserDictOffsetFlagRemove);
+    // Bit width for last modified time, from 1 to 16
+    static const uint32 kUserDictLMTBitWidth = 16;
+    // Granularity for last modified time in second
+    static const uint32 kUserDictLMTGranularity = 60 * 60 * 24 * 7;
+    // Maximum frequency count
+    static const uint16 kUserDictMaxFrequency = 0xFFFF;
+
+#define COARSE_UTC(year, month, day, hour, minute, second) ((year - 1970) * 365 * 24 * 60 * 60 + (month - 1) * 30 * 24 * 60 * 60 + (day - 1) * 24 * 60 * 60 + (hour - 0) * 60 * 60 + (minute - 0) * 60 + (second - 0))
+    static const uint64 kUserDictLMTSince = COARSE_UTC(2009, 1, 1, 0, 0, 0);
+
+    // Correspond to offsets_
+    uint32 *scores_;
+    // Following two fields are only valid in memory
+    uint32 *ids_;
+#ifdef ___PREDICT_ENABLED___
+    uint32 *predicts_;
+#endif
+#ifdef ___SYNC_ENABLED___
+    uint32 *syncs_;
+    size_t sync_count_size_;
+#endif
+    uint32 *offsets_by_id_;
+
+    size_t lemma_count_left_;
+    size_t lemma_size_left_;
+
+    const char *dict_file_;
+
+    // Be sure size is 4xN
+    struct UserDictInfo {
+        // When limitation reached, how much percentage will be reclaimed (1 ~ 100)
+        uint32 reclaim_ratio;
+        // maximum lemma count, 0 means no limitation
+        uint32 limit_lemma_count;
+        // Maximum lemma size, it's different from
+        // whole disk file size or in-mem dict size
+        // 0 means no limitation
+        uint32 limit_lemma_size;
+        // Total lemma count including deleted and inuse
+        // Also indicate offsets_ size
+        uint32 lemma_count;
+        // Total size of lemmas including used and freed
+        uint32 lemma_size;
+        // Freed lemma count
+        uint32 free_count;
+        // Freed lemma size in byte
+        uint32 free_size;
+#ifdef ___SYNC_ENABLED___
+        uint32 sync_count;
+#endif
+        int32 total_nfreq;
+    } dict_info_;
+
+    static const uint32 kUserDictVersion = 0x0ABCDEF0;
+
+    static const uint32 kUserDictPreAlloc = 32;
+    static const uint32 kUserDictAverageNchar = 8;
+
+    enum UserDictState {
+        // Keep in order
+        USER_DICT_NONE = 0,
+        USER_DICT_SYNC,
+#ifdef ___SYNC_ENABLED___
+        USER_DICT_SYNC_DIRTY,
+#endif
+        USER_DICT_SCORE_DIRTY,
+        USER_DICT_OFFSET_DIRTY,
+        USER_DICT_LEMMA_DIRTY,
+
+        USER_DICT_DEFRAGMENTED,
+    } state_;
+
+    struct UserDictSearchable {
+        uint16 splids_len;
+        uint16 splid_start[kMaxLemmaSize];
+        uint16 splid_count[kMaxLemmaSize];
+        // Compact inital letters for both FuzzyCompareSpellId and cache system
+        uint32 signature[kMaxLemmaSize / 4];
+    };
+
+#ifdef ___CACHE_ENABLED___
+    enum UserDictCacheType {
+        USER_DICT_CACHE,
+        USER_DICT_MISS_CACHE,
+    };
+
+    static const int kUserDictCacheSize = 4;
+    static const int kUserDictMissCacheSize = kMaxLemmaSize - 1;
+
+    struct UserDictMissCache {
+        uint32 signatures[kUserDictMissCacheSize][kMaxLemmaSize / 4];
+        uint16 head, tail;
+    } miss_caches_[kMaxLemmaSize];
+
+    struct UserDictCache {
+        uint32 signatures[kUserDictCacheSize][kMaxLemmaSize / 4];
+        uint32 offsets[kUserDictCacheSize];
+        uint32 lengths[kUserDictCacheSize];
+        // Ring buffer
+        uint16 head, tail;
+    } caches_[kMaxLemmaSize];
+
+    void cache_init();
+
+    void cache_push(UserDictCacheType type, UserDictSearchable *searchable, uint32 offset, uint32 length);
+
+    bool cache_hit(UserDictSearchable *searchable, uint32 *offset, uint32 *length);
+
+    bool load_cache(UserDictSearchable *searchable, uint32 *offset, uint32 *length);
+
+    void save_cache(UserDictSearchable *searchable, uint32 offset, uint32 length);
+
+    void reset_cache();
+
+    bool load_miss_cache(UserDictSearchable *searchable);
+
+    void save_miss_cache(UserDictSearchable *searchable);
+
+    void reset_miss_cache();
+#endif
+
+    LmaScoreType translate_score(int f);
+
+    int extract_score_freq(int raw_score);
+
+    uint64 extract_score_lmt(int raw_score);
+
+    inline int build_score(uint64 lmt, int freq);
+
+    inline int64 utf16le_atoll(uint16 *s, int len);
+
+    inline int utf16le_lltoa(int64 v, uint16 *s, int size);
+
+    LemmaIdType _put_lemma(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count, uint64 lmt);
+
+    size_t _get_lpis(const uint16 *splid_str, uint16 splid_str_len, LmaPsbItem *lpi_items, size_t lpi_max, bool *need_extend);
+
+    int _get_lemma_score(char16 lemma_str[], uint16 splids[], uint16 lemma_len);
+
+    int _get_lemma_score(LemmaIdType lemma_id);
+
+    int is_fuzzy_prefix_spell_id(const uint16 *id1, uint16 len1, const UserDictSearchable *searchable);
+
+    bool is_prefix_spell_id(const uint16 *fullids, uint16 fulllen, const UserDictSearchable *searchable);
+
+    uint32 get_dict_file_size(UserDictInfo *info);
+
+    bool reset(const char *file);
+
+    bool validate(const char *file);
+
+    bool load(const char *file, LemmaIdType start_id);
+
+    bool is_valid_state();
+
+    bool is_valid_lemma_id(LemmaIdType id);
+
+    LemmaIdType get_max_lemma_id();
+
+    void set_lemma_flag(uint32 offset, uint8 flag);
+
+    char get_lemma_flag(uint32 offset);
+
+    char get_lemma_nchar(uint32 offset);
+
+    uint16 *get_lemma_spell_ids(uint32 offset);
+
+    uint16 *get_lemma_word(uint32 offset);
+
+    // Prepare searchable to fasten locate process
+    void prepare_locate(UserDictSearchable *searchable, const uint16 *splids, uint16 len);
+
+    // Compare initial letters only
+    int32 fuzzy_compare_spell_id(const uint16 *id1, uint16 len1, const UserDictSearchable *searchable);
+
+    // Compare exactly two spell ids
+    // First argument must be a full id spell id
+    bool equal_spell_id(const uint16 *fullids, uint16 fulllen, const UserDictSearchable *searchable);
+
+    // Find first item by initial letters
+    int32 locate_first_in_offsets(const UserDictSearchable *searchable);
+
+    LemmaIdType append_a_lemma(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count, uint64 lmt);
+
+    // Check if a lemma is in dictionary
+    int32 locate_in_offsets(char16 lemma_str[], uint16 splid_str[], uint16 lemma_len);
+
+    bool remove_lemma_by_offset_index(int offset_index);
+#ifdef ___PREDICT_ENABLED___
+    uint32 locate_where_to_insert_in_predicts(const uint16 *words, int lemma_len);
+
+    int32 locate_first_in_predicts(const uint16 *words, int lemma_len);
+
+    void remove_lemma_from_predict_list(uint32 offset);
+#endif
+#ifdef ___SYNC_ENABLED___
+    void queue_lemma_for_sync(LemmaIdType id);
+
+    void remove_lemma_from_sync_list(uint32 offset);
+
+    void write_back_sync(int fd);
+#endif
+    void write_back_score(int fd);
+    void write_back_offset(int fd);
+    void write_back_lemma(int fd);
+    void write_back_all(int fd);
+    void write_back();
+
+    struct UserDictScoreOffsetPair {
+        int score;
+        uint32 offset_index;
+    };
+
+    inline void swap(UserDictScoreOffsetPair *sop, int i, int j);
+
+    void shift_down(UserDictScoreOffsetPair *sop, int i, int n);
+
+    // On-disk format for each lemma
+    // +-------------+
+    // | Version (4) |
+    // +-------------+
+    // +-----------+-----------+--------------------+-------------------+
+    // | Spare (1) | Nchar (1) | Splids (2 x Nchar) | Lemma (2 x Nchar) |
+    // +-----------+-----------+--------------------+-------------------+
+    // ...
+    // +-----------------------+     +-------------+      <---Offset of offset
+    // | Offset1 by_splids (4) | ... | OffsetN (4) |
+    // +-----------------------+     +-------------+
+#ifdef ___PREDICT_ENABLED___
+    // +----------------------+     +-------------+
+    // | Offset1 by_lemma (4) | ... | OffsetN (4) |
+    // +----------------------+     +-------------+
+#endif
+    // +------------+     +------------+
+    // | Score1 (4) | ... | ScoreN (4) |
+    // +------------+     +------------+
+#ifdef ___SYNC_ENABLED___
+    // +-------------+     +-------------+
+    // | NewAdd1 (4) | ... | NewAddN (4) |
+    // +-------------+     +-------------+
+#endif
+    // +----------------+
+    // | Dict Info (4x) |
+    // +----------------+
+};
+}  // namespace ime_pinyin
+
+#endif
--- a/src/share/userdict.cpp
+++ b/src/share/userdict.cpp
--- a/tests/main.cpp
+++ b/tests/main.cpp
@ -1,35 +1,35 @@
-#include "../src/include/pinyinime.h"
-#include <codecvt>
-#include <iostream>
-#include <locale>
-#include <string>
-
-std::string fromUtf16(const ime_pinyin::char16 *buf, size_t len) {
-    // 转换为标准 char16_t
-    std::u16string utf16Str(reinterpret_cast<const char16_t *>(buf), len);
-    std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
-    return convert.to_bytes(utf16Str);
-}
-
-int main() {
-    if (!ime_pinyin::im_open_decoder("./data/dict_pinyin.dat", "./data/user_dict.dat")) {
-        std::cout << "fany bug.\n";
-        return 0;
-    }
-
-    std::string pinyin = "ni'ma'si'le";
-    pinyin = "ni'ma'mei'si";
-    pinyin = "ni'shuo'ni'ma'ne";
-    size_t cand_cnt = ime_pinyin::im_search(pinyin.c_str(), pinyin.size());
-    ime_pinyin::char16 buf[256] = {0};
-    std::string msg;
-    for (size_t i = 0; i < 100; ++i) {
-        ime_pinyin::im_get_candidate(i, buf, 255);
-        size_t len = 0;
-        while (buf[len] != 0 && len < 255) ++len;
-        msg.append(fromUtf16(buf, len) + " ");
-    }
-    std::cout << "候选项数量: " << cand_cnt << std::endl;
-    std::cout << "候选项本体: " << msg << std::endl;
-    return 0;
-}
+#include "../src/include/pinyinime.h"
+#include <codecvt>
+#include <iostream>
+#include <locale>
+#include <string>
+
+std::string fromUtf16(const ime_pinyin::char16 *buf, size_t len) {
+    // 转换为标准 char16_t
+    std::u16string utf16Str(reinterpret_cast<const char16_t *>(buf), len);
+    std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
+    return convert.to_bytes(utf16Str);
+}
+
+int main() {
+    if (!ime_pinyin::im_open_decoder("./data/dict_pinyin.dat", "./data/user_dict.dat")) {
+        std::cout << "fany bug.\n";
+        return 0;
+    }
+
+    std::string pinyin = "ni'ma'si'le";
+    pinyin = "ni'ma'mei'si";
+    pinyin = "ni'shuo'ni'ma'ne";
+    size_t cand_cnt = ime_pinyin::im_search(pinyin.c_str(), pinyin.size());
+    ime_pinyin::char16 buf[256] = {0};
+    std::string msg;
+    for (size_t i = 0; i < 100; ++i) {
+        ime_pinyin::im_get_candidate(i, buf, 255);
+        size_t len = 0;
+        while (buf[len] != 0 && len < 255) ++len;
+        msg.append(fromUtf16(buf, len) + " ");
+    }
+    std::cout << "候选项数量: " << cand_cnt << std::endl;
+    std::cout << "候选项本体: " << msg << std::endl;
+    return 0;
+}