diff --git a/command/pinyinime_dictbuilder.cpp b/command/pinyinime_dictbuilder.cpp
index d5e0433..5c6786f 100644
--- a/command/pinyinime_dictbuilder.cpp
+++ b/command/pinyinime_dictbuilder.cpp
@@ -1,55 +1,59 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <assert.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <time.h>
-#include <unistd.h>
-#include "../src/include/dicttrie.h"
-
-using namespace ime_pinyin;
-
-/**
- * Build binary dictionary model. Make sure that ___BUILD_MODEL___ is defined
- * in dictdef.h.
- */
-int main(int argc, char* argv[]) {
-    DictTrie* dict_trie = new DictTrie();
-    bool success;
-    if (argc >= 3)
-        success = dict_trie->build_dict(argv[1], argv[2]);
-    else
-        success = dict_trie->build_dict("../data/rawdict_utf16_65105_freq.txt", "../data/valid_utf16.txt");
-
-    if (success) {
-        printf("Build dictionary successfully.\n");
-    } else {
-        printf("Build dictionary unsuccessfully.\n");
-        return -1;
-    }
-
-    success = dict_trie->save_dict("./dict/dict_pinyin.dat");
-
-    if (success) {
-        printf("Save dictionary successfully.\n");
-    } else {
-        printf("Save dictionary unsuccessfully.\n");
-        return -1;
-    }
-
-    return 0;
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+#ifdef _WIN32
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+#include "../src/include/dicttrie.h"
+
+using namespace ime_pinyin;
+
+/**
+ * Build binary dictionary model. Make sure that ___BUILD_MODEL___ is defined
+ * in dictdef.h.
+ */
+int main(int argc, char* argv[]) {
+    DictTrie* dict_trie = new DictTrie();
+    bool success;
+    if (argc >= 3)  // input paths supplied on the command line
+        success = dict_trie->build_dict(argv[1], argv[2]);
+    else  // otherwise use the default data files
+        success = dict_trie->build_dict("../data/rawdict_utf16_65105_freq.txt", "../data/valid_utf16.txt");
+
+    if (!success) {
+        printf("Build dictionary unsuccessfully.\n");
+        delete dict_trie;  // release the trie on the failure path as well
+        return -1;
+    }
+    printf("Build dictionary successfully.\n");
+
+    success = dict_trie->save_dict("./dict/dict_pinyin.dat");
+    delete dict_trie;  // the trie is not used again after saving
+
+    if (!success) {
+        printf("Save dictionary unsuccessfully.\n");
+        return -1;
+    }
+    printf("Save dictionary successfully.\n");
+
+    return 0;
 }
\ No newline at end of file
diff --git a/command/lcompile.sh b/command/scripts/lcompile.sh
old mode 100755
new mode 100644
similarity index 95%
rename from command/lcompile.sh
rename to command/scripts/lcompile.sh
index bb146aa..b530bcd
--- a/command/lcompile.sh
+++ b/command/scripts/lcompile.sh
@@ -1,23 +1,23 @@
-#!/bin/bash
-currentDirectory=$(pwd)
-cmakeListsPath="${currentDirectory}/CMakeLists.txt"
-
-if [ ! -f "$cmakeListsPath" ]; then
-  echo "No CMakeLists.txt in current directory, please check."
-  exit 1
-fi
-
-echo "Start generating and compiling..."
-
-buildFolderPath="./build"
-
-if [ ! -d "$buildFolderPath" ]; then
-  mkdir -p "$buildFolderPath"
-  echo "build folder created."
-fi
-
-cmake -G "Unix Makefiles" -D CMAKE_CXX_COMPILER=/usr/bin/g++ -S . -B ./build/
-
-if [ $? -eq 0 ]; then
-  cmake --build ./build/ --config DEBUG
-fi
+#!/bin/bash
+# Configure and build the CMake project found in the current directory.
+cmakeListsPath="$(pwd)/CMakeLists.txt"
+
+if [ ! -f "$cmakeListsPath" ]; then
+  echo "No CMakeLists.txt in current directory, please check."
+  exit 1
+fi
+
+echo "Start generating and compiling..."
+
+buildFolderPath="./build"
+
+if [ ! -d "$buildFolderPath" ]; then
+  mkdir -p "$buildFolderPath"
+  echo "build folder created."
+fi
+
+# Stop with cmake's own exit status when configuring fails, so callers can detect it.
+cmake -G "Unix Makefiles" -D CMAKE_CXX_COMPILER=/usr/bin/g++ -S . -B ./build/ || exit $?
+
+# Last command: the script's exit status is the status of the build step.
+cmake --build ./build/ --config DEBUG
diff --git a/llaunch.sh b/command/scripts/llaunch.sh
old mode 100755
new mode 100644
similarity index 96%
rename from llaunch.sh
rename to command/scripts/llaunch.sh
index 4518c0b..49e549a
--- a/llaunch.sh
+++ b/command/scripts/llaunch.sh
@@ -1,45 +1,45 @@
-#!/bin/bash
-currentDirectory=$(pwd)
-cmakeListsPath="${currentDirectory}/CMakeLists.txt"
-
-if [ ! -f "$cmakeListsPath" ]; then
-  echo "No CMakeLists.txt in current directory, please check."
-  exit 1
-fi
-
-echo "Start generating and compiling..."
-
-buildFolderPath="./build"
-
-if [ ! -d "$buildFolderPath" ]; then
-  mkdir -p "$buildFolderPath"
-  echo "build folder created."
-fi
-
-cmake -G "Unix Makefiles" -D CMAKE_CXX_COMPILER=/usr/bin/g++ -S . -B ./build/
-
-if [ $? -eq 0 ]; then
-  cmake --build ./build/ --config DEBUG
-  if [ $? -eq 0 ]; then
-    content=$(<"./CMakeLists.txt")
-    exePath=""
-    while IFS= read -r line; do
-      if [[ $line == "set(MY_EXECUTABLE_NAME"* ]]; then
-        pattern="\"([^\"]+)\""
-        if [[ $line =~ $pattern ]]; then
-          contentInParentheses="${BASH_REMATCH[1]}"
-          result=($contentInParentheses)
-          exePath="./build/bin/${result[0]}"
-          echo "start running as follows..."
-          echo "=================================================="
-        fi
-      fi
-    done <<<"$content"
-    # execute the binary file
-    if [ -n "$exePath" ]; then
-      $exePath
-    else
-      echo "cannot find executable file path"
-    fi
-  fi
-fi
+#!/bin/bash
+# Configure, build and run: the executable name is read from the set(MY_EXECUTABLE_NAME "...") line of ./CMakeLists.txt.
+cmakeListsPath="$(pwd)/CMakeLists.txt"
+
+if [ ! -f "$cmakeListsPath" ]; then
+  echo "No CMakeLists.txt in current directory, please check."
+  exit 1
+fi
+
+echo "Start generating and compiling..."
+
+buildFolderPath="./build"
+
+if [ ! -d "$buildFolderPath" ]; then
+  mkdir -p "$buildFolderPath"
+  echo "build folder created."
+fi
+
+# Stop with cmake's own exit status when configuring or building fails, instead of exiting 0.
+cmake -G "Unix Makefiles" -D CMAKE_CXX_COMPILER=/usr/bin/g++ -S . -B ./build/ || exit $?
+cmake --build ./build/ --config DEBUG || exit $?
+
+# Take the first word of the quoted value on the set(MY_EXECUTABLE_NAME ...) line as the binary name.
+content=$(<"./CMakeLists.txt")
+exePath=""
+while IFS= read -r line; do
+  if [[ $line == "set(MY_EXECUTABLE_NAME"* ]]; then
+    pattern="\"([^\"]+)\""
+    if [[ $line =~ $pattern ]]; then
+      contentInParentheses="${BASH_REMATCH[1]}"
+      result=($contentInParentheses)
+      exePath="./build/bin/${result[0]}"
+      echo "start running as follows..."
+      echo "=================================================="
+    fi
+  fi
+done <<<"$content"
+
+# execute the binary file (quoted so the path is never glob-expanded)
+if [ -n "$exePath" ]; then
+  "$exePath"
+else
+  echo "cannot find executable file path"
+  exit 1
+fi
diff --git a/lrun.sh b/command/scripts/lrun.sh
old mode 100755
new mode 100644
similarity index 96%
rename from lrun.sh
rename to command/scripts/lrun.sh
index 0c6264e..8dec0c3
--- a/lrun.sh
+++ b/command/scripts/lrun.sh
@@ -1,18 +1,18 @@
-content=$(<"./CMakeLists.txt")
-exePath=""
-while IFS= read -r line; do
-  if [[ $line == "set(MY_EXECUTABLE_NAME"* ]]; then
-    pattern="\"([^\"]+)\""
-    if [[ $line =~ $pattern ]]; then
-      contentInParentheses="${BASH_REMATCH[1]}"
-      result=($contentInParentheses)
-      exePath="./build/bin/${result[0]}"
-    fi
-  fi
-done <<<"$content"
-
-if [ -n "$exePath" ]; then
-  $exePath
-else
-  echo "cannot find executable file path"
-fi
+#!/bin/bash
+# Run the already-built executable named by the set(MY_EXECUTABLE_NAME "...") line of ./CMakeLists.txt.
+content=$(<"./CMakeLists.txt")
+exePath=""
+pattern="\"([^\"]+)\""
+while IFS= read -r line; do
+  if [[ $line == "set(MY_EXECUTABLE_NAME"* && $line =~ $pattern ]]; then
+    result=(${BASH_REMATCH[1]})  # unquoted on purpose: split the value and keep its first word
+    exePath="./build/bin/${result[0]}"
+  fi
+done <<<"$content"
+
+if [ -n "$exePath" ]; then
+  "$exePath"
+else
+  echo "cannot find executable file path"
+  exit 1
+fi
diff --git a/scripts/lcompile.ps1 b/scripts/lcompile.ps1
new file mode 100644
index 0000000..81d154f
--- /dev/null
+++ b/scripts/lcompile.ps1
@@ -0,0 +1,26 @@
+# Generate a Visual Studio 2022 (x64) build system with CMake and compile it.
+$currentDirectory = Get-Location
+$cmakeListsPath = Join-Path -Path $currentDirectory -ChildPath "CMakeLists.txt"
+# Stop early when the current directory has no CMakeLists.txt.
+if (-not (Test-Path $cmakeListsPath))
+{
+  Write-Host("No CMakeLists.txt in current directory, please check.")
+  return
+}
+
+Write-Host "Start generating and compiling..."
+
+$buildFolderPath = ".\build"
+# Create the build folder if it does not exist yet (New-Item output is discarded).
+if (-not (Test-Path $buildFolderPath))
+{
+  New-Item -ItemType Directory -Path $buildFolderPath | Out-Null
+  Write-Host "build folder created."
+}
+# Configure: sources from the current directory, build tree in ./build/.
+cmake -G "Visual Studio 17 2022" -A x64 -S . -B ./build/
+# Build only when the configure step exited with code 0.
+if ($LASTEXITCODE -eq 0)
+{
+  cmake --build ./build/ --config DEBUG
+}
diff --git a/lcompile.sh b/scripts/lcompile.sh
old mode 100755
new mode 100644
similarity index 95%
rename from lcompile.sh
rename to scripts/lcompile.sh
index bb146aa..b530bcd
--- a/lcompile.sh
+++ b/scripts/lcompile.sh
@@ -1,23 +1,23 @@
-#!/bin/bash
-currentDirectory=$(pwd)
-cmakeListsPath="${currentDirectory}/CMakeLists.txt"
-
-if [ ! -f "$cmakeListsPath" ]; then
-  echo "No CMakeLists.txt in current directory, please check."
-  exit 1
-fi
-
-echo "Start generating and compiling..."
-
-buildFolderPath="./build"
-
-if [ ! -d "$buildFolderPath" ]; then
-  mkdir -p "$buildFolderPath"
-  echo "build folder created."
-fi
-
-cmake -G "Unix Makefiles" -D CMAKE_CXX_COMPILER=/usr/bin/g++ -S . -B ./build/
-
-if [ $? -eq 0 ]; then
-  cmake --build ./build/ --config DEBUG
-fi
+#!/bin/bash
+# Configure and build the CMake project found in the current directory.
+cmakeListsPath="$(pwd)/CMakeLists.txt"
+
+if [ ! -f "$cmakeListsPath" ]; then
+  echo "No CMakeLists.txt in current directory, please check."
+  exit 1
+fi
+
+echo "Start generating and compiling..."
+
+buildFolderPath="./build"
+
+if [ ! -d "$buildFolderPath" ]; then
+  mkdir -p "$buildFolderPath"
+  echo "build folder created."
+fi
+
+# Stop with cmake's own exit status when configuring fails, so callers can detect it.
+cmake -G "Unix Makefiles" -D CMAKE_CXX_COMPILER=/usr/bin/g++ -S . -B ./build/ || exit $?
+
+# Last command: the script's exit status is the status of the build step.
+cmake --build ./build/ --config DEBUG
diff --git a/scripts/llaunch.ps1 b/scripts/llaunch.ps1
new file mode 100644
index 0000000..4c23a29
--- /dev/null
+++ b/scripts/llaunch.ps1
@@ -0,0 +1,45 @@
+# generate, compile and run exe files
+#
+# getExePathFromCMakeLists: Debug exe path for the first set(MY_EXECUTABLE_NAME "<name>") line in ./CMakeLists.txt, or "" if none.
+function getExePathFromCMakeLists() {
+    $content = Get-Content -Raw -Path "./CMakeLists.txt"
+    foreach ($line in $content -split "`n") {
+        if ($line -match 'set\(MY_EXECUTABLE_NAME[^\"]*\"([^\"]+)\"') {
+            return "./build/bin/Debug/$($matches[1]).exe"
+        }
+    }
+    return ""
+}
+
+$currentDirectory = Get-Location
+$cmakeListsPath = Join-Path -Path $currentDirectory -ChildPath "CMakeLists.txt"
+
+if (-not (Test-Path $cmakeListsPath)) {
+    Write-Host("No CMakeLists.txt in current directory, please check.")
+    return
+}
+
+Write-Host "Start generating and compiling..."
+
+$buildFolderPath = ".\build"
+
+if (-not (Test-Path $buildFolderPath)) {
+    New-Item -ItemType Directory -Path $buildFolderPath | Out-Null
+    Write-Host "build folder created."
+}
+
+cmake -G "Visual Studio 17 2022" -A x64 -S . -B ./build/
+
+if ($LASTEXITCODE -eq 0) {
+    cmake --build ./build/ --config DEBUG
+    if ($LASTEXITCODE -eq 0) {
+        $exePath = getExePathFromCMakeLists
+        if ($exePath) {
+            Write-Host "start running as follows..."
+            Write-Host "=================================================="
+            & $exePath  # call operator: runs the path as a command instead of parsing it as script text
+        } else {
+            Write-Host "cannot find executable file path"
+        }
+    }
+}
diff --git a/command/llaunch.sh b/scripts/llaunch.sh
old mode 100755
new mode 100644
similarity index 96%
rename from command/llaunch.sh
rename to scripts/llaunch.sh
index 4518c0b..49e549a
--- a/command/llaunch.sh
+++ b/scripts/llaunch.sh
@@ -1,45 +1,45 @@
-#!/bin/bash
-currentDirectory=$(pwd)
-cmakeListsPath="${currentDirectory}/CMakeLists.txt"
-
-if [ ! -f "$cmakeListsPath" ]; then
-  echo "No CMakeLists.txt in current directory, please check."
-  exit 1
-fi
-
-echo "Start generating and compiling..."
-
-buildFolderPath="./build"
-
-if [ ! -d "$buildFolderPath" ]; then
-  mkdir -p "$buildFolderPath"
-  echo "build folder created."
-fi
-
-cmake -G "Unix Makefiles" -D CMAKE_CXX_COMPILER=/usr/bin/g++ -S . -B ./build/
-
-if [ $? -eq 0 ]; then
-  cmake --build ./build/ --config DEBUG
-  if [ $? -eq 0 ]; then
-    content=$(<"./CMakeLists.txt")
-    exePath=""
-    while IFS= read -r line; do
-      if [[ $line == "set(MY_EXECUTABLE_NAME"* ]]; then
-        pattern="\"([^\"]+)\""
-        if [[ $line =~ $pattern ]]; then
-          contentInParentheses="${BASH_REMATCH[1]}"
-          result=($contentInParentheses)
-          exePath="./build/bin/${result[0]}"
-          echo "start running as follows..."
-          echo "=================================================="
-        fi
-      fi
-    done <<<"$content"
-    # execute the binary file
-    if [ -n "$exePath" ]; then
-      $exePath
-    else
-      echo "cannot find executable file path"
-    fi
-  fi
-fi
+#!/bin/bash
+# Configure, build and run: the executable name is read from the set(MY_EXECUTABLE_NAME "...") line of ./CMakeLists.txt.
+cmakeListsPath="$(pwd)/CMakeLists.txt"
+
+if [ ! -f "$cmakeListsPath" ]; then
+  echo "No CMakeLists.txt in current directory, please check."
+  exit 1
+fi
+
+echo "Start generating and compiling..."
+
+buildFolderPath="./build"
+
+if [ ! -d "$buildFolderPath" ]; then
+  mkdir -p "$buildFolderPath"
+  echo "build folder created."
+fi
+
+# Stop with cmake's own exit status when configuring or building fails, instead of exiting 0.
+cmake -G "Unix Makefiles" -D CMAKE_CXX_COMPILER=/usr/bin/g++ -S . -B ./build/ || exit $?
+cmake --build ./build/ --config DEBUG || exit $?
+
+# Take the first word of the quoted value on the set(MY_EXECUTABLE_NAME ...) line as the binary name.
+content=$(<"./CMakeLists.txt")
+exePath=""
+while IFS= read -r line; do
+  if [[ $line == "set(MY_EXECUTABLE_NAME"* ]]; then
+    pattern="\"([^\"]+)\""
+    if [[ $line =~ $pattern ]]; then
+      contentInParentheses="${BASH_REMATCH[1]}"
+      result=($contentInParentheses)
+      exePath="./build/bin/${result[0]}"
+      echo "start running as follows..."
+      echo "=================================================="
+    fi
+  fi
+done <<<"$content"
+
+# execute the binary file (quoted so the path is never glob-expanded)
+if [ -n "$exePath" ]; then
+  "$exePath"
+else
+  echo "cannot find executable file path"
+  exit 1
+fi
diff --git a/scripts/lrun.ps1 b/scripts/lrun.ps1
new file mode 100644
index 0000000..1faa0ff
--- /dev/null
+++ b/scripts/lrun.ps1
@@ -0,0 +1,20 @@
+# run exe file that has already been compiled before
+#
+# getExePathFromCMakeLists: Debug exe path for the first set(MY_EXECUTABLE_NAME "<name>") line in ./CMakeLists.txt, or "" if none.
+function getExePathFromCMakeLists() {
+    $content = Get-Content -Raw -Path "./CMakeLists.txt"
+    foreach ($line in $content -split "`n") {
+        if ($line -match 'set\(MY_EXECUTABLE_NAME[^\"]*\"([^\"]+)\"') {
+            return "./build/bin/Debug/$($matches[1]).exe"
+        }
+    }
+    return ""
+}
+
+$exePath = getExePathFromCMakeLists
+# Report a missing executable name instead of handing an empty string to the launcher.
+if ($exePath) {
+    & $exePath  # call operator: runs the path as a command instead of parsing it as script text
+} else {
+    Write-Host "cannot find executable file path"
+}
diff --git a/command/lrun.sh b/scripts/lrun.sh
old mode 100755
new mode 100644
similarity index 96%
rename from command/lrun.sh
rename to scripts/lrun.sh
index 0c6264e..8dec0c3
--- a/command/lrun.sh
+++ b/scripts/lrun.sh
@@ -1,18 +1,18 @@
-content=$(<"./CMakeLists.txt")
-exePath=""
-while IFS= read -r line; do
-  if [[ $line == "set(MY_EXECUTABLE_NAME"* ]]; then
-    pattern="\"([^\"]+)\""
-    if [[ $line =~ $pattern ]]; then
-      contentInParentheses="${BASH_REMATCH[1]}"
-      result=($contentInParentheses)
-      exePath="./build/bin/${result[0]}"
-    fi
-  fi
-done <<<"$content"
-
-if [ -n "$exePath" ]; then
-  $exePath
-else
-  echo "cannot find executable file path"
-fi
+#!/bin/bash
+# Run the already-built executable named by the set(MY_EXECUTABLE_NAME "...") line of ./CMakeLists.txt.
+content=$(<"./CMakeLists.txt")
+exePath=""
+pattern="\"([^\"]+)\""
+while IFS= read -r line; do
+  if [[ $line == "set(MY_EXECUTABLE_NAME"* && $line =~ $pattern ]]; then
+    result=(${BASH_REMATCH[1]})  # unquoted on purpose: split the value and keep its first word
+    exePath="./build/bin/${result[0]}"
+  fi
+done <<<"$content"
+
+if [ -n "$exePath" ]; then
+  "$exePath"
+else
+  echo "cannot find executable file path"
+  exit 1
+fi
diff --git a/src/include/userdict.h b/src/include/userdict.h
index 32914fe..766e5e8 100644
--- a/src/include/userdict.h
+++ b/src/include/userdict.h
@@ -1,390 +1,396 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef PINYINIME_INCLUDE_USERDICT_H__
-#define PINYINIME_INCLUDE_USERDICT_H__
-
-#define ___CACHE_ENABLED___
-#define ___SYNC_ENABLED___
-#define ___PREDICT_ENABLED___
-
-// Debug performance for operations
-// #define ___DEBUG_PERF___
-
-#include <pthread.h>
-#include "atomdictbase.h"
-
-namespace ime_pinyin {
-
-class UserDict : public AtomDictBase {
-   public:
-    UserDict();
-    ~UserDict();
-
-    bool load_dict(const char *file_name, LemmaIdType start_id, LemmaIdType end_id);
-
-    bool close_dict();
-
-    size_t number_of_lemmas();
-
-    void reset_milestones(uint16 from_step, MileStoneHandle from_handle);
-
-    MileStoneHandle extend_dict(MileStoneHandle from_handle, const DictExtPara *dep, LmaPsbItem *lpi_items, size_t lpi_max, size_t *lpi_num);
-
-    size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len, LmaPsbItem *lpi_items, size_t lpi_max);
-
-    uint16 get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, uint16 str_max);
-
-    uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids, uint16 splids_max, bool arg_valid);
-
-    size_t predict(const char16 last_hzs[], uint16 hzs_len, NPredictItem *npre_items, size_t npre_max, size_t b4_used);
-
-    // Full spelling ids are required
-    LemmaIdType put_lemma(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count);
-
-    LemmaIdType update_lemma(LemmaIdType lemma_id, int16 delta_count, bool selected);
-
-    LemmaIdType get_lemma_id(char16 lemma_str[], uint16 splids[], uint16 lemma_len);
-
-    LmaScoreType get_lemma_score(LemmaIdType lemma_id);
-
-    LmaScoreType get_lemma_score(char16 lemma_str[], uint16 splids[], uint16 lemma_len);
-
-    bool remove_lemma(LemmaIdType lemma_id);
-
-    size_t get_total_lemma_count();
-    void set_total_lemma_count_of_others(size_t count);
-
-    void flush_cache();
-
-    void set_limit(uint32 max_lemma_count, uint32 max_lemma_size, uint32 reclaim_ratio);
-
-    void reclaim();
-
-    void defragment();
-
-#ifdef ___SYNC_ENABLED___
-    void clear_sync_lemmas(unsigned int start, unsigned int end);
-
-    int get_sync_count();
-
-    LemmaIdType put_lemma_no_sync(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count, uint64 lmt);
-    /**
-     * Add lemmas encoded in UTF-16LE into dictionary without adding sync flag.
-     *
-     * @param lemmas in format of 'wo men,WM,0.32;da jia,DJ,0.12'
-     * @param len length of lemmas string in UTF-16LE
-     * @return newly added lemma count
-     */
-    int put_lemmas_no_sync_from_utf16le_string(char16 *lemmas, int len);
-
-    /**
-     * Get lemmas need sync to a UTF-16LE string of above format.
-     * Note: input buffer (str) must not be too small. If str is too small to
-     *       contain single one lemma, there might be a dead loop.
-     *
-     * @param str buffer to write lemmas
-     * @param size buffer size in UTF-16LE
-     * @param count output value of lemma returned
-     * @return UTF-16LE string length
-     */
-    int get_sync_lemmas_in_utf16le_string_from_beginning(char16 *str, int size, int *count);
-
-#endif
-
-    struct UserDictStat {
-        uint32 version;
-        const char *file_name;
-        struct timeval load_time;
-        struct timeval last_update;
-        uint32 disk_size;
-        uint32 lemma_count;
-        uint32 lemma_size;
-        uint32 delete_count;
-        uint32 delete_size;
-#ifdef ___SYNC_ENABLED___
-        uint32 sync_count;
-#endif
-        uint32 reclaim_ratio;
-        uint32 limit_lemma_count;
-        uint32 limit_lemma_size;
-    };
-
-    bool state(UserDictStat *stat);
-
-   private:
-    uint32 total_other_nfreq_;
-    struct timeval load_time_;
-    LemmaIdType start_id_;
-    uint32 version_;
-    uint8 *lemmas_;
-
-    // In-Memory-Only flag for each lemma
-    static const uint8 kUserDictLemmaFlagRemove = 1;
-    // Inuse lemmas' offset
-    uint32 *offsets_;
-    // Highest bit in offset tells whether corresponding lemma is removed
-    static const uint32 kUserDictOffsetFlagRemove = (1 << 31);
-    // Maximum possible for the offset
-    static const uint32 kUserDictOffsetMask = ~(kUserDictOffsetFlagRemove);
-    // Bit width for last modified time, from 1 to 16
-    static const uint32 kUserDictLMTBitWidth = 16;
-    // Granularity for last modified time in second
-    static const uint32 kUserDictLMTGranularity = 60 * 60 * 24 * 7;
-    // Maximum frequency count
-    static const uint16 kUserDictMaxFrequency = 0xFFFF;
-
-#define COARSE_UTC(year, month, day, hour, minute, second) ((year - 1970) * 365 * 24 * 60 * 60 + (month - 1) * 30 * 24 * 60 * 60 + (day - 1) * 24 * 60 * 60 + (hour - 0) * 60 * 60 + (minute - 0) * 60 + (second - 0))
-    static const uint64 kUserDictLMTSince = COARSE_UTC(2009, 1, 1, 0, 0, 0);
-
-    // Correspond to offsets_
-    uint32 *scores_;
-    // Following two fields are only valid in memory
-    uint32 *ids_;
-#ifdef ___PREDICT_ENABLED___
-    uint32 *predicts_;
-#endif
-#ifdef ___SYNC_ENABLED___
-    uint32 *syncs_;
-    size_t sync_count_size_;
-#endif
-    uint32 *offsets_by_id_;
-
-    size_t lemma_count_left_;
-    size_t lemma_size_left_;
-
-    const char *dict_file_;
-
-    // Be sure size is 4xN
-    struct UserDictInfo {
-        // When limitation reached, how much percentage will be reclaimed (1 ~ 100)
-        uint32 reclaim_ratio;
-        // maximum lemma count, 0 means no limitation
-        uint32 limit_lemma_count;
-        // Maximum lemma size, it's different from
-        // whole disk file size or in-mem dict size
-        // 0 means no limitation
-        uint32 limit_lemma_size;
-        // Total lemma count including deleted and inuse
-        // Also indicate offsets_ size
-        uint32 lemma_count;
-        // Total size of lemmas including used and freed
-        uint32 lemma_size;
-        // Freed lemma count
-        uint32 free_count;
-        // Freed lemma size in byte
-        uint32 free_size;
-#ifdef ___SYNC_ENABLED___
-        uint32 sync_count;
-#endif
-        int32 total_nfreq;
-    } dict_info_;
-
-    static const uint32 kUserDictVersion = 0x0ABCDEF0;
-
-    static const uint32 kUserDictPreAlloc = 32;
-    static const uint32 kUserDictAverageNchar = 8;
-
-    enum UserDictState {
-        // Keep in order
-        USER_DICT_NONE = 0,
-        USER_DICT_SYNC,
-#ifdef ___SYNC_ENABLED___
-        USER_DICT_SYNC_DIRTY,
-#endif
-        USER_DICT_SCORE_DIRTY,
-        USER_DICT_OFFSET_DIRTY,
-        USER_DICT_LEMMA_DIRTY,
-
-        USER_DICT_DEFRAGMENTED,
-    } state_;
-
-    struct UserDictSearchable {
-        uint16 splids_len;
-        uint16 splid_start[kMaxLemmaSize];
-        uint16 splid_count[kMaxLemmaSize];
-        // Compact inital letters for both FuzzyCompareSpellId and cache system
-        uint32 signature[kMaxLemmaSize / 4];
-    };
-
-#ifdef ___CACHE_ENABLED___
-    enum UserDictCacheType {
-        USER_DICT_CACHE,
-        USER_DICT_MISS_CACHE,
-    };
-
-    static const int kUserDictCacheSize = 4;
-    static const int kUserDictMissCacheSize = kMaxLemmaSize - 1;
-
-    struct UserDictMissCache {
-        uint32 signatures[kUserDictMissCacheSize][kMaxLemmaSize / 4];
-        uint16 head, tail;
-    } miss_caches_[kMaxLemmaSize];
-
-    struct UserDictCache {
-        uint32 signatures[kUserDictCacheSize][kMaxLemmaSize / 4];
-        uint32 offsets[kUserDictCacheSize];
-        uint32 lengths[kUserDictCacheSize];
-        // Ring buffer
-        uint16 head, tail;
-    } caches_[kMaxLemmaSize];
-
-    void cache_init();
-
-    void cache_push(UserDictCacheType type, UserDictSearchable *searchable, uint32 offset, uint32 length);
-
-    bool cache_hit(UserDictSearchable *searchable, uint32 *offset, uint32 *length);
-
-    bool load_cache(UserDictSearchable *searchable, uint32 *offset, uint32 *length);
-
-    void save_cache(UserDictSearchable *searchable, uint32 offset, uint32 length);
-
-    void reset_cache();
-
-    bool load_miss_cache(UserDictSearchable *searchable);
-
-    void save_miss_cache(UserDictSearchable *searchable);
-
-    void reset_miss_cache();
-#endif
-
-    LmaScoreType translate_score(int f);
-
-    int extract_score_freq(int raw_score);
-
-    uint64 extract_score_lmt(int raw_score);
-
-    inline int build_score(uint64 lmt, int freq);
-
-    inline int64 utf16le_atoll(uint16 *s, int len);
-
-    inline int utf16le_lltoa(int64 v, uint16 *s, int size);
-
-    LemmaIdType _put_lemma(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count, uint64 lmt);
-
-    size_t _get_lpis(const uint16 *splid_str, uint16 splid_str_len, LmaPsbItem *lpi_items, size_t lpi_max, bool *need_extend);
-
-    int _get_lemma_score(char16 lemma_str[], uint16 splids[], uint16 lemma_len);
-
-    int _get_lemma_score(LemmaIdType lemma_id);
-
-    int is_fuzzy_prefix_spell_id(const uint16 *id1, uint16 len1, const UserDictSearchable *searchable);
-
-    bool is_prefix_spell_id(const uint16 *fullids, uint16 fulllen, const UserDictSearchable *searchable);
-
-    uint32 get_dict_file_size(UserDictInfo *info);
-
-    bool reset(const char *file);
-
-    bool validate(const char *file);
-
-    bool load(const char *file, LemmaIdType start_id);
-
-    bool is_valid_state();
-
-    bool is_valid_lemma_id(LemmaIdType id);
-
-    LemmaIdType get_max_lemma_id();
-
-    void set_lemma_flag(uint32 offset, uint8 flag);
-
-    char get_lemma_flag(uint32 offset);
-
-    char get_lemma_nchar(uint32 offset);
-
-    uint16 *get_lemma_spell_ids(uint32 offset);
-
-    uint16 *get_lemma_word(uint32 offset);
-
-    // Prepare searchable to fasten locate process
-    void prepare_locate(UserDictSearchable *searchable, const uint16 *splids, uint16 len);
-
-    // Compare initial letters only
-    int32 fuzzy_compare_spell_id(const uint16 *id1, uint16 len1, const UserDictSearchable *searchable);
-
-    // Compare exactly two spell ids
-    // First argument must be a full id spell id
-    bool equal_spell_id(const uint16 *fullids, uint16 fulllen, const UserDictSearchable *searchable);
-
-    // Find first item by initial letters
-    int32 locate_first_in_offsets(const UserDictSearchable *searchable);
-
-    LemmaIdType append_a_lemma(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count, uint64 lmt);
-
-    // Check if a lemma is in dictionary
-    int32 locate_in_offsets(char16 lemma_str[], uint16 splid_str[], uint16 lemma_len);
-
-    bool remove_lemma_by_offset_index(int offset_index);
-#ifdef ___PREDICT_ENABLED___
-    uint32 locate_where_to_insert_in_predicts(const uint16 *words, int lemma_len);
-
-    int32 locate_first_in_predicts(const uint16 *words, int lemma_len);
-
-    void remove_lemma_from_predict_list(uint32 offset);
-#endif
-#ifdef ___SYNC_ENABLED___
-    void queue_lemma_for_sync(LemmaIdType id);
-
-    void remove_lemma_from_sync_list(uint32 offset);
-
-    void write_back_sync(int fd);
-#endif
-    void write_back_score(int fd);
-    void write_back_offset(int fd);
-    void write_back_lemma(int fd);
-    void write_back_all(int fd);
-    void write_back();
-
-    struct UserDictScoreOffsetPair {
-        int score;
-        uint32 offset_index;
-    };
-
-    inline void swap(UserDictScoreOffsetPair *sop, int i, int j);
-
-    void shift_down(UserDictScoreOffsetPair *sop, int i, int n);
-
-    // On-disk format for each lemma
-    // +-------------+
-    // | Version (4) |
-    // +-------------+
-    // +-----------+-----------+--------------------+-------------------+
-    // | Spare (1) | Nchar (1) | Splids (2 x Nchar) | Lemma (2 x Nchar) |
-    // +-----------+-----------+--------------------+-------------------+
-    // ...
-    // +-----------------------+     +-------------+      <---Offset of offset
-    // | Offset1 by_splids (4) | ... | OffsetN (4) |
-    // +-----------------------+     +-------------+
-#ifdef ___PREDICT_ENABLED___
-    // +----------------------+     +-------------+
-    // | Offset1 by_lemma (4) | ... | OffsetN (4) |
-    // +----------------------+     +-------------+
-#endif
-    // +------------+     +------------+
-    // | Score1 (4) | ... | ScoreN (4) |
-    // +------------+     +------------+
-#ifdef ___SYNC_ENABLED___
-    // +-------------+     +-------------+
-    // | NewAdd1 (4) | ... | NewAddN (4) |
-    // +-------------+     +-------------+
-#endif
-    // +----------------+
-    // | Dict Info (4x) |
-    // +----------------+
-};
-}  // namespace ime_pinyin
-
-#endif
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef PINYINIME_INCLUDE_USERDICT_H__
+#define PINYINIME_INCLUDE_USERDICT_H__
+
+#define ___CACHE_ENABLED___
+#define ___SYNC_ENABLED___
+#define ___PREDICT_ENABLED___
+
+// Debug performance for operations
+// #define ___DEBUG_PERF___
+
+#ifdef _WIN32
+#include <time.h>
+#include <winsock.h> // timeval
+#else
+#include <pthread.h>
+#include <sys/time.h>
+#endif
+#include "atomdictbase.h"
+
+namespace ime_pinyin {
+
+class UserDict : public AtomDictBase {
+   public:
+    UserDict();
+    ~UserDict();
+
+    bool load_dict(const char *file_name, LemmaIdType start_id, LemmaIdType end_id);
+
+    bool close_dict();
+
+    size_t number_of_lemmas();
+
+    void reset_milestones(uint16 from_step, MileStoneHandle from_handle);
+
+    MileStoneHandle extend_dict(MileStoneHandle from_handle, const DictExtPara *dep, LmaPsbItem *lpi_items, size_t lpi_max, size_t *lpi_num);
+
+    size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len, LmaPsbItem *lpi_items, size_t lpi_max);
+
+    uint16 get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, uint16 str_max);
+
+    uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids, uint16 splids_max, bool arg_valid);
+
+    size_t predict(const char16 last_hzs[], uint16 hzs_len, NPredictItem *npre_items, size_t npre_max, size_t b4_used);
+
+    // Full spelling ids are required
+    LemmaIdType put_lemma(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count);
+
+    LemmaIdType update_lemma(LemmaIdType lemma_id, int16 delta_count, bool selected);
+
+    LemmaIdType get_lemma_id(char16 lemma_str[], uint16 splids[], uint16 lemma_len);
+
+    LmaScoreType get_lemma_score(LemmaIdType lemma_id);
+
+    LmaScoreType get_lemma_score(char16 lemma_str[], uint16 splids[], uint16 lemma_len);
+
+    bool remove_lemma(LemmaIdType lemma_id);
+
+    size_t get_total_lemma_count();
+    void set_total_lemma_count_of_others(size_t count);
+
+    void flush_cache();
+
+    void set_limit(uint32 max_lemma_count, uint32 max_lemma_size, uint32 reclaim_ratio);
+
+    void reclaim();
+
+    void defragment();
+
+#ifdef ___SYNC_ENABLED___
+    void clear_sync_lemmas(unsigned int start, unsigned int end);
+
+    int get_sync_count();
+
+    LemmaIdType put_lemma_no_sync(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count, uint64 lmt);
+    /**
+     * Add lemmas encoded in UTF-16LE into dictionary without adding sync flag.
+     *
+     * @param lemmas in format of 'wo men,WM,0.32;da jia,DJ,0.12'
+     * @param len length of lemmas string in UTF-16LE
+     * @return newly added lemma count
+     */
+    int put_lemmas_no_sync_from_utf16le_string(char16 *lemmas, int len);
+
+    /**
+     * Get the lemmas that need sync as a UTF-16LE string in the above format.
+     * Note: input buffer (str) must not be too small. If str is too small to
+     *       contain even a single lemma, there might be an infinite loop.
+     *
+     * @param str buffer to write lemmas
+     * @param size buffer size in UTF-16LE
+     * @param count output value: number of lemmas returned
+     * @return UTF-16LE string length
+     */
+    int get_sync_lemmas_in_utf16le_string_from_beginning(char16 *str, int size, int *count);
+
+#endif
+
+    struct UserDictStat {
+        uint32 version;
+        const char *file_name;
+        struct timeval load_time;
+        struct timeval last_update;
+        uint32 disk_size;
+        uint32 lemma_count;
+        uint32 lemma_size;
+        uint32 delete_count;
+        uint32 delete_size;
+#ifdef ___SYNC_ENABLED___
+        uint32 sync_count;
+#endif
+        uint32 reclaim_ratio;
+        uint32 limit_lemma_count;
+        uint32 limit_lemma_size;
+    };
+
+    bool state(UserDictStat *stat);
+
+   private:
+    uint32 total_other_nfreq_;
+    struct timeval load_time_;
+    LemmaIdType start_id_;
+    uint32 version_;
+    uint8 *lemmas_;
+
+    // In-Memory-Only flag for each lemma
+    static const uint8 kUserDictLemmaFlagRemove = 1;
+    // In-use lemmas' offsets
+    uint32 *offsets_;
+    // Highest bit in offset tells whether corresponding lemma is removed
+    static const uint32 kUserDictOffsetFlagRemove = (1U << 31);  // unsigned literal: shifting a signed 1 into bit 31 overflows int
+    // Maximum possible for the offset
+    static const uint32 kUserDictOffsetMask = ~(kUserDictOffsetFlagRemove);
+    // Bit width for last modified time, from 1 to 16
+    static const uint32 kUserDictLMTBitWidth = 16;
+    // Granularity for last modified time in second
+    static const uint32 kUserDictLMTGranularity = 60 * 60 * 24 * 7;
+    // Maximum frequency count
+    static const uint16 kUserDictMaxFrequency = 0xFFFF;
+
+#define COARSE_UTC(year, month, day, hour, minute, second) (((year) - 1970) * 365 * 24 * 60 * 60 + ((month) - 1) * 30 * 24 * 60 * 60 + ((day) - 1) * 24 * 60 * 60 + ((hour) - 0) * 60 * 60 + ((minute) - 0) * 60 + ((second) - 0))
+    static const uint64 kUserDictLMTSince = COARSE_UTC(2009, 1, 1, 0, 0, 0);
+
+    // Correspond to offsets_
+    uint32 *scores_;
+    // Following two fields are only valid in memory
+    uint32 *ids_;
+#ifdef ___PREDICT_ENABLED___
+    uint32 *predicts_;
+#endif
+#ifdef ___SYNC_ENABLED___
+    uint32 *syncs_;
+    size_t sync_count_size_;
+#endif
+    uint32 *offsets_by_id_;
+
+    size_t lemma_count_left_;
+    size_t lemma_size_left_;
+
+    const char *dict_file_;
+
+    // Be sure size is 4xN
+    struct UserDictInfo {
+        // When limitation reached, what percentage will be reclaimed (1 ~ 100)
+        uint32 reclaim_ratio;
+        // Maximum lemma count, 0 means no limitation
+        uint32 limit_lemma_count;
+        // Maximum lemma size, it's different from
+        // whole disk file size or in-mem dict size
+        // 0 means no limitation
+        uint32 limit_lemma_size;
+        // Total lemma count including deleted and inuse
+        // Also indicate offsets_ size
+        uint32 lemma_count;
+        // Total size of lemmas including used and freed
+        uint32 lemma_size;
+        // Freed lemma count
+        uint32 free_count;
+        // Freed lemma size in byte
+        uint32 free_size;
+#ifdef ___SYNC_ENABLED___
+        uint32 sync_count;
+#endif
+        int32 total_nfreq;
+    } dict_info_;
+
+    static const uint32 kUserDictVersion = 0x0ABCDEF0;
+
+    static const uint32 kUserDictPreAlloc = 32;
+    static const uint32 kUserDictAverageNchar = 8;
+
+    enum UserDictState {
+        // Keep in order
+        USER_DICT_NONE = 0,
+        USER_DICT_SYNC,
+#ifdef ___SYNC_ENABLED___
+        USER_DICT_SYNC_DIRTY,
+#endif
+        USER_DICT_SCORE_DIRTY,
+        USER_DICT_OFFSET_DIRTY,
+        USER_DICT_LEMMA_DIRTY,
+
+        USER_DICT_DEFRAGMENTED,
+    } state_;
+
+    struct UserDictSearchable {
+        uint16 splids_len;
+        uint16 splid_start[kMaxLemmaSize];
+        uint16 splid_count[kMaxLemmaSize];
+        // Compact initial letters for both FuzzyCompareSpellId and cache system
+        uint32 signature[kMaxLemmaSize / 4];
+    };
+
+#ifdef ___CACHE_ENABLED___
+    enum UserDictCacheType {
+        USER_DICT_CACHE,
+        USER_DICT_MISS_CACHE,
+    };
+
+    static const int kUserDictCacheSize = 4;
+    static const int kUserDictMissCacheSize = kMaxLemmaSize - 1;
+
+    struct UserDictMissCache {
+        uint32 signatures[kUserDictMissCacheSize][kMaxLemmaSize / 4];
+        uint16 head, tail;
+    } miss_caches_[kMaxLemmaSize];
+
+    struct UserDictCache {
+        uint32 signatures[kUserDictCacheSize][kMaxLemmaSize / 4];
+        uint32 offsets[kUserDictCacheSize];
+        uint32 lengths[kUserDictCacheSize];
+        // Ring buffer
+        uint16 head, tail;
+    } caches_[kMaxLemmaSize];
+
+    void cache_init();
+
+    void cache_push(UserDictCacheType type, UserDictSearchable *searchable, uint32 offset, uint32 length);
+
+    bool cache_hit(UserDictSearchable *searchable, uint32 *offset, uint32 *length);
+
+    bool load_cache(UserDictSearchable *searchable, uint32 *offset, uint32 *length);
+
+    void save_cache(UserDictSearchable *searchable, uint32 offset, uint32 length);
+
+    void reset_cache();
+
+    bool load_miss_cache(UserDictSearchable *searchable);
+
+    void save_miss_cache(UserDictSearchable *searchable);
+
+    void reset_miss_cache();
+#endif
+
+    LmaScoreType translate_score(int f);
+
+    int extract_score_freq(int raw_score);
+
+    uint64 extract_score_lmt(int raw_score);
+
+    inline int build_score(uint64 lmt, int freq);
+
+    inline int64 utf16le_atoll(uint16 *s, int len);
+
+    inline int utf16le_lltoa(int64 v, uint16 *s, int size);
+
+    LemmaIdType _put_lemma(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count, uint64 lmt);
+
+    size_t _get_lpis(const uint16 *splid_str, uint16 splid_str_len, LmaPsbItem *lpi_items, size_t lpi_max, bool *need_extend);
+
+    int _get_lemma_score(char16 lemma_str[], uint16 splids[], uint16 lemma_len);
+
+    int _get_lemma_score(LemmaIdType lemma_id);
+
+    int is_fuzzy_prefix_spell_id(const uint16 *id1, uint16 len1, const UserDictSearchable *searchable);
+
+    bool is_prefix_spell_id(const uint16 *fullids, uint16 fulllen, const UserDictSearchable *searchable);
+
+    uint32 get_dict_file_size(UserDictInfo *info);
+
+    bool reset(const char *file);
+
+    bool validate(const char *file);
+
+    bool load(const char *file, LemmaIdType start_id);
+
+    bool is_valid_state();
+
+    bool is_valid_lemma_id(LemmaIdType id);
+
+    LemmaIdType get_max_lemma_id();
+
+    void set_lemma_flag(uint32 offset, uint8 flag);
+
+    char get_lemma_flag(uint32 offset);
+
+    char get_lemma_nchar(uint32 offset);
+
+    uint16 *get_lemma_spell_ids(uint32 offset);
+
+    uint16 *get_lemma_word(uint32 offset);
+
+    // Prepare searchable to speed up the locate process
+    void prepare_locate(UserDictSearchable *searchable, const uint16 *splids, uint16 len);
+
+    // Compare initial letters only
+    int32 fuzzy_compare_spell_id(const uint16 *id1, uint16 len1, const UserDictSearchable *searchable);
+
+    // Compare exactly two spell ids
+    // First argument must consist of full spelling ids
+    bool equal_spell_id(const uint16 *fullids, uint16 fulllen, const UserDictSearchable *searchable);
+
+    // Find first item by initial letters
+    int32 locate_first_in_offsets(const UserDictSearchable *searchable);
+
+    LemmaIdType append_a_lemma(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count, uint64 lmt);
+
+    // Check if a lemma is in dictionary
+    int32 locate_in_offsets(char16 lemma_str[], uint16 splid_str[], uint16 lemma_len);
+
+    bool remove_lemma_by_offset_index(int offset_index);
+#ifdef ___PREDICT_ENABLED___
+    uint32 locate_where_to_insert_in_predicts(const uint16 *words, int lemma_len);
+
+    int32 locate_first_in_predicts(const uint16 *words, int lemma_len);
+
+    void remove_lemma_from_predict_list(uint32 offset);
+#endif
+#ifdef ___SYNC_ENABLED___
+    void queue_lemma_for_sync(LemmaIdType id);
+
+    void remove_lemma_from_sync_list(uint32 offset);
+
+    void write_back_sync(int fd);
+#endif
+    void write_back_score(int fd);
+    void write_back_offset(int fd);
+    void write_back_lemma(int fd);
+    void write_back_all(int fd);
+    void write_back();
+
+    struct UserDictScoreOffsetPair {
+        int score;
+        uint32 offset_index;
+    };
+
+    inline void swap(UserDictScoreOffsetPair *sop, int i, int j);
+
+    void shift_down(UserDictScoreOffsetPair *sop, int i, int n);
+
+    // On-disk format of the user dictionary file
+    // +-------------+
+    // | Version (4) |
+    // +-------------+
+    // +-----------+-----------+--------------------+-------------------+
+    // | Spare (1) | Nchar (1) | Splids (2 x Nchar) | Lemma (2 x Nchar) |
+    // +-----------+-----------+--------------------+-------------------+
+    // ...
+    // +-----------------------+     +-------------+      <---Offset of offset
+    // | Offset1 by_splids (4) | ... | OffsetN (4) |
+    // +-----------------------+     +-------------+
+#ifdef ___PREDICT_ENABLED___
+    // +----------------------+     +-------------+
+    // | Offset1 by_lemma (4) | ... | OffsetN (4) |
+    // +----------------------+     +-------------+
+#endif
+    // +------------+     +------------+
+    // | Score1 (4) | ... | ScoreN (4) |
+    // +------------+     +------------+
+#ifdef ___SYNC_ENABLED___
+    // +-------------+     +-------------+
+    // | NewAdd1 (4) | ... | NewAddN (4) |
+    // +-------------+     +-------------+
+#endif
+    // +----------------+
+    // | Dict Info (4x) |
+    // +----------------+
+};
+}  // namespace ime_pinyin
+
+#endif
diff --git a/src/share/userdict.cpp b/src/share/userdict.cpp
index 5adc430..3e1a7f1 100644
--- a/src/share/userdict.cpp
+++ b/src/share/userdict.cpp
@@ -1,2063 +1,2112 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "../include/userdict.h"
-#include "../include/splparser.h"
-#include "../include/ngram.h"
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/stat.h>
-#include <assert.h>
-#include <ctype.h>
-#include <sys/types.h>
-#include <sys/time.h>
-#include <time.h>
-#include <pthread.h>
-#include <math.h>
-
-namespace ime_pinyin {
-
-#ifdef ___DEBUG_PERF___
-static uint64 _ellapse_ = 0;
-static struct timeval _tv_start_, _tv_end_;
-#define DEBUG_PERF_BEGIN                 \
-    do {                                 \
-        gettimeofday(&_tv_start_, NULL); \
-    } while (0)
-#define DEBUG_PERF_END                                                                                         \
-    do {                                                                                                       \
-        gettimeofday(&_tv_end_, NULL);                                                                         \
-        _ellapse_ = (_tv_end_.tv_sec - _tv_start_.tv_sec) * 1000000 + (_tv_end_.tv_usec - _tv_start_.tv_usec); \
-    } while (0)
-#define LOGD_PERF(message) ALOGD("PERFORMANCE[%s] %llu usec.", message, _ellapse_);
-#else
-#define DEBUG_PERF_BEGIN
-#define DEBUG_PERF_END
-#define LOGD_PERF(message)
-#endif
-
-// XXX File load and write are thread-safe by g_mutex_
-static pthread_mutex_t g_mutex_ = PTHREAD_MUTEX_INITIALIZER;
-static struct timeval g_last_update_ = {0, 0};
-
-inline uint32 UserDict::get_dict_file_size(UserDictInfo *info) {
-    return (4 + info->lemma_size + (info->lemma_count << 3)
-#ifdef ___PREDICT_ENABLED___
-            + (info->lemma_count << 2)
-#endif
-#ifdef ___SYNC_ENABLED___
-            + (info->sync_count << 2)
-#endif
-            + sizeof(*info));
-}
-
-inline LmaScoreType UserDict::translate_score(int raw_score) {
-    // 1) ori_freq: original user frequency
-    uint32 ori_freq = extract_score_freq(raw_score);
-    // 2) lmt_off: lmt index (week offset for example)
-    uint64 lmt_off = ((raw_score & 0xffff0000) >> 16);
-    if (kUserDictLMTBitWidth < 16) {
-        uint64 mask = ~(1 << kUserDictLMTBitWidth);
-        lmt_off &= mask;
-    }
-    // 3) now_off: current time index (current week offset for example)
-    // assuming load_time_ is around current time
-    uint64 now_off = load_time_.tv_sec;
-    now_off = (now_off - kUserDictLMTSince) / kUserDictLMTGranularity;
-    now_off = (now_off << (64 - kUserDictLMTBitWidth));
-    now_off = (now_off >> (64 - kUserDictLMTBitWidth));
-    // 4) factor: decide expand-factor
-    int delta = now_off - lmt_off;
-    if (delta > 4) delta = 4;
-    int factor = 80 - (delta << 4);
-
-    double tf = (double)(dict_info_.total_nfreq + total_other_nfreq_);
-    return (LmaScoreType)(log((double)factor * (double)ori_freq / tf) * NGram::kLogValueAmplifier);
-}
-
-inline int UserDict::extract_score_freq(int raw_score) {
-    // Frequence stored in lowest 16 bits
-    int freq = (raw_score & 0x0000ffff);
-    return freq;
-}
-
-inline uint64 UserDict::extract_score_lmt(int raw_score) {
-    uint64 lmt = ((raw_score & 0xffff0000) >> 16);
-    if (kUserDictLMTBitWidth < 16) {
-        uint64 mask = ~(1 << kUserDictLMTBitWidth);
-        lmt &= mask;
-    }
-    lmt = lmt * kUserDictLMTGranularity + kUserDictLMTSince;
-    return lmt;
-}
-
-inline int UserDict::build_score(uint64 lmt, int freq) {
-    lmt = (lmt - kUserDictLMTSince) / kUserDictLMTGranularity;
-    lmt = (lmt << (64 - kUserDictLMTBitWidth));
-    lmt = (lmt >> (64 - kUserDictLMTBitWidth));
-    uint16 lmt16 = (uint16)lmt;
-    int s = freq;
-    s &= 0x0000ffff;
-    s = (lmt16 << 16) | s;
-    return s;
-}
-
-inline int64 UserDict::utf16le_atoll(uint16 *s, int len) {
-    int64 ret = 0;
-    if (len <= 0) return ret;
-
-    int flag = 1;
-    const uint16 *endp = s + len;
-    if (*s == '-') {
-        flag = -1;
-        s++;
-    } else if (*s == '+') {
-        s++;
-    }
-
-    while (*s >= '0' && *s <= '9' && s < endp) {
-        ret += ret * 10 + (*s) - '0';
-        s++;
-    }
-    return ret * flag;
-}
-
-inline int UserDict::utf16le_lltoa(int64 v, uint16 *s, int size) {
-    if (!s || size <= 0) return 0;
-    uint16 *endp = s + size;
-    int ret_len = 0;
-    if (v < 0) {
-        *(s++) = '-';
-        ++ret_len;
-        v *= -1;
-    }
-
-    uint16 *b = s;
-    while (s < endp && v != 0) {
-        *(s++) = '0' + (v % 10);
-        v = v / 10;
-        ++ret_len;
-    }
-
-    if (v != 0) return 0;
-
-    --s;
-
-    while (b < s) {
-        *b = *s;
-        ++b, --s;
-    }
-
-    return ret_len;
-}
-
-inline void UserDict::set_lemma_flag(uint32 offset, uint8 flag) {
-    offset &= kUserDictOffsetMask;
-    lemmas_[offset] |= flag;
-}
-
-inline char UserDict::get_lemma_flag(uint32 offset) {
-    offset &= kUserDictOffsetMask;
-    return (char)(lemmas_[offset]);
-}
-
-inline char UserDict::get_lemma_nchar(uint32 offset) {
-    offset &= kUserDictOffsetMask;
-    return (char)(lemmas_[offset + 1]);
-}
-
-inline uint16 *UserDict::get_lemma_spell_ids(uint32 offset) {
-    offset &= kUserDictOffsetMask;
-    return (uint16 *)(lemmas_ + offset + 2);
-}
-
-inline uint16 *UserDict::get_lemma_word(uint32 offset) {
-    offset &= kUserDictOffsetMask;
-    uint8 nchar = get_lemma_nchar(offset);
-    return (uint16 *)(lemmas_ + offset + 2 + (nchar << 1));
-}
-
-inline LemmaIdType UserDict::get_max_lemma_id() {
-    // When a lemma is deleted, we don't not claim its id back for
-    // simplicity and performance
-    return start_id_ + dict_info_.lemma_count - 1;
-}
-
-inline bool UserDict::is_valid_lemma_id(LemmaIdType id) {
-    if (id >= start_id_ && id <= get_max_lemma_id()) return true;
-    return false;
-}
-
-inline bool UserDict::is_valid_state() {
-    if (state_ == USER_DICT_NONE) return false;
-    return true;
-}
-
-UserDict::UserDict()
-    : start_id_(0),
-      version_(0),
-      lemmas_(NULL),
-      offsets_(NULL),
-      scores_(NULL),
-      ids_(NULL),
-#ifdef ___PREDICT_ENABLED___
-      predicts_(NULL),
-#endif
-#ifdef ___SYNC_ENABLED___
-      syncs_(NULL),
-      sync_count_size_(0),
-#endif
-      offsets_by_id_(NULL),
-      lemma_count_left_(0),
-      lemma_size_left_(0),
-      dict_file_(NULL),
-      state_(USER_DICT_NONE) {
-    memset(&dict_info_, 0, sizeof(dict_info_));
-    memset(&load_time_, 0, sizeof(load_time_));
-#ifdef ___CACHE_ENABLED___
-    cache_init();
-#endif
-}
-
-UserDict::~UserDict() { close_dict(); }
-
-bool UserDict::load_dict(const char *file_name, LemmaIdType start_id, LemmaIdType end_id) {
-#ifdef ___DEBUG_PERF___
-    DEBUG_PERF_BEGIN;
-#endif
-    dict_file_ = strdup(file_name);
-    if (!dict_file_) return false;
-
-    start_id_ = start_id;
-
-    if (false == validate(file_name) && false == reset(file_name)) {
-        goto error;
-    }
-    if (false == load(file_name, start_id)) {
-        goto error;
-    }
-
-    state_ = USER_DICT_SYNC;
-
-    gettimeofday(&load_time_, NULL);
-
-#ifdef ___DEBUG_PERF___
-    DEBUG_PERF_END;
-    LOGD_PERF("load_dict");
-#endif
-    return true;
-error:
-    free((void *)dict_file_);
-    start_id_ = 0;
-    return false;
-}
-
-bool UserDict::close_dict() {
-    if (state_ == USER_DICT_NONE) return true;
-    if (state_ == USER_DICT_SYNC) goto out;
-
-    // If dictionary is written back by others,
-    // we can not simply write back here
-    // To do a safe flush, we have to discard all newly added
-    // lemmas and try to reload dict file.
-    pthread_mutex_lock(&g_mutex_);
-    if (load_time_.tv_sec > g_last_update_.tv_sec || (load_time_.tv_sec == g_last_update_.tv_sec && load_time_.tv_usec > g_last_update_.tv_usec)) {
-        write_back();
-        gettimeofday(&g_last_update_, NULL);
-    }
-    pthread_mutex_unlock(&g_mutex_);
-
-out:
-    free((void *)dict_file_);
-    free(lemmas_);
-    free(offsets_);
-    free(offsets_by_id_);
-    free(scores_);
-    free(ids_);
-#ifdef ___PREDICT_ENABLED___
-    free(predicts_);
-#endif
-
-    version_ = 0;
-    dict_file_ = NULL;
-    lemmas_ = NULL;
-#ifdef ___SYNC_ENABLED___
-    syncs_ = NULL;
-    sync_count_size_ = 0;
-#endif
-    offsets_ = NULL;
-    offsets_by_id_ = NULL;
-    scores_ = NULL;
-    ids_ = NULL;
-#ifdef ___PREDICT_ENABLED___
-    predicts_ = NULL;
-#endif
-
-    memset(&dict_info_, 0, sizeof(dict_info_));
-    lemma_count_left_ = 0;
-    lemma_size_left_ = 0;
-    state_ = USER_DICT_NONE;
-
-    return true;
-}
-
-size_t UserDict::number_of_lemmas() { return dict_info_.lemma_count; }
-
-void UserDict::reset_milestones(uint16 from_step, MileStoneHandle from_handle) { return; }
-
-MileStoneHandle UserDict::extend_dict(MileStoneHandle from_handle, const DictExtPara *dep, LmaPsbItem *lpi_items, size_t lpi_max, size_t *lpi_num) {
-    if (is_valid_state() == false) return 0;
-
-    bool need_extend = false;
-
-#ifdef ___DEBUG_PERF___
-    DEBUG_PERF_BEGIN;
-#endif
-    *lpi_num = _get_lpis(dep->splids, dep->splids_extended + 1, lpi_items, lpi_max, &need_extend);
-#ifdef ___DEBUG_PERF___
-    DEBUG_PERF_END;
-    LOGD_PERF("extend_dict");
-#endif
-    return ((*lpi_num > 0 || need_extend) ? 1 : 0);
-}
-
-int UserDict::is_fuzzy_prefix_spell_id(const uint16 *id1, uint16 len1, const UserDictSearchable *searchable) {
-    if (len1 < searchable->splids_len) return 0;
-
-    SpellingTrie &spl_trie = SpellingTrie::get_instance();
-    uint32 i = 0;
-    for (i = 0; i < searchable->splids_len; i++) {
-        const char py1 = *spl_trie.get_spelling_str(id1[i]);
-        uint16 off = 8 * (i % 4);
-        const char py2 = ((searchable->signature[i / 4] & (0xff << off)) >> off);
-        if (py1 == py2) continue;
-        return 0;
-    }
-    return 1;
-}
-
-int UserDict::fuzzy_compare_spell_id(const uint16 *id1, uint16 len1, const UserDictSearchable *searchable) {
-    if (len1 < searchable->splids_len) return -1;
-    if (len1 > searchable->splids_len) return 1;
-
-    SpellingTrie &spl_trie = SpellingTrie::get_instance();
-    uint32 i = 0;
-    for (i = 0; i < len1; i++) {
-        const char py1 = *spl_trie.get_spelling_str(id1[i]);
-        uint16 off = 8 * (i % 4);
-        const char py2 = ((searchable->signature[i / 4] & (0xff << off)) >> off);
-        if (py1 == py2) continue;
-        if (py1 > py2) return 1;
-        return -1;
-    }
-    return 0;
-}
-
-bool UserDict::is_prefix_spell_id(const uint16 *fullids, uint16 fulllen, const UserDictSearchable *searchable) {
-    if (fulllen < searchable->splids_len) return false;
-
-    uint32 i = 0;
-    for (; i < searchable->splids_len; i++) {
-        uint16 start_id = searchable->splid_start[i];
-        uint16 count = searchable->splid_count[i];
-        if (fullids[i] >= start_id && fullids[i] < start_id + count)
-            continue;
-        else
-            return false;
-    }
-    return true;
-}
-
-bool UserDict::equal_spell_id(const uint16 *fullids, uint16 fulllen, const UserDictSearchable *searchable) {
-    if (fulllen != searchable->splids_len) return false;
-
-    uint32 i = 0;
-    for (; i < fulllen; i++) {
-        uint16 start_id = searchable->splid_start[i];
-        uint16 count = searchable->splid_count[i];
-        if (fullids[i] >= start_id && fullids[i] < start_id + count)
-            continue;
-        else
-            return false;
-    }
-    return true;
-}
-
-int32 UserDict::locate_first_in_offsets(const UserDictSearchable *searchable) {
-    int32 begin = 0;
-    int32 end = dict_info_.lemma_count - 1;
-    int32 middle = -1;
-
-    int32 first_prefix = middle;
-    int32 last_matched = middle;
-
-    while (begin <= end) {
-        middle = (begin + end) >> 1;
-        uint32 offset = offsets_[middle];
-        uint8 nchar = get_lemma_nchar(offset);
-        const uint16 *splids = get_lemma_spell_ids(offset);
-        int cmp = fuzzy_compare_spell_id(splids, nchar, searchable);
-        int pre = is_fuzzy_prefix_spell_id(splids, nchar, searchable);
-
-        if (pre) first_prefix = middle;
-
-        if (cmp < 0) {
-            begin = middle + 1;
-        } else if (cmp > 0) {
-            end = middle - 1;
-        } else {
-            end = middle - 1;
-            last_matched = middle;
-        }
-    }
-
-    return first_prefix;
-}
-
-void UserDict::prepare_locate(UserDictSearchable *searchable, const uint16 *splid_str, uint16 splid_str_len) {
-    searchable->splids_len = splid_str_len;
-    memset(searchable->signature, 0, sizeof(searchable->signature));
-
-    SpellingTrie &spl_trie = SpellingTrie::get_instance();
-    uint32 i = 0;
-    for (; i < splid_str_len; i++) {
-        if (spl_trie.is_half_id(splid_str[i])) {
-            searchable->splid_count[i] = spl_trie.half_to_full(splid_str[i], &(searchable->splid_start[i]));
-        } else {
-            searchable->splid_count[i] = 1;
-            searchable->splid_start[i] = splid_str[i];
-        }
-        const unsigned char py = *spl_trie.get_spelling_str(splid_str[i]);
-        searchable->signature[i >> 2] |= (py << (8 * (i % 4)));
-    }
-}
-
-size_t UserDict::get_lpis(const uint16 *splid_str, uint16 splid_str_len, LmaPsbItem *lpi_items, size_t lpi_max) { return _get_lpis(splid_str, splid_str_len, lpi_items, lpi_max, NULL); }
-
-size_t UserDict::_get_lpis(const uint16 *splid_str, uint16 splid_str_len, LmaPsbItem *lpi_items, size_t lpi_max, bool *need_extend) {
-    bool tmp_extend;
-    if (!need_extend) need_extend = &tmp_extend;
-
-    *need_extend = false;
-
-    if (is_valid_state() == false) return 0;
-    if (lpi_max <= 0) return 0;
-
-    if (0 == pthread_mutex_trylock(&g_mutex_)) {
-        if (load_time_.tv_sec < g_last_update_.tv_sec || (load_time_.tv_sec == g_last_update_.tv_sec && load_time_.tv_usec < g_last_update_.tv_usec)) {
-            // Others updated disk file, have to reload
-            pthread_mutex_unlock(&g_mutex_);
-            flush_cache();
-        } else {
-            pthread_mutex_unlock(&g_mutex_);
-        }
-    } else {
-    }
-
-    UserDictSearchable searchable;
-    prepare_locate(&searchable, splid_str, splid_str_len);
-
-    uint32 max_off = dict_info_.lemma_count;
-#ifdef ___CACHE_ENABLED___
-    int32 middle;
-    uint32 start, count;
-    bool cached = cache_hit(&searchable, &start, &count);
-    if (cached) {
-        middle = start;
-        max_off = start + count;
-    } else {
-        middle = locate_first_in_offsets(&searchable);
-        start = middle;
-    }
-#else
-    int32 middle = locate_first_in_offsets(&searchable);
-#endif
-
-    if (middle == -1) {
-#ifdef ___CACHE_ENABLED___
-        if (!cached) cache_push(USER_DICT_MISS_CACHE, &searchable, 0, 0);
-#endif
-        return 0;
-    }
-
-    size_t lpi_current = 0;
-
-    bool fuzzy_break = false;
-    bool prefix_break = false;
-    while ((size_t)middle < max_off && !fuzzy_break && !prefix_break) {
-        if (lpi_current >= lpi_max) break;
-        uint32 offset = offsets_[middle];
-        // Ignore deleted lemmas
-        if (offset & kUserDictOffsetFlagRemove) {
-            middle++;
-            continue;
-        }
-        uint8 nchar = get_lemma_nchar(offset);
-        uint16 *splids = get_lemma_spell_ids(offset);
-#ifdef ___CACHE_ENABLED___
-        if (!cached && 0 != fuzzy_compare_spell_id(splids, nchar, &searchable)) {
-#else
-        if (0 != fuzzy_compare_spell_id(splids, nchar, &searchable)) {
-#endif
-            fuzzy_break = true;
-        }
-
-        if (prefix_break == false) {
-            if (is_fuzzy_prefix_spell_id(splids, nchar, &searchable)) {
-                if (*need_extend == false && is_prefix_spell_id(splids, nchar, &searchable)) {
-                    *need_extend = true;
-                }
-            } else {
-                prefix_break = true;
-            }
-        }
-
-        if (equal_spell_id(splids, nchar, &searchable) == true) {
-            lpi_items[lpi_current].psb = translate_score(scores_[middle]);
-            lpi_items[lpi_current].id = ids_[middle];
-            lpi_items[lpi_current].lma_len = nchar;
-            lpi_current++;
-        }
-        middle++;
-    }
-
-#ifdef ___CACHE_ENABLED___
-    if (!cached) {
-        count = middle - start;
-        cache_push(USER_DICT_CACHE, &searchable, start, count);
-    }
-#endif
-
-    return lpi_current;
-}
-
-uint16 UserDict::get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, uint16 str_max) {
-    if (is_valid_state() == false) return 0;
-    if (is_valid_lemma_id(id_lemma) == false) return 0;
-    uint32 offset = offsets_by_id_[id_lemma - start_id_];
-    uint8 nchar = get_lemma_nchar(offset);
-    char16 *str = get_lemma_word(offset);
-    uint16 m = nchar < str_max - 1 ? nchar : str_max - 1;
-    int i = 0;
-    for (; i < m; i++) {
-        str_buf[i] = str[i];
-    }
-    str_buf[i] = 0;
-    return m;
-}
-
-uint16 UserDict::get_lemma_splids(LemmaIdType id_lemma, uint16 *splids, uint16 splids_max, bool arg_valid) {
-    if (is_valid_lemma_id(id_lemma) == false) return 0;
-    uint32 offset = offsets_by_id_[id_lemma - start_id_];
-    uint8 nchar = get_lemma_nchar(offset);
-    const uint16 *ids = get_lemma_spell_ids(offset);
-    int i = 0;
-    for (; i < nchar && i < splids_max; i++) splids[i] = ids[i];
-    return i;
-}
-
-size_t UserDict::predict(const char16 last_hzs[], uint16 hzs_len, NPredictItem *npre_items, size_t npre_max, size_t b4_used) {
-    uint32 new_added = 0;
-#ifdef ___PREDICT_ENABLED___
-    int32 end = dict_info_.lemma_count - 1;
-    int j = locate_first_in_predicts((const uint16 *)last_hzs, hzs_len);
-    if (j == -1) return 0;
-
-    while (j <= end) {
-        uint32 offset = predicts_[j];
-        // Ignore deleted lemmas
-        if (offset & kUserDictOffsetFlagRemove) {
-            j++;
-            continue;
-        }
-        uint32 nchar = get_lemma_nchar(offset);
-        uint16 *words = get_lemma_word(offset);
-        uint16 *splids = get_lemma_spell_ids(offset);
-
-        if (nchar <= hzs_len) {
-            j++;
-            continue;
-        }
-
-        if (memcmp(words, last_hzs, hzs_len << 1) == 0) {
-            if (new_added >= npre_max) {
-                return new_added;
-            }
-            uint32 cpy_len = (nchar < kMaxPredictSize ? (nchar << 1) : (kMaxPredictSize << 1)) - (hzs_len << 1);
-            npre_items[new_added].his_len = hzs_len;
-            npre_items[new_added].psb = get_lemma_score(words, splids, nchar);
-            memcpy(npre_items[new_added].pre_hzs, words + hzs_len, cpy_len);
-            if ((cpy_len >> 1) < kMaxPredictSize) {
-                npre_items[new_added].pre_hzs[cpy_len >> 1] = 0;
-            }
-            new_added++;
-        } else {
-            break;
-        }
-
-        j++;
-    }
-#endif
-    return new_added;
-}
-
-int32 UserDict::locate_in_offsets(char16 lemma_str[], uint16 splid_str[], uint16 lemma_len) {
-    int32 max_off = dict_info_.lemma_count;
-
-    UserDictSearchable searchable;
-    prepare_locate(&searchable, splid_str, lemma_len);
-#ifdef ___CACHE_ENABLED___
-    int32 off;
-    uint32 start, count;
-    bool cached = load_cache(&searchable, &start, &count);
-    if (cached) {
-        off = start;
-        max_off = start + count;
-    } else {
-        off = locate_first_in_offsets(&searchable);
-        start = off;
-    }
-#else
-    int32 off = locate_first_in_offsets(&searchable);
-#endif
-
-    if (off == -1) {
-        return off;
-    }
-
-    while (off < max_off) {
-        uint32 offset = offsets_[off];
-        if (offset & kUserDictOffsetFlagRemove) {
-            off++;
-            continue;
-        }
-        uint16 *splids = get_lemma_spell_ids(offset);
-#ifdef ___CACHE_ENABLED___
-        if (!cached && 0 != fuzzy_compare_spell_id(splids, lemma_len, &searchable)) break;
-#else
-        if (0 != fuzzy_compare_spell_id(splids, lemma_len, &searchable)) break;
-#endif
-        if (equal_spell_id(splids, lemma_len, &searchable) == true) {
-            uint16 *str = get_lemma_word(offset);
-            uint32 i = 0;
-            for (i = 0; i < lemma_len; i++) {
-                if (str[i] == lemma_str[i]) continue;
-                break;
-            }
-            if (i < lemma_len) {
-                off++;
-                continue;
-            }
-#ifdef ___CACHE_ENABLED___
-            // No need to save_cache here, since current function is invoked by
-            // put_lemma. It's rarely possible for a user input same lemma twice.
-            // That means first time user type a new lemma, it is newly added into
-            // user dictionary, then it's possible that user type the same lemma
-            // again.
-            // Another reason save_cache can not be invoked here is this function
-            // aborts when lemma is found, and it never knows the count.
-#endif
-            return off;
-        }
-        off++;
-    }
-
-    return -1;
-}
-
-#ifdef ___PREDICT_ENABLED___
-uint32 UserDict::locate_where_to_insert_in_predicts(const uint16 *words, int lemma_len) {
-    int32 begin = 0;
-    int32 end = dict_info_.lemma_count - 1;
-    int32 middle = end;
-
-    uint32 last_matched = middle;
-
-    while (begin <= end) {
-        middle = (begin + end) >> 1;
-        uint32 offset = offsets_[middle];
-        uint8 nchar = get_lemma_nchar(offset);
-        const uint16 *ws = get_lemma_word(offset);
-
-        uint32 minl = nchar < lemma_len ? nchar : lemma_len;
-        uint32 k = 0;
-        int cmp = 0;
-
-        for (; k < minl; k++) {
-            if (ws[k] < words[k]) {
-                cmp = -1;
-                break;
-            } else if (ws[k] > words[k]) {
-                cmp = 1;
-                break;
-            }
-        }
-        if (cmp == 0) {
-            if (nchar < lemma_len)
-                cmp = -1;
-            else if (nchar > lemma_len)
-                cmp = 1;
-        }
-
-        if (cmp < 0) {
-            begin = middle + 1;
-            last_matched = middle;
-        } else if (cmp > 0) {
-            end = middle - 1;
-        } else {
-            end = middle - 1;
-            last_matched = middle;
-        }
-    }
-
-    return last_matched;
-}
-
-int32 UserDict::locate_first_in_predicts(const uint16 *words, int lemma_len) {
-    int32 begin = 0;
-    int32 end = dict_info_.lemma_count - 1;
-    int32 middle = -1;
-
-    int32 last_matched = middle;
-
-    while (begin <= end) {
-        middle = (begin + end) >> 1;
-        uint32 offset = offsets_[middle];
-        uint8 nchar = get_lemma_nchar(offset);
-        const uint16 *ws = get_lemma_word(offset);
-
-        uint32 minl = nchar < lemma_len ? nchar : lemma_len;
-        uint32 k = 0;
-        int cmp = 0;
-
-        for (; k < minl; k++) {
-            if (ws[k] < words[k]) {
-                cmp = -1;
-                break;
-            } else if (ws[k] > words[k]) {
-                cmp = 1;
-                break;
-            }
-        }
-        if (cmp == 0) {
-            if (nchar >= lemma_len) last_matched = middle;
-            if (nchar < lemma_len)
-                cmp = -1;
-            else if (nchar > lemma_len)
-                cmp = 1;
-        }
-
-        if (cmp < 0) {
-            begin = middle + 1;
-        } else if (cmp > 0) {
-            end = middle - 1;
-        } else {
-            end = middle - 1;
-        }
-    }
-
-    return last_matched;
-}
-
-#endif
-
-LemmaIdType UserDict::get_lemma_id(char16 lemma_str[], uint16 splids[], uint16 lemma_len) {
-    int32 off = locate_in_offsets(lemma_str, splids, lemma_len);
-    if (off == -1) {
-        return 0;
-    }
-
-    return ids_[off];
-}
-
-LmaScoreType UserDict::get_lemma_score(LemmaIdType lemma_id) {
-    if (is_valid_state() == false) return 0;
-    if (is_valid_lemma_id(lemma_id) == false) return 0;
-
-    return translate_score(_get_lemma_score(lemma_id));
-}
-
-LmaScoreType UserDict::get_lemma_score(char16 lemma_str[], uint16 splids[], uint16 lemma_len) {
-    if (is_valid_state() == false) return 0;
-    return translate_score(_get_lemma_score(lemma_str, splids, lemma_len));
-}
-
-int UserDict::_get_lemma_score(LemmaIdType lemma_id) {
-    if (is_valid_state() == false) return 0;
-    if (is_valid_lemma_id(lemma_id) == false) return 0;
-
-    uint32 offset = offsets_by_id_[lemma_id - start_id_];
-
-    uint32 nchar = get_lemma_nchar(offset);
-    uint16 *spl = get_lemma_spell_ids(offset);
-    uint16 *wrd = get_lemma_word(offset);
-
-    int32 off = locate_in_offsets(wrd, spl, nchar);
-    if (off == -1) {
-        return 0;
-    }
-
-    return scores_[off];
-}
-
-int UserDict::_get_lemma_score(char16 lemma_str[], uint16 splids[], uint16 lemma_len) {
-    if (is_valid_state() == false) return 0;
-
-    int32 off = locate_in_offsets(lemma_str, splids, lemma_len);
-    if (off == -1) {
-        return 0;
-    }
-
-    return scores_[off];
-}
-
-#ifdef ___SYNC_ENABLED___
-void UserDict::remove_lemma_from_sync_list(uint32 offset) {
-    offset &= kUserDictOffsetMask;
-    uint32 i = 0;
-    for (; i < dict_info_.sync_count; i++) {
-        unsigned int off = (syncs_[i] & kUserDictOffsetMask);
-        if (off == offset) break;
-    }
-    if (i < dict_info_.sync_count) {
-        syncs_[i] = syncs_[dict_info_.sync_count - 1];
-        dict_info_.sync_count--;
-    }
-}
-#endif
-
-#ifdef ___PREDICT_ENABLED___
-void UserDict::remove_lemma_from_predict_list(uint32 offset) {
-    offset &= kUserDictOffsetMask;
-    uint32 i = 0;
-    for (; i < dict_info_.lemma_count; i++) {
-        unsigned int off = (predicts_[i] & kUserDictOffsetMask);
-        if (off == offset) {
-            predicts_[i] |= kUserDictOffsetFlagRemove;
-            break;
-        }
-    }
-}
-#endif
-
-bool UserDict::remove_lemma_by_offset_index(int offset_index) {
-    if (is_valid_state() == false) return 0;
-
-    int32 off = offset_index;
-    if (off == -1) {
-        return false;
-    }
-
-    uint32 offset = offsets_[off];
-    uint32 nchar = get_lemma_nchar(offset);
-
-    offsets_[off] |= kUserDictOffsetFlagRemove;
-
-#ifdef ___SYNC_ENABLED___
-    // Remove corresponding sync item
-    remove_lemma_from_sync_list(offset);
-#endif
-
-#ifdef ___PREDICT_ENABLED___
-    remove_lemma_from_predict_list(offset);
-#endif
-    dict_info_.free_count++;
-    dict_info_.free_size += (2 + (nchar << 2));
-
-    if (state_ < USER_DICT_OFFSET_DIRTY) state_ = USER_DICT_OFFSET_DIRTY;
-    return true;
-}
-
-bool UserDict::remove_lemma(LemmaIdType lemma_id) {
-    if (is_valid_state() == false) return 0;
-    if (is_valid_lemma_id(lemma_id) == false) return false;
-    uint32 offset = offsets_by_id_[lemma_id - start_id_];
-
-    uint32 nchar = get_lemma_nchar(offset);
-    uint16 *spl = get_lemma_spell_ids(offset);
-    uint16 *wrd = get_lemma_word(offset);
-
-    int32 off = locate_in_offsets(wrd, spl, nchar);
-
-    return remove_lemma_by_offset_index(off);
-}
-
-void UserDict::flush_cache() {
-    LemmaIdType start_id = start_id_;
-    const char *file = strdup(dict_file_);
-    if (!file) return;
-    close_dict();
-    load_dict(file, start_id, kUserDictIdEnd);
-    free((void *)file);
-#ifdef ___CACHE_ENABLED___
-    cache_init();
-#endif
-    return;
-}
-
-bool UserDict::reset(const char *file) {
-    FILE *fp = fopen(file, "w+");
-    if (!fp) {
-        return false;
-    }
-    uint32 version = kUserDictVersion;
-    size_t wred = fwrite(&version, 1, 4, fp);
-    UserDictInfo info;
-    memset(&info, 0, sizeof(info));
-    // By default, no limitation for lemma count and size
-    // thereby, reclaim_ratio is never used
-    wred += fwrite(&info, 1, sizeof(info), fp);
-    if (wred != sizeof(info) + sizeof(version)) {
-        fclose(fp);
-        unlink(file);
-        return false;
-    }
-    fclose(fp);
-    return true;
-}
-
-bool UserDict::validate(const char *file) {
-    // b is ignored in POSIX compatible os including Linux
-    // while b is important flag for Windows to specify binary mode
-    FILE *fp = fopen(file, "rb");
-    if (!fp) {
-        return false;
-    }
-
-    size_t size;
-    size_t readed;
-    uint32 version;
-    UserDictInfo dict_info;
-
-    // validate
-    int err = fseek(fp, 0, SEEK_END);
-    if (err) {
-        goto error;
-    }
-
-    size = ftell(fp);
-    if (size < 4 + sizeof(dict_info)) {
-        goto error;
-    }
-
-    err = fseek(fp, 0, SEEK_SET);
-    if (err) {
-        goto error;
-    }
-
-    readed = fread(&version, 1, sizeof(version), fp);
-    if (readed < sizeof(version)) {
-        goto error;
-    }
-    if (version != kUserDictVersion) {
-        goto error;
-    }
-
-    err = fseek(fp, -1 * sizeof(dict_info), SEEK_END);
-    if (err) {
-        goto error;
-    }
-
-    readed = fread(&dict_info, 1, sizeof(dict_info), fp);
-    if (readed != sizeof(dict_info)) {
-        goto error;
-    }
-
-    if (size != get_dict_file_size(&dict_info)) {
-        goto error;
-    }
-
-    fclose(fp);
-    return true;
-
-error:
-    fclose(fp);
-    return false;
-}
-
-bool UserDict::load(const char *file, LemmaIdType start_id) {
-    if (0 != pthread_mutex_trylock(&g_mutex_)) {
-        return false;
-    }
-    // b is ignored in POSIX compatible os including Linux
-    // while b is important flag for Windows to specify binary mode
-    FILE *fp = fopen(file, "rb");
-    if (!fp) {
-        pthread_mutex_unlock(&g_mutex_);
-        return false;
-    }
-
-    size_t readed, toread;
-    UserDictInfo dict_info;
-    uint8 *lemmas = NULL;
-    uint32 *offsets = NULL;
-#ifdef ___SYNC_ENABLED___
-    uint32 *syncs = NULL;
-#endif
-    uint32 *scores = NULL;
-    uint32 *ids = NULL;
-    uint32 *offsets_by_id = NULL;
-#ifdef ___PREDICT_ENABLED___
-    uint32 *predicts = NULL;
-#endif
-    size_t i;
-    int err;
-
-    err = fseek(fp, -1 * sizeof(dict_info), SEEK_END);
-    if (err) goto error;
-
-    readed = fread(&dict_info, 1, sizeof(dict_info), fp);
-    if (readed != sizeof(dict_info)) goto error;
-
-    lemmas = (uint8 *)malloc(dict_info.lemma_size + (kUserDictPreAlloc * (2 + (kUserDictAverageNchar << 2))));
-
-    if (!lemmas) goto error;
-
-    offsets = (uint32 *)malloc((dict_info.lemma_count + kUserDictPreAlloc) << 2);
-    if (!offsets) goto error;
-
-#ifdef ___PREDICT_ENABLED___
-    predicts = (uint32 *)malloc((dict_info.lemma_count + kUserDictPreAlloc) << 2);
-    if (!predicts) goto error;
-#endif
-
-#ifdef ___SYNC_ENABLED___
-    syncs = (uint32 *)malloc((dict_info.sync_count + kUserDictPreAlloc) << 2);
-    if (!syncs) goto error;
-#endif
-
-    scores = (uint32 *)malloc((dict_info.lemma_count + kUserDictPreAlloc) << 2);
-    if (!scores) goto error;
-
-    ids = (uint32 *)malloc((dict_info.lemma_count + kUserDictPreAlloc) << 2);
-    if (!ids) goto error;
-
-    offsets_by_id = (uint32 *)malloc((dict_info.lemma_count + kUserDictPreAlloc) << 2);
-    if (!offsets_by_id) goto error;
-
-    err = fseek(fp, 4, SEEK_SET);
-    if (err) goto error;
-
-    readed = 0;
-    while (readed < dict_info.lemma_size && !ferror(fp) && !feof(fp)) {
-        readed += fread(lemmas + readed, 1, dict_info.lemma_size - readed, fp);
-    }
-    if (readed < dict_info.lemma_size) goto error;
-
-    toread = (dict_info.lemma_count << 2);
-    readed = 0;
-    while (readed < toread && !ferror(fp) && !feof(fp)) {
-        readed += fread((((uint8 *)offsets) + readed), 1, toread - readed, fp);
-    }
-    if (readed < toread) goto error;
-
-#ifdef ___PREDICT_ENABLED___
-    toread = (dict_info.lemma_count << 2);
-    readed = 0;
-    while (readed < toread && !ferror(fp) && !feof(fp)) {
-        readed += fread((((uint8 *)predicts) + readed), 1, toread - readed, fp);
-    }
-    if (readed < toread) goto error;
-#endif
-
-    readed = 0;
-    while (readed < toread && !ferror(fp) && !feof(fp)) {
-        readed += fread((((uint8 *)scores) + readed), 1, toread - readed, fp);
-    }
-    if (readed < toread) goto error;
-
-#ifdef ___SYNC_ENABLED___
-    toread = (dict_info.sync_count << 2);
-    readed = 0;
-    while (readed < toread && !ferror(fp) && !feof(fp)) {
-        readed += fread((((uint8 *)syncs) + readed), 1, toread - readed, fp);
-    }
-    if (readed < toread) goto error;
-#endif
-
-    for (i = 0; i < dict_info.lemma_count; i++) {
-        ids[i] = start_id + i;
-        offsets_by_id[i] = offsets[i];
-    }
-
-    lemmas_ = lemmas;
-    offsets_ = offsets;
-#ifdef ___SYNC_ENABLED___
-    syncs_ = syncs;
-    sync_count_size_ = dict_info.sync_count + kUserDictPreAlloc;
-#endif
-    offsets_by_id_ = offsets_by_id;
-    scores_ = scores;
-    ids_ = ids;
-#ifdef ___PREDICT_ENABLED___
-    predicts_ = predicts;
-#endif
-    lemma_count_left_ = kUserDictPreAlloc;
-    lemma_size_left_ = kUserDictPreAlloc * (2 + (kUserDictAverageNchar << 2));
-    memcpy(&dict_info_, &dict_info, sizeof(dict_info));
-    state_ = USER_DICT_SYNC;
-
-    fclose(fp);
-
-    pthread_mutex_unlock(&g_mutex_);
-    return true;
-
-error:
-    if (lemmas) free(lemmas);
-    if (offsets) free(offsets);
-#ifdef ___SYNC_ENABLED___
-    if (syncs) free(syncs);
-#endif
-    if (scores) free(scores);
-    if (ids) free(ids);
-    if (offsets_by_id) free(offsets_by_id);
-#ifdef ___PREDICT_ENABLED___
-    if (predicts) free(predicts);
-#endif
-    fclose(fp);
-    pthread_mutex_unlock(&g_mutex_);
-    return false;
-}
-
-void UserDict::write_back() {
-    // XXX write back is only allowed from close_dict due to thread-safe sake
-    if (state_ == USER_DICT_NONE || state_ == USER_DICT_SYNC) return;
-    int fd = open(dict_file_, O_WRONLY);
-    if (fd == -1) return;
-    switch (state_) {
-        case USER_DICT_DEFRAGMENTED:
-            write_back_all(fd);
-            break;
-        case USER_DICT_LEMMA_DIRTY:
-            write_back_lemma(fd);
-            break;
-        case USER_DICT_OFFSET_DIRTY:
-            write_back_offset(fd);
-            break;
-        case USER_DICT_SCORE_DIRTY:
-            write_back_score(fd);
-            break;
-#ifdef ___SYNC_ENABLED___
-        case USER_DICT_SYNC_DIRTY:
-            write_back_sync(fd);
-            break;
-#endif
-        default:
-            break;
-    }
-    // It seems truncate is not need on Linux, Windows except Mac
-    // I am doing it here anyway for safety.
-    off_t cur = lseek(fd, 0, SEEK_CUR);
-    ftruncate(fd, cur);
-    close(fd);
-    state_ = USER_DICT_SYNC;
-}
-
-#ifdef ___SYNC_ENABLED___
-void UserDict::write_back_sync(int fd) {
-    int err = lseek(fd,
-                    4 + dict_info_.lemma_size + (dict_info_.lemma_count << 3)
-#ifdef ___PREDICT_ENABLED___
-                        + (dict_info_.lemma_count << 2)
-#endif
-                        ,
-                    SEEK_SET);
-    if (err == -1) return;
-    write(fd, syncs_, dict_info_.sync_count << 2);
-    write(fd, &dict_info_, sizeof(dict_info_));
-}
-#endif
-
-void UserDict::write_back_offset(int fd) {
-    int err = lseek(fd, 4 + dict_info_.lemma_size, SEEK_SET);
-    if (err == -1) return;
-    write(fd, offsets_, dict_info_.lemma_count << 2);
-#ifdef ___PREDICT_ENABLED___
-    write(fd, predicts_, dict_info_.lemma_count << 2);
-#endif
-    write(fd, scores_, dict_info_.lemma_count << 2);
-#ifdef ___SYNC_ENABLED___
-    write(fd, syncs_, dict_info_.sync_count << 2);
-#endif
-    write(fd, &dict_info_, sizeof(dict_info_));
-}
-
-void UserDict::write_back_score(int fd) {
-    int err = lseek(fd,
-                    4 + dict_info_.lemma_size + (dict_info_.lemma_count << 2)
-#ifdef ___PREDICT_ENABLED___
-                        + (dict_info_.lemma_count << 2)
-#endif
-                        ,
-                    SEEK_SET);
-    if (err == -1) return;
-    write(fd, scores_, dict_info_.lemma_count << 2);
-#ifdef ___SYNC_ENABLED___
-    write(fd, syncs_, dict_info_.sync_count << 2);
-#endif
-    write(fd, &dict_info_, sizeof(dict_info_));
-}
-
-void UserDict::write_back_lemma(int fd) {
-    int err = lseek(fd, 4, SEEK_SET);
-    if (err == -1) return;
-    // New lemmas are always appended, no need to write whole lemma block
-    size_t need_write = kUserDictPreAlloc * (2 + (kUserDictAverageNchar << 2)) - lemma_size_left_;
-    err = lseek(fd, dict_info_.lemma_size - need_write, SEEK_CUR);
-    if (err == -1) return;
-    write(fd, lemmas_ + dict_info_.lemma_size - need_write, need_write);
-
-    write(fd, offsets_, dict_info_.lemma_count << 2);
-#ifdef ___PREDICT_ENABLED___
-    write(fd, predicts_, dict_info_.lemma_count << 2);
-#endif
-    write(fd, scores_, dict_info_.lemma_count << 2);
-#ifdef ___SYNC_ENABLED___
-    write(fd, syncs_, dict_info_.sync_count << 2);
-#endif
-    write(fd, &dict_info_, sizeof(dict_info_));
-}
-
-void UserDict::write_back_all(int fd) {
-    // XXX lemma_size is handled differently in writeall
-    // and writelemma. I update lemma_size and lemma_count in different
-    // places for these two cases. Should fix it to make it consistent.
-    int err = lseek(fd, 4, SEEK_SET);
-    if (err == -1) return;
-    write(fd, lemmas_, dict_info_.lemma_size);
-    write(fd, offsets_, dict_info_.lemma_count << 2);
-#ifdef ___PREDICT_ENABLED___
-    write(fd, predicts_, dict_info_.lemma_count << 2);
-#endif
-    write(fd, scores_, dict_info_.lemma_count << 2);
-#ifdef ___SYNC_ENABLED___
-    write(fd, syncs_, dict_info_.sync_count << 2);
-#endif
-    write(fd, &dict_info_, sizeof(dict_info_));
-}
-
-#ifdef ___CACHE_ENABLED___
-bool UserDict::load_cache(UserDictSearchable *searchable, uint32 *offset, uint32 *length) {
-    UserDictCache *cache = &caches_[searchable->splids_len - 1];
-    if (cache->head == cache->tail) return false;
-
-    uint16 j, sig_len = kMaxLemmaSize / 4;
-    uint16 i = cache->head;
-    while (1) {
-        j = 0;
-        for (; j < sig_len; j++) {
-            if (cache->signatures[i][j] != searchable->signature[j]) break;
-        }
-        if (j < sig_len) {
-            i++;
-            if (i >= kUserDictCacheSize) i -= kUserDictCacheSize;
-            if (i == cache->tail) break;
-            continue;
-        }
-        *offset = cache->offsets[i];
-        *length = cache->lengths[i];
-        return true;
-    }
-    return false;
-}
-
-void UserDict::save_cache(UserDictSearchable *searchable, uint32 offset, uint32 length) {
-    UserDictCache *cache = &caches_[searchable->splids_len - 1];
-    uint16 next = cache->tail;
-
-    cache->offsets[next] = offset;
-    cache->lengths[next] = length;
-    uint16 sig_len = kMaxLemmaSize / 4;
-    uint16 j = 0;
-    for (; j < sig_len; j++) {
-        cache->signatures[next][j] = searchable->signature[j];
-    }
-
-    if (++next >= kUserDictCacheSize) {
-        next -= kUserDictCacheSize;
-    }
-    if (next == cache->head) {
-        cache->head++;
-        if (cache->head >= kUserDictCacheSize) {
-            cache->head -= kUserDictCacheSize;
-        }
-    }
-    cache->tail = next;
-}
-
-void UserDict::reset_cache() { memset(caches_, 0, sizeof(caches_)); }
-
-bool UserDict::load_miss_cache(UserDictSearchable *searchable) {
-    UserDictMissCache *cache = &miss_caches_[searchable->splids_len - 1];
-    if (cache->head == cache->tail) return false;
-
-    uint16 j, sig_len = kMaxLemmaSize / 4;
-    uint16 i = cache->head;
-    while (1) {
-        j = 0;
-        for (; j < sig_len; j++) {
-            if (cache->signatures[i][j] != searchable->signature[j]) break;
-        }
-        if (j < sig_len) {
-            i++;
-            if (i >= kUserDictMissCacheSize) i -= kUserDictMissCacheSize;
-            if (i == cache->tail) break;
-            continue;
-        }
-        return true;
-    }
-    return false;
-}
-
-void UserDict::save_miss_cache(UserDictSearchable *searchable) {
-    UserDictMissCache *cache = &miss_caches_[searchable->splids_len - 1];
-    uint16 next = cache->tail;
-
-    uint16 sig_len = kMaxLemmaSize / 4;
-    uint16 j = 0;
-    for (; j < sig_len; j++) {
-        cache->signatures[next][j] = searchable->signature[j];
-    }
-
-    if (++next >= kUserDictMissCacheSize) {
-        next -= kUserDictMissCacheSize;
-    }
-    if (next == cache->head) {
-        cache->head++;
-        if (cache->head >= kUserDictMissCacheSize) {
-            cache->head -= kUserDictMissCacheSize;
-        }
-    }
-    cache->tail = next;
-}
-
-void UserDict::reset_miss_cache() { memset(miss_caches_, 0, sizeof(miss_caches_)); }
-
-void UserDict::cache_init() {
-    reset_cache();
-    reset_miss_cache();
-}
-
-bool UserDict::cache_hit(UserDictSearchable *searchable, uint32 *offset, uint32 *length) {
-    bool hit = load_miss_cache(searchable);
-    if (hit) {
-        *offset = 0;
-        *length = 0;
-        return true;
-    }
-    hit = load_cache(searchable, offset, length);
-    if (hit) {
-        return true;
-    }
-    return false;
-}
-
-void UserDict::cache_push(UserDictCacheType type, UserDictSearchable *searchable, uint32 offset, uint32 length) {
-    switch (type) {
-        case USER_DICT_MISS_CACHE:
-            save_miss_cache(searchable);
-            break;
-        case USER_DICT_CACHE:
-            save_cache(searchable, offset, length);
-            break;
-        default:
-            break;
-    }
-}
-
-#endif
-
-void UserDict::defragment(void) {
-#ifdef ___DEBUG_PERF___
-    DEBUG_PERF_BEGIN;
-#endif
-    if (is_valid_state() == false) return;
-    // Fixup offsets_, set REMOVE flag to lemma's flag if needed
-    size_t first_freed = 0;
-    size_t first_inuse = 0;
-    while (first_freed < dict_info_.lemma_count) {
-        // Find first freed offset
-        while ((offsets_[first_freed] & kUserDictOffsetFlagRemove) == 0 && first_freed < dict_info_.lemma_count) {
-            first_freed++;
-        }
-        if (first_freed < dict_info_.lemma_count) {
-            // Save REMOVE flag to lemma flag
-            int off = offsets_[first_freed];
-            set_lemma_flag(off, kUserDictLemmaFlagRemove);
-        } else {
-            break;
-        }
-        // Find first inuse offse after first_freed
-        first_inuse = first_freed + 1;
-        while ((offsets_[first_inuse] & kUserDictOffsetFlagRemove) && (first_inuse < dict_info_.lemma_count)) {
-            // Save REMOVE flag to lemma flag
-            int off = offsets_[first_inuse];
-            set_lemma_flag(off, kUserDictLemmaFlagRemove);
-            first_inuse++;
-        }
-        if (first_inuse >= dict_info_.lemma_count) {
-            break;
-        }
-        // Swap offsets_
-        int tmp = offsets_[first_inuse];
-        offsets_[first_inuse] = offsets_[first_freed];
-        offsets_[first_freed] = tmp;
-        // Move scores_, no need to swap
-        tmp = scores_[first_inuse];
-        scores_[first_inuse] = scores_[first_freed];
-        scores_[first_freed] = tmp;
-        // Swap ids_
-        LemmaIdType tmpid = ids_[first_inuse];
-        ids_[first_inuse] = ids_[first_freed];
-        ids_[first_freed] = tmpid;
-        // Go on
-        first_freed++;
-    }
-#ifdef ___PREDICT_ENABLED___
-    // Fixup predicts_
-    first_freed = 0;
-    first_inuse = 0;
-    while (first_freed < dict_info_.lemma_count) {
-        // Find first freed offset
-        while ((predicts_[first_freed] & kUserDictOffsetFlagRemove) == 0 && first_freed < dict_info_.lemma_count) {
-            first_freed++;
-        }
-        if (first_freed >= dict_info_.lemma_count) break;
-        // Find first inuse offse after first_freed
-        first_inuse = first_freed + 1;
-        while ((predicts_[first_inuse] & kUserDictOffsetFlagRemove) && (first_inuse < dict_info_.lemma_count)) {
-            first_inuse++;
-        }
-        if (first_inuse >= dict_info_.lemma_count) {
-            break;
-        }
-        // Swap offsets_
-        int tmp = predicts_[first_inuse];
-        predicts_[first_inuse] = predicts_[first_freed];
-        predicts_[first_freed] = tmp;
-        // Go on
-        first_freed++;
-    }
-#endif
-    dict_info_.lemma_count = first_freed;
-    // Fixup lemmas_
-    size_t begin = 0;
-    size_t end = 0;
-    size_t dst = 0;
-    int total_size = dict_info_.lemma_size + lemma_size_left_;
-    int total_count = dict_info_.lemma_count + lemma_count_left_;
-    size_t real_size = total_size - lemma_size_left_;
-    while (dst < real_size) {
-        unsigned char flag = get_lemma_flag(dst);
-        unsigned char nchr = get_lemma_nchar(dst);
-        if ((flag & kUserDictLemmaFlagRemove) == 0) {
-            dst += nchr * 4 + 2;
-            continue;
-        }
-        break;
-    }
-    if (dst >= real_size) return;
-
-    end = dst;
-    while (end < real_size) {
-        begin = end + get_lemma_nchar(end) * 4 + 2;
-    repeat:
-        // not used any more
-        if (begin >= real_size) break;
-        unsigned char flag = get_lemma_flag(begin);
-        unsigned char nchr = get_lemma_nchar(begin);
-        if (flag & kUserDictLemmaFlagRemove) {
-            begin += nchr * 4 + 2;
-            goto repeat;
-        }
-        end = begin + nchr * 4 + 2;
-        while (end < real_size) {
-            unsigned char eflag = get_lemma_flag(end);
-            unsigned char enchr = get_lemma_nchar(end);
-            if ((eflag & kUserDictLemmaFlagRemove) == 0) {
-                end += enchr * 4 + 2;
-                continue;
-            }
-            break;
-        }
-        memmove(lemmas_ + dst, lemmas_ + begin, end - begin);
-        for (size_t j = 0; j < dict_info_.lemma_count; j++) {
-            if (offsets_[j] >= begin && offsets_[j] < end) {
-                offsets_[j] -= (begin - dst);
-                offsets_by_id_[ids_[j] - start_id_] = offsets_[j];
-            }
-#ifdef ___PREDICT_ENABLED___
-            if (predicts_[j] >= begin && predicts_[j] < end) {
-                predicts_[j] -= (begin - dst);
-            }
-#endif
-        }
-#ifdef ___SYNC_ENABLED___
-        for (size_t j = 0; j < dict_info_.sync_count; j++) {
-            if (syncs_[j] >= begin && syncs_[j] < end) {
-                syncs_[j] -= (begin - dst);
-            }
-        }
-#endif
-        dst += (end - begin);
-    }
-
-    dict_info_.free_count = 0;
-    dict_info_.free_size = 0;
-    dict_info_.lemma_size = dst;
-    lemma_size_left_ = total_size - dict_info_.lemma_size;
-    lemma_count_left_ = total_count - dict_info_.lemma_count;
-
-    // XXX Without following code,
-    // offsets_by_id_ is not reordered.
-    // That's to say, all removed lemmas' ids are not collected back.
-    // There may not be room for addition of new lemmas due to
-    // offsests_by_id_ reason, although lemma_size_left_ is fixed.
-    // By default, we do want defrag as fast as possible, because
-    // during defrag procedure, other peers can not write new lemmas
-    // to user dictionary file.
-    // XXX If write-back is invoked immediately after
-    // this defragment, no need to fix up following in-mem data.
-    for (uint32 i = 0; i < dict_info_.lemma_count; i++) {
-        ids_[i] = start_id_ + i;
-        offsets_by_id_[i] = offsets_[i];
-    }
-
-    state_ = USER_DICT_DEFRAGMENTED;
-
-#ifdef ___DEBUG_PERF___
-    DEBUG_PERF_END;
-    LOGD_PERF("defragment");
-#endif
-}
-
-#ifdef ___SYNC_ENABLED___
-void UserDict::clear_sync_lemmas(unsigned int start, unsigned int end) {
-    if (is_valid_state() == false) return;
-    if (end > dict_info_.sync_count) end = dict_info_.sync_count;
-    memmove(syncs_ + start, syncs_ + end, (dict_info_.sync_count - end) << 2);
-    dict_info_.sync_count -= (end - start);
-    if (state_ < USER_DICT_SYNC_DIRTY) state_ = USER_DICT_SYNC_DIRTY;
-}
-
-int UserDict::get_sync_count() {
-    if (is_valid_state() == false) return 0;
-    return dict_info_.sync_count;
-}
-
-LemmaIdType UserDict::put_lemma_no_sync(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count, uint64 lmt) {
-    int again = 0;
-begin:
-    LemmaIdType id;
-    uint32 *syncs_bak = syncs_;
-    syncs_ = NULL;
-    id = _put_lemma(lemma_str, splids, lemma_len, count, lmt);
-    syncs_ = syncs_bak;
-    if (id == 0 && again == 0) {
-        if ((dict_info_.limit_lemma_count > 0 && dict_info_.lemma_count >= dict_info_.limit_lemma_count) || (dict_info_.limit_lemma_size > 0 && dict_info_.lemma_size + (2 + (lemma_len << 2)) > dict_info_.limit_lemma_size)) {
-            // XXX Always reclaim and defrag in sync code path
-            //     sync thread is background thread and ok with heavy work
-            reclaim();
-            defragment();
-            flush_cache();
-            again = 1;
-            goto begin;
-        }
-    }
-    return id;
-}
-
-int UserDict::put_lemmas_no_sync_from_utf16le_string(char16 *lemmas, int len) {
-    int newly_added = 0;
-
-    SpellingParser *spl_parser = new SpellingParser();
-    if (!spl_parser) {
-        return 0;
-    }
-#ifdef ___DEBUG_PERF___
-    DEBUG_PERF_BEGIN;
-#endif
-    char16 *ptr = lemmas;
-
-    // Extract pinyin,words,frequence,last_mod_time
-    char16 *p = ptr, *py16 = ptr;
-    char16 *hz16 = NULL;
-    int py16_len = 0;
-    uint16 splid[kMaxLemmaSize];
-    int splid_len = 0;
-    int hz16_len = 0;
-    char16 *fr16 = NULL;
-    int fr16_len = 0;
-
-    while (p - ptr < len) {
-        // Pinyin
-        py16 = p;
-        splid_len = 0;
-        while (*p != 0x2c && (p - ptr) < len) {
-            if (*p == 0x20) splid_len++;
-            p++;
-        }
-        splid_len++;
-        if (p - ptr == len) break;
-        py16_len = p - py16;
-        if (kMaxLemmaSize < splid_len) {
-            break;
-        }
-        bool is_pre;
-        int splidl = spl_parser->splstr16_to_idxs_f(py16, py16_len, splid, NULL, kMaxLemmaSize, is_pre);
-        if (splidl != splid_len) break;
-        // Phrase
-        hz16 = ++p;
-        while (*p != 0x2c && (p - ptr) < len) {
-            p++;
-        }
-        hz16_len = p - hz16;
-        if (hz16_len != splid_len) break;
-        // Frequency
-        fr16 = ++p;
-        fr16_len = 0;
-        while (*p != 0x2c && (p - ptr) < len) {
-            p++;
-        }
-        fr16_len = p - fr16;
-        uint32 intf = (uint32)utf16le_atoll(fr16, fr16_len);
-        // Last modified time
-        fr16 = ++p;
-        fr16_len = 0;
-        while (*p != 0x3b && (p - ptr) < len) {
-            p++;
-        }
-        fr16_len = p - fr16;
-        uint64 last_mod = utf16le_atoll(fr16, fr16_len);
-
-        put_lemma_no_sync(hz16, splid, splid_len, intf, last_mod);
-        newly_added++;
-
-        p++;
-    }
-
-#ifdef ___DEBUG_PERF___
-    DEBUG_PERF_END;
-    LOGD_PERF("put_lemmas_no_sync_from_utf16le_string");
-#endif
-    return newly_added;
-}
-
-int UserDict::get_sync_lemmas_in_utf16le_string_from_beginning(char16 *str, int size, int *count) {
-    int len = 0;
-    *count = 0;
-
-    int left_len = size;
-
-    if (is_valid_state() == false) return len;
-
-    SpellingTrie *spl_trie = &SpellingTrie::get_instance();
-    if (!spl_trie) {
-        return 0;
-    }
-
-    uint32 i;
-    for (i = 0; i < dict_info_.sync_count; i++) {
-        int offset = syncs_[i];
-        uint32 nchar = get_lemma_nchar(offset);
-        uint16 *spl = get_lemma_spell_ids(offset);
-        uint16 *wrd = get_lemma_word(offset);
-        int score = _get_lemma_score(wrd, spl, nchar);
-
-        static char score_temp[32], *pscore_temp = score_temp;
-        static char16 temp[256], *ptemp = temp;
-
-        pscore_temp = score_temp;
-        ptemp = temp;
-
-        uint32 j;
-        // Add pinyin
-        for (j = 0; j < nchar; j++) {
-            int ret_len = spl_trie->get_spelling_str16(spl[j], ptemp, temp + sizeof(temp) - ptemp);
-            if (ret_len <= 0) break;
-            ptemp += ret_len;
-            if (ptemp < temp + sizeof(temp) - 1) {
-                *(ptemp++) = ' ';
-            } else {
-                j = 0;
-                break;
-            }
-        }
-        if (j < nchar) {
-            continue;
-        }
-        ptemp--;
-        if (ptemp < temp + sizeof(temp) - 1) {
-            *(ptemp++) = ',';
-        } else {
-            continue;
-        }
-        // Add phrase
-        for (j = 0; j < nchar; j++) {
-            if (ptemp < temp + sizeof(temp) - 1) {
-                *(ptemp++) = wrd[j];
-            } else {
-                break;
-            }
-        }
-        if (j < nchar) {
-            continue;
-        }
-        if (ptemp < temp + sizeof(temp) - 1) {
-            *(ptemp++) = ',';
-        } else {
-            continue;
-        }
-        // Add frequency
-        uint32 intf = extract_score_freq(score);
-        int ret_len = utf16le_lltoa(intf, ptemp, temp + sizeof(temp) - ptemp);
-        if (ret_len <= 0) continue;
-        ptemp += ret_len;
-        if (ptemp < temp + sizeof(temp) - 1) {
-            *(ptemp++) = ',';
-        } else {
-            continue;
-        }
-        // Add last modified time
-        uint64 last_mod = extract_score_lmt(score);
-        ret_len = utf16le_lltoa(last_mod, ptemp, temp + sizeof(temp) - ptemp);
-        if (ret_len <= 0) continue;
-        ptemp += ret_len;
-        if (ptemp < temp + sizeof(temp) - 1) {
-            *(ptemp++) = ';';
-        } else {
-            continue;
-        }
-
-        // Write to string
-        int need_len = ptemp - temp;
-        if (need_len > left_len) break;
-        memcpy(str + len, temp, need_len * 2);
-        left_len -= need_len;
-
-        len += need_len;
-        (*count)++;
-    }
-
-    if (len > 0) {
-        if (state_ < USER_DICT_SYNC_DIRTY) state_ = USER_DICT_SYNC_DIRTY;
-    }
-    return len;
-}
-
-#endif
-
-bool UserDict::state(UserDictStat *stat) {
-    if (is_valid_state() == false) return false;
-    if (!stat) return false;
-    stat->version = version_;
-    stat->file_name = dict_file_;
-    stat->load_time.tv_sec = load_time_.tv_sec;
-    stat->load_time.tv_usec = load_time_.tv_usec;
-    pthread_mutex_lock(&g_mutex_);
-    stat->last_update.tv_sec = g_last_update_.tv_sec;
-    stat->last_update.tv_usec = g_last_update_.tv_usec;
-    pthread_mutex_unlock(&g_mutex_);
-    stat->disk_size = get_dict_file_size(&dict_info_);
-    stat->lemma_count = dict_info_.lemma_count;
-    stat->lemma_size = dict_info_.lemma_size;
-    stat->delete_count = dict_info_.free_count;
-    stat->delete_size = dict_info_.free_size;
-#ifdef ___SYNC_ENABLED___
-    stat->sync_count = dict_info_.sync_count;
-#endif
-    stat->limit_lemma_count = dict_info_.limit_lemma_count;
-    stat->limit_lemma_size = dict_info_.limit_lemma_size;
-    stat->reclaim_ratio = dict_info_.reclaim_ratio;
-    return true;
-}
-
-void UserDict::set_limit(uint32 max_lemma_count, uint32 max_lemma_size, uint32 reclaim_ratio) {
-    dict_info_.limit_lemma_count = max_lemma_count;
-    dict_info_.limit_lemma_size = max_lemma_size;
-    if (reclaim_ratio > 100) reclaim_ratio = 100;
-    dict_info_.reclaim_ratio = reclaim_ratio;
-}
-
-void UserDict::reclaim() {
-    if (is_valid_state() == false) return;
-
-    switch (dict_info_.reclaim_ratio) {
-        case 0:
-            return;
-        case 100:
-            // TODO: CLEAR to be implemented
-            assert(false);
-            return;
-        default:
-            break;
-    }
-
-    // XXX Reclaim is only based on count, not size
-    uint32 count = dict_info_.lemma_count;
-    int rc = count * dict_info_.reclaim_ratio / 100;
-
-    UserDictScoreOffsetPair *score_offset_pairs = NULL;
-    score_offset_pairs = (UserDictScoreOffsetPair *)malloc(sizeof(UserDictScoreOffsetPair) * rc);
-    if (score_offset_pairs == NULL) {
-        return;
-    }
-
-    for (int i = 0; i < rc; i++) {
-        int s = scores_[i];
-        score_offset_pairs[i].score = s;
-        score_offset_pairs[i].offset_index = i;
-    }
-
-    for (int i = (rc + 1) / 2; i >= 0; i--) shift_down(score_offset_pairs, i, rc);
-
-    for (uint32 i = rc; i < dict_info_.lemma_count; i++) {
-        int s = scores_[i];
-        if (s < score_offset_pairs[0].score) {
-            score_offset_pairs[0].score = s;
-            score_offset_pairs[0].offset_index = i;
-            shift_down(score_offset_pairs, 0, rc);
-        }
-    }
-
-    for (int i = 0; i < rc; i++) {
-        int off = score_offset_pairs[i].offset_index;
-        remove_lemma_by_offset_index(off);
-    }
-    if (rc > 0) {
-        if (state_ < USER_DICT_OFFSET_DIRTY) state_ = USER_DICT_OFFSET_DIRTY;
-    }
-
-    free(score_offset_pairs);
-}
-
-inline void UserDict::swap(UserDictScoreOffsetPair *sop, int i, int j) {
-    int s = sop[i].score;
-    int p = sop[i].offset_index;
-    sop[i].score = sop[j].score;
-    sop[i].offset_index = sop[j].offset_index;
-    sop[j].score = s;
-    sop[j].offset_index = p;
-}
-
-void UserDict::shift_down(UserDictScoreOffsetPair *sop, int i, int n) {
-    int par = i;
-    while (par < n) {
-        int left = par * 2 + 1;
-        int right = left + 1;
-        if (left >= n && right >= n) break;
-        if (right >= n) {
-            if (sop[left].score > sop[par].score) {
-                swap(sop, left, par);
-                par = left;
-                continue;
-            }
-        } else if (sop[left].score > sop[right].score && sop[left].score > sop[par].score) {
-            swap(sop, left, par);
-            par = left;
-            continue;
-        } else if (sop[right].score > sop[left].score && sop[right].score > sop[par].score) {
-            swap(sop, right, par);
-            par = right;
-            continue;
-        }
-        break;
-    }
-}
-
-LemmaIdType UserDict::put_lemma(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count) { return _put_lemma(lemma_str, splids, lemma_len, count, time(NULL)); }
-
-LemmaIdType UserDict::_put_lemma(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count, uint64 lmt) {
-#ifdef ___DEBUG_PERF___
-    DEBUG_PERF_BEGIN;
-#endif
-    if (is_valid_state() == false) return 0;
-    int32 off = locate_in_offsets(lemma_str, splids, lemma_len);
-    if (off != -1) {
-        int delta_score = count - scores_[off];
-        dict_info_.total_nfreq += delta_score;
-        scores_[off] = build_score(lmt, count);
-        if (state_ < USER_DICT_SCORE_DIRTY) state_ = USER_DICT_SCORE_DIRTY;
-#ifdef ___DEBUG_PERF___
-        DEBUG_PERF_END;
-        LOGD_PERF("_put_lemma(update)");
-#endif
-        return ids_[off];
-    } else {
-        if ((dict_info_.limit_lemma_count > 0 && dict_info_.lemma_count >= dict_info_.limit_lemma_count) || (dict_info_.limit_lemma_size > 0 && dict_info_.lemma_size + (2 + (lemma_len << 2)) > dict_info_.limit_lemma_size)) {
-            // XXX Don't defragment here, it's too time-consuming.
-            return 0;
-        }
-        int flushed = 0;
-        if (lemma_count_left_ == 0 || lemma_size_left_ < (size_t)(2 + (lemma_len << 2))) {
-            // XXX When there is no space for new lemma, we flush to disk
-            // flush_cache() may be called by upper user
-            // and better place shoule be found instead of here
-            flush_cache();
-            flushed = 1;
-            // Or simply return and do nothing
-            // return 0;
-        }
-#ifdef ___DEBUG_PERF___
-        DEBUG_PERF_END;
-        LOGD_PERF(flushed ? "_put_lemma(flush+add)" : "_put_lemma(add)");
-#endif
-        LemmaIdType id = append_a_lemma(lemma_str, splids, lemma_len, count, lmt);
-#ifdef ___SYNC_ENABLED___
-        if (syncs_ && id != 0) {
-            queue_lemma_for_sync(id);
-        }
-#endif
-        return id;
-    }
-    return 0;
-}
-
-#ifdef ___SYNC_ENABLED___
-void UserDict::queue_lemma_for_sync(LemmaIdType id) {
-    if (dict_info_.sync_count < sync_count_size_) {
-        syncs_[dict_info_.sync_count++] = offsets_by_id_[id - start_id_];
-    } else {
-        uint32 *syncs = (uint32 *)realloc(syncs_, (sync_count_size_ + kUserDictPreAlloc) << 2);
-        if (syncs) {
-            sync_count_size_ += kUserDictPreAlloc;
-            syncs_ = syncs;
-            syncs_[dict_info_.sync_count++] = offsets_by_id_[id - start_id_];
-        }
-    }
-}
-#endif
-
-LemmaIdType UserDict::update_lemma(LemmaIdType lemma_id, int16 delta_count, bool selected) {
-#ifdef ___DEBUG_PERF___
-    DEBUG_PERF_BEGIN;
-#endif
-    if (is_valid_state() == false) return 0;
-    if (is_valid_lemma_id(lemma_id) == false) return 0;
-    uint32 offset = offsets_by_id_[lemma_id - start_id_];
-    uint8 lemma_len = get_lemma_nchar(offset);
-    char16 *lemma_str = get_lemma_word(offset);
-    uint16 *splids = get_lemma_spell_ids(offset);
-
-    int32 off = locate_in_offsets(lemma_str, splids, lemma_len);
-    if (off != -1) {
-        int score = scores_[off];
-        int count = extract_score_freq(score);
-        uint64 lmt = extract_score_lmt(score);
-        if (count + delta_count > kUserDictMaxFrequency || count + delta_count < count) {
-            delta_count = kUserDictMaxFrequency - count;
-        }
-        count += delta_count;
-        dict_info_.total_nfreq += delta_count;
-        if (selected) {
-            lmt = time(NULL);
-        }
-        scores_[off] = build_score(lmt, count);
-        if (state_ < USER_DICT_SCORE_DIRTY) state_ = USER_DICT_SCORE_DIRTY;
-#ifdef ___DEBUG_PERF___
-        DEBUG_PERF_END;
-        LOGD_PERF("update_lemma");
-#endif
-#ifdef ___SYNC_ENABLED___
-        queue_lemma_for_sync(ids_[off]);
-#endif
-        return ids_[off];
-    }
-    return 0;
-}
-
-size_t UserDict::get_total_lemma_count() { return dict_info_.total_nfreq; }
-
-void UserDict::set_total_lemma_count_of_others(size_t count) { total_other_nfreq_ = count; }
-
-LemmaIdType UserDict::append_a_lemma(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count, uint64 lmt) {
-    LemmaIdType id = get_max_lemma_id() + 1;
-    size_t offset = dict_info_.lemma_size;
-    if (offset > kUserDictOffsetMask) return 0;
-
-    lemmas_[offset] = 0;
-    lemmas_[offset + 1] = (uint8)lemma_len;
-    for (size_t i = 0; i < lemma_len; i++) {
-        *((uint16 *)&lemmas_[offset + 2 + (i << 1)]) = splids[i];
-        *((char16 *)&lemmas_[offset + 2 + (lemma_len << 1) + (i << 1)]) = lemma_str[i];
-    }
-    uint32 off = dict_info_.lemma_count;
-    offsets_[off] = offset;
-    scores_[off] = build_score(lmt, count);
-    ids_[off] = id;
-#ifdef ___PREDICT_ENABLED___
-    predicts_[off] = offset;
-#endif
-
-    offsets_by_id_[id - start_id_] = offset;
-
-    dict_info_.lemma_count++;
-    dict_info_.lemma_size += (2 + (lemma_len << 2));
-    lemma_count_left_--;
-    lemma_size_left_ -= (2 + (lemma_len << 2));
-
-    // Sort
-
-    UserDictSearchable searchable;
-    prepare_locate(&searchable, splids, lemma_len);
-
-    size_t i = 0;
-    while (i < off) {
-        offset = offsets_[i];
-        uint32 nchar = get_lemma_nchar(offset);
-        uint16 *spl = get_lemma_spell_ids(offset);
-
-        if (0 <= fuzzy_compare_spell_id(spl, nchar, &searchable)) break;
-        i++;
-    }
-    if (i != off) {
-        uint32 temp = offsets_[off];
-        memmove(offsets_ + i + 1, offsets_ + i, (off - i) << 2);
-        offsets_[i] = temp;
-
-        temp = scores_[off];
-        memmove(scores_ + i + 1, scores_ + i, (off - i) << 2);
-        scores_[i] = temp;
-
-        temp = ids_[off];
-        memmove(ids_ + i + 1, ids_ + i, (off - i) << 2);
-        ids_[i] = temp;
-    }
-
-#ifdef ___PREDICT_ENABLED___
-    uint32 j = 0;
-    uint16 *words_new = get_lemma_word(predicts_[off]);
-    j = locate_where_to_insert_in_predicts(words_new, lemma_len);
-    if (j != off) {
-        uint32 temp = predicts_[off];
-        memmove(predicts_ + j + 1, predicts_ + j, (off - j) << 2);
-        predicts_[j] = temp;
-    }
-#endif
-
-    if (state_ < USER_DICT_LEMMA_DIRTY) state_ = USER_DICT_LEMMA_DIRTY;
-
-#ifdef ___CACHE_ENABLED___
-    cache_init();
-#endif
-
-    dict_info_.total_nfreq += count;
-    return id;
-}
-}  // namespace ime_pinyin
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../include/userdict.h"
+#include "../include/splparser.h"
+#include "../include/ngram.h"
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#ifdef _WIN32
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <assert.h>
+#include <ctype.h>
+#include <sys/types.h>
+#ifdef _WIN32
+#undef max
+#undef min
+#include <windows.h>
+#else
+#include <pthread.h>
+#endif
+#include <math.h>
+
+namespace ime_pinyin {
+
+#ifdef _WIN32
+static int gettimeofday(struct timeval *tp, void *) {  // Windows replacement for POSIX gettimeofday(); the second argument is ignored. Returns 0, or -1 when tp is NULL.
+    if (!tp) {
+        return -1;
+    }
+
+    // Fetch the current time as a FILETIME.
+    FILETIME ft;
+    GetSystemTimeAsFileTime(&ft);
+
+    // Convert the FILETIME to time elapsed since 1970 (Unix time).
+    ULARGE_INTEGER ull;
+    ull.LowPart = ft.dwLowDateTime;
+    ull.HighPart = ft.dwHighDateTime;
+
+    // FILETIME counts 100-nanosecond units since January 1, 1601.
+    const long long WINDOWS_TICKS_PER_SEC = 10000000LL;  // 1e7 units of 100 ns per second (not referenced below)
+    const long long EPOCH_DIFFERENCE = 11644473600LL;    // seconds between the Windows epoch and the Unix epoch
+
+    long long unix_time_in_microseconds = ull.QuadPart / 10 - EPOCH_DIFFERENCE * 1000000LL;  // 100 ns units -> microseconds, then rebase onto the Unix epoch
+
+    // Fill in the timeval structure.
+    tp->tv_sec = (long)(unix_time_in_microseconds / 1000000LL);
+    tp->tv_usec = (long)(unix_time_in_microseconds % 1000000LL);
+
+    return 0;
+}
+#endif
+
+#ifdef ___DEBUG_PERF___
+static uint64 _ellapse_ = 0;
+static struct timeval _tv_start_, _tv_end_;
+#define DEBUG_PERF_BEGIN                 \
+    do {                                 \
+        gettimeofday(&_tv_start_, NULL); \
+    } while (0)
+#define DEBUG_PERF_END                                                                                         \
+    do {                                                                                                       \
+        gettimeofday(&_tv_end_, NULL);                                                                         \
+        _ellapse_ = (_tv_end_.tv_sec - _tv_start_.tv_sec) * 1000000 + (_tv_end_.tv_usec - _tv_start_.tv_usec); \
+    } while (0)
+#define LOGD_PERF(message) ALOGD("PERFORMANCE[%s] %llu usec.", message, _ellapse_);
+#else
+#define DEBUG_PERF_BEGIN
+#define DEBUG_PERF_END
+#define LOGD_PERF(message)
+#endif
+
+// XXX File load and write are thread-safe by g_mutex_
+#ifdef _WIN32
+static CRITICAL_SECTION g_mutex_;  // Windows critical section standing in for the pthread mutex. NOTE(review): no InitializeCriticalSection() call is visible in this part of the file - confirm one exists.
+#define pthread_mutex_lock(MUTEX) EnterCriticalSection(MUTEX)
+#define pthread_mutex_unlock(MUTEX) LeaveCriticalSection(MUTEX)
+#define pthread_mutex_trylock(MUTEX) (TryEnterCriticalSection(MUTEX) ? 0 : 1)  // 0 on success like the POSIX call; callers test "0 == pthread_mutex_trylock(...)"
+#else
+#include <pthread.h>
+static pthread_mutex_t g_mutex_ = PTHREAD_MUTEX_INITIALIZER;
+#endif
+static struct timeval g_last_update_ = {0, 0};
+
+inline uint32 UserDict::get_dict_file_size(UserDictInfo *info) {
+    return (4 + info->lemma_size + (info->lemma_count << 3)
+#ifdef ___PREDICT_ENABLED___
+            + (info->lemma_count << 2)
+#endif
+#ifdef ___SYNC_ENABLED___
+            + (info->sync_count << 2)
+#endif
+            + sizeof(*info));
+}
+
+inline LmaScoreType UserDict::translate_score(int raw_score) {
+    // Converts a packed raw score (frequency in the low 16 bits, lmt index above) into a log-scaled LmaScoreType.
+    // 1) ori_freq: original user frequency
+    uint32 ori_freq = extract_score_freq(raw_score);
+    // 2) lmt_off: lmt index (week offset for example)
+    uint64 lmt_off = ((raw_score & 0xffff0000) >> 16);
+    if (kUserDictLMTBitWidth < 16) {
+        uint64 mask = ((uint64)1 << kUserDictLMTBitWidth) - 1;  // keep the low kUserDictLMTBitWidth bits; ~(1 << width) only cleared one bit
+        lmt_off &= mask;
+    }
+    // 3) now_off: current time index (current week offset for example)
+    // assuming load_time_ is around current time
+    uint64 now_off = load_time_.tv_sec;
+    now_off = (now_off - kUserDictLMTSince) / kUserDictLMTGranularity;
+    now_off = (now_off << (64 - kUserDictLMTBitWidth));
+    now_off = (now_off >> (64 - kUserDictLMTBitWidth));
+    // 4) factor: decide expand-factor
+    int delta = now_off - lmt_off;
+    if (delta > 4) delta = 4;
+    int factor = 80 - (delta << 4);
+    double tf = (double)(dict_info_.total_nfreq + total_other_nfreq_);
+    return (LmaScoreType)(log((double)factor * (double)ori_freq / tf) * NGram::kLogValueAmplifier);
+}
+
+inline int UserDict::extract_score_freq(int raw_score) {
+    // Frequence stored in lowest 16 bits
+    int freq = (raw_score & 0x0000ffff);
+    return freq;
+}
+
+inline uint64 UserDict::extract_score_lmt(int raw_score) {
+    // The last-modified index is read from the upper 16 bits of the raw score.
+    uint64 lmt = ((raw_score & 0xffff0000) >> 16);
+    if (kUserDictLMTBitWidth < 16) {
+        uint64 mask = ((uint64)1 << kUserDictLMTBitWidth) - 1;  // keep the low kUserDictLMTBitWidth bits; ~(1 << width) only cleared one bit
+        lmt &= mask;
+    }
+    return lmt * kUserDictLMTGranularity + kUserDictLMTSince;  // inverse of the scaling applied in build_score()
+}
+
+inline int UserDict::build_score(uint64 lmt, int freq) {
+    lmt = (lmt - kUserDictLMTSince) / kUserDictLMTGranularity;
+    lmt = (lmt << (64 - kUserDictLMTBitWidth));
+    lmt = (lmt >> (64 - kUserDictLMTBitWidth));
+    uint16 lmt16 = (uint16)lmt;
+    int s = freq;
+    s &= 0x0000ffff;
+    s = (lmt16 << 16) | s;
+    return s;
+}
+
+inline int64 UserDict::utf16le_atoll(uint16 *s, int len) {
+    // Parses an optionally signed decimal number from at most len UTF-16 code units; returns 0 when len <= 0 or no digit follows.
+    if (len <= 0) return 0;
+    int64 ret = 0;
+    int flag = 1;
+    const uint16 *endp = s + len;
+    if (*s == '-') {
+        flag = -1;
+        s++;
+    } else if (*s == '+') {
+        s++;
+    }
+
+    while (s < endp && *s >= '0' && *s <= '9') {  // test the bound first so nothing at or past endp is read
+        ret = ret * 10 + ((*s) - '0');  // plain assignment: "ret +=" accumulated 11 * ret + digit
+        s++;
+    }
+    return ret * flag;
+}
+
+inline int UserDict::utf16le_lltoa(int64 v, uint16 *s, int size) {
+    // Writes v in decimal into s (at most size code units, no terminator) and returns the
+    // number of code units written, or 0 when s is NULL, size <= 0 or the digits do not fit.
+    if (!s || size <= 0) return 0;
+    uint16 *endp = s + size;
+    int ret_len = 0;
+    if (v < 0) {
+        *(s++) = '-';
+        ++ret_len;
+        v *= -1;
+    }
+    // Emit digits least-significant first; do/while so that v == 0 still produces "0".
+    uint16 *b = s;
+    do {
+        if (s >= endp) return 0;
+        *(s++) = '0' + (v % 10);
+        v = v / 10;
+        ++ret_len;
+    } while (v != 0);
+
+    // Reverse the digits in place; a real swap is required, a one-way copy mirrored the string.
+    for (--s; b < s; ++b, --s) {
+        uint16 tmp = *b;
+        *b = *s;
+        *s = tmp;
+    }
+    return ret_len;
+}
+
+inline void UserDict::set_lemma_flag(uint32 offset, uint8 flag) {
+    offset &= kUserDictOffsetMask;
+    lemmas_[offset] |= flag;
+}
+
+inline char UserDict::get_lemma_flag(uint32 offset) {
+    offset &= kUserDictOffsetMask;
+    return (char)(lemmas_[offset]);
+}
+
+inline char UserDict::get_lemma_nchar(uint32 offset) {
+    offset &= kUserDictOffsetMask;
+    return (char)(lemmas_[offset + 1]);
+}
+
+inline uint16 *UserDict::get_lemma_spell_ids(uint32 offset) {
+    offset &= kUserDictOffsetMask;
+    return (uint16 *)(lemmas_ + offset + 2);
+}
+
+inline uint16 *UserDict::get_lemma_word(uint32 offset) {
+    offset &= kUserDictOffsetMask;
+    uint8 nchar = get_lemma_nchar(offset);
+    return (uint16 *)(lemmas_ + offset + 2 + (nchar << 1));
+}
+
+inline LemmaIdType UserDict::get_max_lemma_id() {
+    // When a lemma is deleted, we don't not claim its id back for
+    // simplicity and performance
+    return start_id_ + dict_info_.lemma_count - 1;
+}
+
+inline bool UserDict::is_valid_lemma_id(LemmaIdType id) {
+    if (id >= start_id_ && id <= get_max_lemma_id()) return true;
+    return false;
+}
+
+inline bool UserDict::is_valid_state() {
+    if (state_ == USER_DICT_NONE) return false;
+    return true;
+}
+
+UserDict::UserDict()
+    : start_id_(0),
+      version_(0),
+      lemmas_(NULL),
+      offsets_(NULL),
+      scores_(NULL),
+      ids_(NULL),
+#ifdef ___PREDICT_ENABLED___
+      predicts_(NULL),
+#endif
+#ifdef ___SYNC_ENABLED___
+      syncs_(NULL),
+      sync_count_size_(0),
+#endif
+      offsets_by_id_(NULL),
+      lemma_count_left_(0),
+      lemma_size_left_(0),
+      dict_file_(NULL),
+      state_(USER_DICT_NONE) {
+    memset(&dict_info_, 0, sizeof(dict_info_));
+    memset(&load_time_, 0, sizeof(load_time_));
+#ifdef ___CACHE_ENABLED___
+    cache_init();
+#endif
+}
+
+UserDict::~UserDict() { close_dict(); }
+
+bool UserDict::load_dict(const char *file_name, LemmaIdType start_id, LemmaIdType end_id) {
+#ifdef ___DEBUG_PERF___
+    DEBUG_PERF_BEGIN;
+#endif
+    dict_file_ = strdup(file_name);
+    if (!dict_file_) return false;
+
+    start_id_ = start_id;
+
+    if (false == validate(file_name) && false == reset(file_name)) {
+        goto error;
+    }
+    if (false == load(file_name, start_id)) {
+        goto error;
+    }
+
+    state_ = USER_DICT_SYNC;
+
+    gettimeofday(&load_time_, NULL);
+
+#ifdef ___DEBUG_PERF___
+    DEBUG_PERF_END;
+    LOGD_PERF("load_dict");
+#endif
+    return true;
+error:
+    free((void *)dict_file_);
+    dict_file_ = NULL;
+    start_id_ = 0;
+    return false;
+}
+
+bool UserDict::close_dict() {
+    // Writes pending changes back if nobody updated the file after this instance loaded it, then frees every buffer and returns to USER_DICT_NONE. Always returns true.
+    if (state_ == USER_DICT_NONE) return true;
+    if (state_ == USER_DICT_SYNC) goto out;
+
+    // If dictionary is written back by others,
+    // we can not simply write back here
+    // To do a safe flush, we have to discard all newly added
+    // lemmas and try to reload dict file.
+    pthread_mutex_lock(&g_mutex_);
+    if (load_time_.tv_sec > g_last_update_.tv_sec || (load_time_.tv_sec == g_last_update_.tv_sec && load_time_.tv_usec > g_last_update_.tv_usec)) {
+        write_back();
+        gettimeofday(&g_last_update_, NULL);
+    }
+    pthread_mutex_unlock(&g_mutex_);
+
+out:
+    free((void *)dict_file_);
+    free(lemmas_);
+    free(offsets_);
+    free(offsets_by_id_);
+    free(scores_);
+    free(ids_);
+#ifdef ___PREDICT_ENABLED___
+    free(predicts_);
+#endif
+
+    version_ = 0;
+    dict_file_ = NULL;
+    lemmas_ = NULL;
+#ifdef ___SYNC_ENABLED___
+    free(syncs_);  // grown with realloc() in queue_lemma_for_sync(); resetting the pointer alone leaked it
+    syncs_ = NULL;
+    sync_count_size_ = 0;
+#endif
+    offsets_ = NULL;
+    offsets_by_id_ = NULL;
+    scores_ = NULL;
+    ids_ = NULL;
+#ifdef ___PREDICT_ENABLED___
+    predicts_ = NULL;
+#endif
+    memset(&dict_info_, 0, sizeof(dict_info_));
+    lemma_count_left_ = 0;
+    lemma_size_left_ = 0;
+    state_ = USER_DICT_NONE;
+    return true;
+}
+
+size_t UserDict::number_of_lemmas() { return dict_info_.lemma_count; }
+
+void UserDict::reset_milestones(uint16 from_step, MileStoneHandle from_handle) { return; }
+
+MileStoneHandle UserDict::extend_dict(MileStoneHandle from_handle, const DictExtPara *dep, LmaPsbItem *lpi_items, size_t lpi_max, size_t *lpi_num) {
+    if (is_valid_state() == false) return 0;
+
+    bool need_extend = false;
+
+#ifdef ___DEBUG_PERF___
+    DEBUG_PERF_BEGIN;
+#endif
+    *lpi_num = _get_lpis(dep->splids, dep->splids_extended + 1, lpi_items, lpi_max, &need_extend);
+#ifdef ___DEBUG_PERF___
+    DEBUG_PERF_END;
+    LOGD_PERF("extend_dict");
+#endif
+    return ((*lpi_num > 0 || need_extend) ? 1 : 0);
+}
+
+int UserDict::is_fuzzy_prefix_spell_id(const uint16 *id1, uint16 len1, const UserDictSearchable *searchable) {
+    if (len1 < searchable->splids_len) return 0;
+
+    SpellingTrie &spl_trie = SpellingTrie::get_instance();
+    uint32 i = 0;
+    for (i = 0; i < searchable->splids_len; i++) {
+        const char py1 = *spl_trie.get_spelling_str(id1[i]);
+        uint16 off = 8 * (i % 4);
+        const char py2 = ((searchable->signature[i / 4] & (0xff << off)) >> off);
+        if (py1 == py2) continue;
+        return 0;
+    }
+    return 1;
+}
+
+int UserDict::fuzzy_compare_spell_id(const uint16 *id1, uint16 len1, const UserDictSearchable *searchable) {
+    if (len1 < searchable->splids_len) return -1;
+    if (len1 > searchable->splids_len) return 1;
+
+    SpellingTrie &spl_trie = SpellingTrie::get_instance();
+    uint32 i = 0;
+    for (i = 0; i < len1; i++) {
+        const char py1 = *spl_trie.get_spelling_str(id1[i]);
+        uint16 off = 8 * (i % 4);
+        const char py2 = ((searchable->signature[i / 4] & (0xff << off)) >> off);
+        if (py1 == py2) continue;
+        if (py1 > py2) return 1;
+        return -1;
+    }
+    return 0;
+}
+
+bool UserDict::is_prefix_spell_id(const uint16 *fullids, uint16 fulllen, const UserDictSearchable *searchable) {
+    if (fulllen < searchable->splids_len) return false;
+
+    uint32 i = 0;
+    for (; i < searchable->splids_len; i++) {
+        uint16 start_id = searchable->splid_start[i];
+        uint16 count = searchable->splid_count[i];
+        if (fullids[i] >= start_id && fullids[i] < start_id + count)
+            continue;
+        else
+            return false;
+    }
+    return true;
+}
+
+bool UserDict::equal_spell_id(const uint16 *fullids, uint16 fulllen, const UserDictSearchable *searchable) {
+    if (fulllen != searchable->splids_len) return false;
+
+    uint32 i = 0;
+    for (; i < fulllen; i++) {
+        uint16 start_id = searchable->splid_start[i];
+        uint16 count = searchable->splid_count[i];
+        if (fullids[i] >= start_id && fullids[i] < start_id + count)
+            continue;
+        else
+            return false;
+    }
+    return true;
+}
+
+int32 UserDict::locate_first_in_offsets(const UserDictSearchable *searchable) {
+    int32 begin = 0;
+    int32 end = dict_info_.lemma_count - 1;
+    int32 middle = -1;
+
+    int32 first_prefix = middle;
+    int32 last_matched = middle;
+
+    while (begin <= end) {
+        middle = (begin + end) >> 1;
+        uint32 offset = offsets_[middle];
+        uint8 nchar = get_lemma_nchar(offset);
+        const uint16 *splids = get_lemma_spell_ids(offset);
+        int cmp = fuzzy_compare_spell_id(splids, nchar, searchable);
+        int pre = is_fuzzy_prefix_spell_id(splids, nchar, searchable);
+
+        if (pre) first_prefix = middle;
+
+        if (cmp < 0) {
+            begin = middle + 1;
+        } else if (cmp > 0) {
+            end = middle - 1;
+        } else {
+            end = middle - 1;
+            last_matched = middle;
+        }
+    }
+
+    return first_prefix;
+}
+
+void UserDict::prepare_locate(UserDictSearchable *searchable, const uint16 *splid_str, uint16 splid_str_len) {
+    searchable->splids_len = splid_str_len;
+    memset(searchable->signature, 0, sizeof(searchable->signature));
+
+    SpellingTrie &spl_trie = SpellingTrie::get_instance();
+    uint32 i = 0;
+    for (; i < splid_str_len; i++) {
+        if (spl_trie.is_half_id(splid_str[i])) {
+            searchable->splid_count[i] = spl_trie.half_to_full(splid_str[i], &(searchable->splid_start[i]));
+        } else {
+            searchable->splid_count[i] = 1;
+            searchable->splid_start[i] = splid_str[i];
+        }
+        const unsigned char py = *spl_trie.get_spelling_str(splid_str[i]);
+        searchable->signature[i >> 2] |= (py << (8 * (i % 4)));
+    }
+}
+
+size_t UserDict::get_lpis(const uint16 *splid_str, uint16 splid_str_len, LmaPsbItem *lpi_items, size_t lpi_max) { return _get_lpis(splid_str, splid_str_len, lpi_items, lpi_max, NULL); }
+
+size_t UserDict::_get_lpis(const uint16 *splid_str, uint16 splid_str_len, LmaPsbItem *lpi_items, size_t lpi_max, bool *need_extend) {
+    bool tmp_extend;
+    if (!need_extend) need_extend = &tmp_extend;
+
+    *need_extend = false;
+
+    if (is_valid_state() == false) return 0;
+    if (lpi_max <= 0) return 0;
+
+    if (0 == pthread_mutex_trylock(&g_mutex_)) {
+        if (load_time_.tv_sec < g_last_update_.tv_sec || (load_time_.tv_sec == g_last_update_.tv_sec && load_time_.tv_usec < g_last_update_.tv_usec)) {
+            // Others updated disk file, have to reload
+            pthread_mutex_unlock(&g_mutex_);
+            flush_cache();
+        } else {
+            pthread_mutex_unlock(&g_mutex_);
+        }
+    } else {
+    }
+
+    UserDictSearchable searchable;
+    prepare_locate(&searchable, splid_str, splid_str_len);
+
+    uint32 max_off = dict_info_.lemma_count;
+#ifdef ___CACHE_ENABLED___
+    int32 middle;
+    uint32 start, count;
+    bool cached = cache_hit(&searchable, &start, &count);
+    if (cached) {
+        middle = start;
+        max_off = start + count;
+    } else {
+        middle = locate_first_in_offsets(&searchable);
+        start = middle;
+    }
+#else
+    int32 middle = locate_first_in_offsets(&searchable);
+#endif
+
+    if (middle == -1) {
+#ifdef ___CACHE_ENABLED___
+        if (!cached) cache_push(USER_DICT_MISS_CACHE, &searchable, 0, 0);
+#endif
+        return 0;
+    }
+
+    size_t lpi_current = 0;
+
+    bool fuzzy_break = false;
+    bool prefix_break = false;
+    while ((size_t)middle < max_off && !fuzzy_break && !prefix_break) {
+        if (lpi_current >= lpi_max) break;
+        uint32 offset = offsets_[middle];
+        // Ignore deleted lemmas
+        if (offset & kUserDictOffsetFlagRemove) {
+            middle++;
+            continue;
+        }
+        uint8 nchar = get_lemma_nchar(offset);
+        uint16 *splids = get_lemma_spell_ids(offset);
+#ifdef ___CACHE_ENABLED___
+        if (!cached && 0 != fuzzy_compare_spell_id(splids, nchar, &searchable)) {
+#else
+        if (0 != fuzzy_compare_spell_id(splids, nchar, &searchable)) {
+#endif
+            fuzzy_break = true;
+        }
+
+        if (prefix_break == false) {
+            if (is_fuzzy_prefix_spell_id(splids, nchar, &searchable)) {
+                if (*need_extend == false && is_prefix_spell_id(splids, nchar, &searchable)) {
+                    *need_extend = true;
+                }
+            } else {
+                prefix_break = true;
+            }
+        }
+
+        if (equal_spell_id(splids, nchar, &searchable) == true) {
+            lpi_items[lpi_current].psb = translate_score(scores_[middle]);
+            lpi_items[lpi_current].id = ids_[middle];
+            lpi_items[lpi_current].lma_len = nchar;
+            lpi_current++;
+        }
+        middle++;
+    }
+
+#ifdef ___CACHE_ENABLED___
+    if (!cached) {
+        count = middle - start;
+        cache_push(USER_DICT_CACHE, &searchable, start, count);
+    }
+#endif
+
+    return lpi_current;
+}
+
+uint16 UserDict::get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, uint16 str_max) {
+    // Copies the lemma's characters into str_buf, truncated to str_max - 1 and NUL-terminated; returns the number of characters copied.
+    if (str_buf == NULL || str_max == 0) return 0;  // with str_max == 0, "str_max - 1" below would turn into a 65535-character limit
+    if (is_valid_state() == false || is_valid_lemma_id(id_lemma) == false) return 0;
+    uint32 offset = offsets_by_id_[id_lemma - start_id_];
+    uint8 nchar = get_lemma_nchar(offset);
+    char16 *str = get_lemma_word(offset);
+    uint16 m = nchar < str_max - 1 ? nchar : str_max - 1;
+    for (uint16 i = 0; i < m; i++) {
+        str_buf[i] = str[i];
+    }
+    str_buf[m] = 0;
+    return m;
+}
+
+uint16 UserDict::get_lemma_splids(LemmaIdType id_lemma, uint16 *splids, uint16 splids_max, bool arg_valid) {
+    // Copies up to splids_max spelling ids of the lemma into splids and returns how many were copied; arg_valid is not used.
+    if (is_valid_state() == false || is_valid_lemma_id(id_lemma) == false) return 0;  // state check as in get_lemma_str(): offsets_by_id_ is NULL after construction and after close_dict()
+    uint32 offset = offsets_by_id_[id_lemma - start_id_];
+    const uint16 *ids = get_lemma_spell_ids(offset);
+    int i = 0;
+    for (uint8 nchar = get_lemma_nchar(offset); i < nchar && i < splids_max; i++) splids[i] = ids[i];
+    return i;
+}
+
+// Collect predictions for the history string |last_hzs| (|hzs_len| chars).
+//
+// Starting at the index returned by locate_first_in_predicts(), walks
+// predicts_ and, for every live lemma that is longer than the history and
+// begins with it, stores the remaining characters in |npre_items|.
+// The walk stops at the first lemma that does not begin with the history.
+// |b4_used| is not used here. Returns the number of items written (always 0
+// when ___PREDICT_ENABLED___ is not defined).
+size_t UserDict::predict(const char16 last_hzs[], uint16 hzs_len, NPredictItem *npre_items, size_t npre_max, size_t b4_used) {
+    uint32 new_added = 0;
+#ifdef ___PREDICT_ENABLED___
+    const int32 last = dict_info_.lemma_count - 1;
+    int idx = locate_first_in_predicts((const uint16 *)last_hzs, hzs_len);
+    if (idx == -1) return 0;
+
+    for (; idx <= last; idx++) {
+        const uint32 offset = predicts_[idx];
+        // Ignore deleted lemmas
+        if (offset & kUserDictOffsetFlagRemove) continue;
+
+        uint32 nchar = get_lemma_nchar(offset);
+        uint16 *words = get_lemma_word(offset);
+        uint16 *splids = get_lemma_spell_ids(offset);
+
+        // Nothing left to predict if the lemma is not longer than the history.
+        if (nchar <= hzs_len) continue;
+
+        // First lemma that does not start with the history ends the walk.
+        if (memcmp(words, last_hzs, hzs_len << 1) != 0) break;
+
+        if (new_added >= npre_max) {
+            return new_added;
+        }
+
+        // Bytes to copy: the lemma (capped at kMaxPredictSize chars) minus
+        // the history prefix.
+        uint32 cpy_len = (nchar < kMaxPredictSize ? (nchar << 1) : (kMaxPredictSize << 1)) - (hzs_len << 1);
+        NPredictItem *item = &npre_items[new_added];
+        item->his_len = hzs_len;
+        item->psb = get_lemma_score(words, splids, nchar);
+        memcpy(item->pre_hzs, words + hzs_len, cpy_len);
+        if ((cpy_len >> 1) < kMaxPredictSize) {
+            item->pre_hzs[cpy_len >> 1] = 0;
+        }
+        new_added++;
+    }
+#endif
+    return new_added;
+}
+
+// Find the index in offsets_ of the live lemma whose spelling ids and word
+// both match the arguments exactly.
+//
+// The scan starts either at a cached range (when ___CACHE_ENABLED___ and
+// the cache has an entry) or at locate_first_in_offsets().
+// Returns the index, or -1 when no such lemma exists.
+int32 UserDict::locate_in_offsets(char16 lemma_str[], uint16 splid_str[], uint16 lemma_len) {
+    int32 limit = dict_info_.lemma_count;
+
+    UserDictSearchable searchable;
+    prepare_locate(&searchable, splid_str, lemma_len);
+
+    int32 pos;
+#ifdef ___CACHE_ENABLED___
+    uint32 start, count;
+    const bool cached = load_cache(&searchable, &start, &count);
+    if (cached) {
+        // A cache entry gives the exact window to scan.
+        pos = start;
+        limit = start + count;
+    } else {
+        pos = locate_first_in_offsets(&searchable);
+    }
+#else
+    pos = locate_first_in_offsets(&searchable);
+#endif
+
+    if (pos == -1) {
+        return -1;
+    }
+
+    for (; pos < limit; pos++) {
+        const uint32 offset = offsets_[pos];
+        // Skip lemmas flagged as removed.
+        if (offset & kUserDictOffsetFlagRemove) continue;
+
+        uint16 *splids = get_lemma_spell_ids(offset);
+#ifdef ___CACHE_ENABLED___
+        if (!cached && 0 != fuzzy_compare_spell_id(splids, lemma_len, &searchable)) break;
+#else
+        if (0 != fuzzy_compare_spell_id(splids, lemma_len, &searchable)) break;
+#endif
+        if (equal_spell_id(splids, lemma_len, &searchable) != true) continue;
+
+        // Spelling matches; the word must match character by character too.
+        uint16 *str = get_lemma_word(offset);
+        uint32 matched = 0;
+        while (matched < lemma_len && str[matched] == lemma_str[matched]) {
+            matched++;
+        }
+        if (matched < lemma_len) continue;
+
+        // No save_cache here: this function is invoked by put_lemma, and it
+        // is rare for a user to input the same lemma twice. It also returns
+        // as soon as the lemma is found, so it never knows the range count.
+        return pos;
+    }
+
+    return -1;
+}
+
+#ifdef ___PREDICT_ENABLED___
+// Binary search by word for an insertion position.
+//
+// Lemmas are compared character by character with |words|; on a common
+// prefix the shorter one sorts first. Returns the last probed index whose
+// lemma compared less than or equal to |words|, or lemma_count - 1 when no
+// probe did.
+// NOTE(review): the probe reads offsets_[], although the function name
+// refers to the predicts list -- confirm which array is intended.
+uint32 UserDict::locate_where_to_insert_in_predicts(const uint16 *words, int lemma_len) {
+    int32 lo = 0;
+    int32 hi = dict_info_.lemma_count - 1;
+    int32 mid = hi;
+
+    uint32 candidate = mid;
+
+    while (lo <= hi) {
+        mid = (lo + hi) >> 1;
+        const uint32 offset = offsets_[mid];
+        const uint8 nchar = get_lemma_nchar(offset);
+        const uint16 *ws = get_lemma_word(offset);
+
+        // Compare over the common length first.
+        const uint32 common = nchar < lemma_len ? nchar : lemma_len;
+        int cmp = 0;
+        for (uint32 k = 0; k < common && cmp == 0; k++) {
+            if (ws[k] < words[k]) {
+                cmp = -1;
+            } else if (ws[k] > words[k]) {
+                cmp = 1;
+            }
+        }
+        // Equal prefix: the shorter string sorts first.
+        if (cmp == 0 && nchar != lemma_len) {
+            cmp = nchar < lemma_len ? -1 : 1;
+        }
+
+        if (cmp < 0) {
+            lo = mid + 1;
+        } else {
+            hi = mid - 1;
+        }
+        if (cmp <= 0) {
+            candidate = mid;
+        }
+    }
+
+    return candidate;
+}
+
+// Binary search for the lowest probed index whose lemma begins with
+// |words| (|lemma_len| characters).
+//
+// Returns that index, or -1 when no probed lemma has |words| as a prefix.
+// NOTE(review): the probe reads offsets_[], while predict() uses the result
+// as an index into predicts_[] -- confirm both arrays share this ordering.
+int32 UserDict::locate_first_in_predicts(const uint16 *words, int lemma_len) {
+    int32 lo = 0;
+    int32 hi = dict_info_.lemma_count - 1;
+    int32 mid = -1;
+
+    int32 found = mid;
+
+    while (lo <= hi) {
+        mid = (lo + hi) >> 1;
+        const uint32 offset = offsets_[mid];
+        const uint8 nchar = get_lemma_nchar(offset);
+        const uint16 *ws = get_lemma_word(offset);
+
+        // Compare over the common length first.
+        const uint32 common = nchar < lemma_len ? nchar : lemma_len;
+        int cmp = 0;
+        for (uint32 k = 0; k < common && cmp == 0; k++) {
+            if (ws[k] < words[k]) {
+                cmp = -1;
+            } else if (ws[k] > words[k]) {
+                cmp = 1;
+            }
+        }
+        if (cmp == 0) {
+            if (nchar < lemma_len) {
+                // Lemma is a proper prefix of words: sorts before it.
+                cmp = -1;
+            } else {
+                // Lemma starts with words: remember it and keep looking left.
+                found = mid;
+                if (nchar > lemma_len) cmp = 1;
+            }
+        }
+
+        if (cmp < 0) {
+            lo = mid + 1;
+        } else {
+            hi = mid - 1;
+        }
+    }
+
+    return found;
+}
+
+#endif
+
+// Look up the id of the lemma with the given word and spelling ids.
+// Returns 0 when locate_in_offsets() does not find it.
+LemmaIdType UserDict::get_lemma_id(char16 lemma_str[], uint16 splids[], uint16 lemma_len) {
+    const int32 index = locate_in_offsets(lemma_str, splids, lemma_len);
+    if (index != -1) {
+        return ids_[index];
+    }
+    return 0;
+}
+
+// Translated score of the lemma identified by |lemma_id|.
+// Returns 0 when the dictionary state or the id is invalid.
+LmaScoreType UserDict::get_lemma_score(LemmaIdType lemma_id) {
+    if (!is_valid_state() || !is_valid_lemma_id(lemma_id)) {
+        return 0;
+    }
+    const int raw_score = _get_lemma_score(lemma_id);
+    return translate_score(raw_score);
+}
+
+// Translated score of the lemma given by word and spelling ids.
+// Returns 0 when the dictionary state is invalid.
+LmaScoreType UserDict::get_lemma_score(char16 lemma_str[], uint16 splids[], uint16 lemma_len) {
+    if (!is_valid_state()) {
+        return 0;
+    }
+    const int raw_score = _get_lemma_score(lemma_str, splids, lemma_len);
+    return translate_score(raw_score);
+}
+
+// Raw (untranslated) score of the lemma identified by |lemma_id|.
+//
+// scores_ is indexed like offsets_, so the lemma is first resolved to its
+// word and spelling ids and then located in offsets_.
+// Returns 0 when the state or id is invalid or the lemma is not found.
+int UserDict::_get_lemma_score(LemmaIdType lemma_id) {
+    if (!is_valid_state() || !is_valid_lemma_id(lemma_id)) {
+        return 0;
+    }
+
+    const uint32 offset = offsets_by_id_[lemma_id - start_id_];
+    const uint32 nchar = get_lemma_nchar(offset);
+    uint16 *spl = get_lemma_spell_ids(offset);
+    uint16 *wrd = get_lemma_word(offset);
+
+    const int32 index = locate_in_offsets(wrd, spl, nchar);
+    if (index != -1) {
+        return scores_[index];
+    }
+    return 0;
+}
+
+// Raw (untranslated) score of the lemma given by word and spelling ids.
+// Returns 0 when the state is invalid or the lemma is not found.
+int UserDict::_get_lemma_score(char16 lemma_str[], uint16 splids[], uint16 lemma_len) {
+    if (!is_valid_state()) {
+        return 0;
+    }
+
+    const int32 index = locate_in_offsets(lemma_str, splids, lemma_len);
+    if (index != -1) {
+        return scores_[index];
+    }
+    return 0;
+}
+
+#ifdef ___SYNC_ENABLED___
+// Drop the first sync-list entry that refers to |offset| (flag bits are
+// ignored on both sides). The last entry is moved into the freed slot, so
+// the order of the sync list is not preserved.
+void UserDict::remove_lemma_from_sync_list(uint32 offset) {
+    offset &= kUserDictOffsetMask;
+    for (uint32 i = 0; i < dict_info_.sync_count; i++) {
+        unsigned int entry = (syncs_[i] & kUserDictOffsetMask);
+        if (entry != offset) continue;
+
+        syncs_[i] = syncs_[dict_info_.sync_count - 1];
+        dict_info_.sync_count--;
+        return;
+    }
+}
+#endif
+
+#ifdef ___PREDICT_ENABLED___
+// Flag the first predicts_ entry that refers to |offset| as removed
+// (flag bits are ignored when comparing). The entry stays in place.
+void UserDict::remove_lemma_from_predict_list(uint32 offset) {
+    offset &= kUserDictOffsetMask;
+    for (uint32 i = 0; i < dict_info_.lemma_count; i++) {
+        unsigned int entry = (predicts_[i] & kUserDictOffsetMask);
+        if (entry != offset) continue;
+
+        predicts_[i] |= kUserDictOffsetFlagRemove;
+        return;
+    }
+}
+#endif
+
+// Mark the lemma at position |offset_index| of offsets_ as removed.
+//
+// The lemma data stays in place; only the remove flag is set, the sync and
+// predict lists are updated (when enabled) and the free counters are bumped.
+// Returns false when the dictionary state is invalid or the index is out of
+// range (this includes the -1 "not found" value of locate_in_offsets()).
+bool UserDict::remove_lemma_by_offset_index(int offset_index) {
+    if (is_valid_state() == false) return false;
+
+    // Reject -1 as before, and also every other index outside
+    // [0, lemma_count), which would access offsets_ out of range.
+    if (offset_index < 0 || (size_t)offset_index >= dict_info_.lemma_count) {
+        return false;
+    }
+    int32 off = offset_index;
+
+    uint32 offset = offsets_[off];
+    uint32 nchar = get_lemma_nchar(offset);
+
+    offsets_[off] |= kUserDictOffsetFlagRemove;
+
+#ifdef ___SYNC_ENABLED___
+    // Remove corresponding sync item
+    remove_lemma_from_sync_list(offset);
+#endif
+
+#ifdef ___PREDICT_ENABLED___
+    remove_lemma_from_predict_list(offset);
+#endif
+    dict_info_.free_count++;
+    // Same per-lemma size formula as used elsewhere in this file:
+    // 2 + nchar * 4 bytes.
+    dict_info_.free_size += (2 + (nchar << 2));
+
+    if (state_ < USER_DICT_OFFSET_DIRTY) state_ = USER_DICT_OFFSET_DIRTY;
+    return true;
+}
+
+// Remove the lemma identified by |lemma_id|.
+//
+// The id is resolved to its word and spelling ids, located in offsets_, and
+// handed to remove_lemma_by_offset_index().
+// Returns false when the state or id is invalid or the lemma is not found.
+bool UserDict::remove_lemma(LemmaIdType lemma_id) {
+    if (!is_valid_state()) return false;
+    if (!is_valid_lemma_id(lemma_id)) return false;
+
+    const uint32 offset = offsets_by_id_[lemma_id - start_id_];
+    const uint32 nchar = get_lemma_nchar(offset);
+    uint16 *spl = get_lemma_spell_ids(offset);
+    uint16 *wrd = get_lemma_word(offset);
+
+    const int32 index = locate_in_offsets(wrd, spl, nchar);
+    return remove_lemma_by_offset_index(index);
+}
+
+// Close the dictionary and load it again from the same file with the same
+// start id, then (when caching is enabled) reset the lookup caches.
+// Does nothing when no dictionary file is set or the path cannot be copied.
+void UserDict::flush_cache() {
+    const LemmaIdType first_id = start_id_;
+    if (dict_file_ == NULL) return;
+
+    // Work on a private copy of the path while the dictionary is closed
+    // and reopened.
+    char *path_copy = strdup(dict_file_);
+    if (path_copy == NULL) return;
+
+    close_dict();
+    load_dict(path_copy, first_id, kUserDictIdEnd);
+    free(path_copy);
+#ifdef ___CACHE_ENABLED___
+    cache_init();
+#endif
+}
+
+// Create (or truncate) |file| as an empty user dictionary: the version word
+// followed by a zero-filled UserDictInfo.
+//
+// Returns true when both records were written and the file was closed
+// without error. On failure the partially written file is unlinked.
+bool UserDict::reset(const char *file) {
+    if (file == NULL) {
+        return false;
+    }
+    // The content is raw binary, so open in binary mode: a text-mode stream
+    // on Windows would translate any 0x0A byte in the data.
+    FILE *fp = fopen(file, "w+b");
+    if (!fp) {
+        return false;
+    }
+
+    const uint32 version = kUserDictVersion;
+    UserDictInfo info;
+    // By default, no limitation for lemma count and size
+    // thereby, reclaim_ratio is never used
+    memset(&info, 0, sizeof(info));
+
+    size_t wred = fwrite(&version, 1, sizeof(version), fp);
+    wred += fwrite(&info, 1, sizeof(info), fp);
+    bool ok = (wred == sizeof(info) + sizeof(version));
+
+    // Buffered data is flushed by fclose, so a write error may only show up
+    // here.
+    if (fclose(fp) != 0) {
+        ok = false;
+    }
+    if (!ok) {
+        unlink(file);
+        return false;
+    }
+    return true;
+}
+
+// Check that |file| looks like a user dictionary.
+//
+// The file must be large enough for the version word plus a UserDictInfo,
+// start with kUserDictVersion, and have a total size equal to what
+// get_dict_file_size() computes from the UserDictInfo stored at its end.
+// Returns true only when all checks pass.
+bool UserDict::validate(const char *file) {
+    // b is ignored in POSIX compatible os including Linux
+    // while b is important flag for Windows to specify binary mode
+    FILE *fp = fopen(file, "rb");
+    if (!fp) {
+        return false;
+    }
+
+    bool ok = false;
+    uint32 version;
+    UserDictInfo dict_info;
+
+    // Single-pass block: any failed check breaks out to the common cleanup.
+    do {
+        if (fseek(fp, 0, SEEK_END) != 0) break;
+
+        // ftell reports failure as -1; converting that to size_t unchecked
+        // would let it pass the minimum-size test below.
+        const long end_pos = ftell(fp);
+        if (end_pos < 0) break;
+        const size_t size = (size_t)end_pos;
+        if (size < 4 + sizeof(dict_info)) break;
+
+        if (fseek(fp, 0, SEEK_SET) != 0) break;
+
+        size_t readed = fread(&version, 1, sizeof(version), fp);
+        if (readed < sizeof(version)) break;
+        if (version != kUserDictVersion) break;
+
+        // fseek takes a signed long offset; negate after the cast so the
+        // value does not depend on unsigned wrap-around of "-1 * sizeof".
+        if (fseek(fp, -(long)sizeof(dict_info), SEEK_END) != 0) break;
+
+        readed = fread(&dict_info, 1, sizeof(dict_info), fp);
+        if (readed != sizeof(dict_info)) break;
+
+        if (size != get_dict_file_size(&dict_info)) break;
+
+        ok = true;
+    } while (0);
+
+    fclose(fp);
+    return ok;
+}
+
+// Load the dictionary stored in |file| into freshly allocated arrays and
+// install them as this object's state.
+//
+// Sections are read in this order, starting at byte 4: lemma data, offsets,
+// predicts (if enabled), scores, syncs (if enabled). The UserDictInfo is
+// read from the end of the file. Every array is allocated with
+// kUserDictPreAlloc spare entries. Ids are assigned sequentially from
+// |start_id|.
+// Returns false, leaving the members untouched, when g_mutex_ cannot be
+// taken without blocking, the file cannot be opened, an allocation fails or
+// a section is short.
+bool UserDict::load(const char *file, LemmaIdType start_id) {
+    if (0 != pthread_mutex_trylock(&g_mutex_)) {
+        return false;
+    }
+    // b is ignored in POSIX compatible os including Linux
+    // while b is important flag for Windows to specify binary mode
+    FILE *fp = fopen(file, "rb");
+    if (!fp) {
+        pthread_mutex_unlock(&g_mutex_);
+        return false;
+    }
+
+    // All locals are declared before the first "goto error" so that no jump
+    // crosses an initialization.
+    size_t readed, toread;
+    UserDictInfo dict_info;
+    uint8 *lemmas = NULL;
+    uint32 *offsets = NULL;
+#ifdef ___SYNC_ENABLED___
+    uint32 *syncs = NULL;
+#endif
+    uint32 *scores = NULL;
+    uint32 *ids = NULL;
+    uint32 *offsets_by_id = NULL;
+#ifdef ___PREDICT_ENABLED___
+    uint32 *predicts = NULL;
+#endif
+    size_t i;
+    int err;
+
+    // fseek takes a signed long offset; negate after the cast so the value
+    // does not depend on unsigned wrap-around of "-1 * sizeof".
+    err = fseek(fp, -(long)sizeof(dict_info), SEEK_END);
+    if (err) goto error;
+
+    readed = fread(&dict_info, 1, sizeof(dict_info), fp);
+    if (readed != sizeof(dict_info)) goto error;
+
+    lemmas = (uint8 *)malloc(dict_info.lemma_size + (kUserDictPreAlloc * (2 + (kUserDictAverageNchar << 2))));
+
+    if (!lemmas) goto error;
+
+    offsets = (uint32 *)malloc((dict_info.lemma_count + kUserDictPreAlloc) << 2);
+    if (!offsets) goto error;
+
+#ifdef ___PREDICT_ENABLED___
+    predicts = (uint32 *)malloc((dict_info.lemma_count + kUserDictPreAlloc) << 2);
+    if (!predicts) goto error;
+#endif
+
+#ifdef ___SYNC_ENABLED___
+    syncs = (uint32 *)malloc((dict_info.sync_count + kUserDictPreAlloc) << 2);
+    if (!syncs) goto error;
+#endif
+
+    scores = (uint32 *)malloc((dict_info.lemma_count + kUserDictPreAlloc) << 2);
+    if (!scores) goto error;
+
+    ids = (uint32 *)malloc((dict_info.lemma_count + kUserDictPreAlloc) << 2);
+    if (!ids) goto error;
+
+    offsets_by_id = (uint32 *)malloc((dict_info.lemma_count + kUserDictPreAlloc) << 2);
+    if (!offsets_by_id) goto error;
+
+    // Skip the 4-byte version word.
+    err = fseek(fp, 4, SEEK_SET);
+    if (err) goto error;
+
+    // Each section is read in a loop because fread may return short counts.
+    readed = 0;
+    while (readed < dict_info.lemma_size && !ferror(fp) && !feof(fp)) {
+        readed += fread(lemmas + readed, 1, dict_info.lemma_size - readed, fp);
+    }
+    if (readed < dict_info.lemma_size) goto error;
+
+    toread = (dict_info.lemma_count << 2);
+    readed = 0;
+    while (readed < toread && !ferror(fp) && !feof(fp)) {
+        readed += fread((((uint8 *)offsets) + readed), 1, toread - readed, fp);
+    }
+    if (readed < toread) goto error;
+
+#ifdef ___PREDICT_ENABLED___
+    toread = (dict_info.lemma_count << 2);
+    readed = 0;
+    while (readed < toread && !ferror(fp) && !feof(fp)) {
+        readed += fread((((uint8 *)predicts) + readed), 1, toread - readed, fp);
+    }
+    if (readed < toread) goto error;
+#endif
+
+    // toread still holds lemma_count * 4 from the section above.
+    readed = 0;
+    while (readed < toread && !ferror(fp) && !feof(fp)) {
+        readed += fread((((uint8 *)scores) + readed), 1, toread - readed, fp);
+    }
+    if (readed < toread) goto error;
+
+#ifdef ___SYNC_ENABLED___
+    toread = (dict_info.sync_count << 2);
+    readed = 0;
+    while (readed < toread && !ferror(fp) && !feof(fp)) {
+        readed += fread((((uint8 *)syncs) + readed), 1, toread - readed, fp);
+    }
+    if (readed < toread) goto error;
+#endif
+
+    // Ids are not stored in the file: they follow the order of offsets.
+    for (i = 0; i < dict_info.lemma_count; i++) {
+        ids[i] = start_id + i;
+        offsets_by_id[i] = offsets[i];
+    }
+
+    lemmas_ = lemmas;
+    offsets_ = offsets;
+#ifdef ___SYNC_ENABLED___
+    syncs_ = syncs;
+    sync_count_size_ = dict_info.sync_count + kUserDictPreAlloc;
+#endif
+    offsets_by_id_ = offsets_by_id;
+    scores_ = scores;
+    ids_ = ids;
+#ifdef ___PREDICT_ENABLED___
+    predicts_ = predicts;
+#endif
+    lemma_count_left_ = kUserDictPreAlloc;
+    lemma_size_left_ = kUserDictPreAlloc * (2 + (kUserDictAverageNchar << 2));
+    memcpy(&dict_info_, &dict_info, sizeof(dict_info));
+    state_ = USER_DICT_SYNC;
+
+    fclose(fp);
+
+    pthread_mutex_unlock(&g_mutex_);
+    return true;
+
+error:
+    // Release whatever was allocated before the failure.
+    if (lemmas) free(lemmas);
+    if (offsets) free(offsets);
+#ifdef ___SYNC_ENABLED___
+    if (syncs) free(syncs);
+#endif
+    if (scores) free(scores);
+    if (ids) free(ids);
+    if (offsets_by_id) free(offsets_by_id);
+#ifdef ___PREDICT_ENABLED___
+    if (predicts) free(predicts);
+#endif
+    fclose(fp);
+    pthread_mutex_unlock(&g_mutex_);
+    return false;
+}
+
+// Write the in-memory changes back to dict_file_.
+//
+// Which sections are rewritten depends on state_. Afterwards the file is
+// truncated at the final write position (not on Windows) and state_ is
+// reset to USER_DICT_SYNC. Does nothing when there is nothing to write or
+// the file cannot be opened.
+void UserDict::write_back() {
+    // XXX write back is only allowed from close_dict due to thread-safe sake
+    if (state_ == USER_DICT_NONE || state_ == USER_DICT_SYNC) return;
+
+    // The file holds raw binary data. Where the platform distinguishes text
+    // and binary descriptors (O_BINARY is defined), request binary mode so
+    // write() does not translate newline bytes.
+    int flags = O_WRONLY;
+#ifdef O_BINARY
+    flags |= O_BINARY;
+#endif
+    int fd = open(dict_file_, flags);
+    if (fd == -1) return;
+    switch (state_) {
+        case USER_DICT_DEFRAGMENTED:
+            write_back_all(fd);
+            break;
+        case USER_DICT_LEMMA_DIRTY:
+            write_back_lemma(fd);
+            break;
+        case USER_DICT_OFFSET_DIRTY:
+            write_back_offset(fd);
+            break;
+        case USER_DICT_SCORE_DIRTY:
+            write_back_score(fd);
+            break;
+#ifdef ___SYNC_ENABLED___
+        case USER_DICT_SYNC_DIRTY:
+            write_back_sync(fd);
+            break;
+#endif
+        default:
+            break;
+    }
+    // It seems truncate is not need on Linux, Windows except Mac
+    // I am doing it here anyway for safety.
+    off_t cur = lseek(fd, 0, SEEK_CUR);
+#ifndef _WIN32
+    // Only truncate when the current position is known.
+    if (cur != (off_t)-1) {
+        ftruncate(fd, cur);
+    }
+#endif
+    close(fd);
+    state_ = USER_DICT_SYNC;
+}
+
+#ifdef ___SYNC_ENABLED___
+// Rewrite the sync list and the trailing UserDictInfo.
+// The sync section starts after the version word, the lemma data and the
+// offset and score tables (plus the predict table when enabled).
+void UserDict::write_back_sync(int fd) {
+    const off_t sync_pos = 4 + dict_info_.lemma_size + (dict_info_.lemma_count << 3)
+#ifdef ___PREDICT_ENABLED___
+                           + (dict_info_.lemma_count << 2)
+#endif
+        ;
+    const int seek_rc = lseek(fd, sync_pos, SEEK_SET);
+    if (seek_rc == -1) return;
+
+    const size_t sync_bytes = dict_info_.sync_count << 2;
+    write(fd, syncs_, sync_bytes);
+    write(fd, &dict_info_, sizeof(dict_info_));
+}
+#endif
+
+// Rewrite everything that follows the lemma data: offsets, predicts (when
+// enabled), scores, syncs (when enabled) and the trailing UserDictInfo.
+void UserDict::write_back_offset(int fd) {
+    const off_t table_pos = 4 + dict_info_.lemma_size;
+    const int seek_rc = lseek(fd, table_pos, SEEK_SET);
+    if (seek_rc == -1) return;
+
+    // Offsets, predicts and scores each hold lemma_count 4-byte entries.
+    const size_t table_bytes = dict_info_.lemma_count << 2;
+    write(fd, offsets_, table_bytes);
+#ifdef ___PREDICT_ENABLED___
+    write(fd, predicts_, table_bytes);
+#endif
+    write(fd, scores_, table_bytes);
+#ifdef ___SYNC_ENABLED___
+    const size_t sync_bytes = dict_info_.sync_count << 2;
+    write(fd, syncs_, sync_bytes);
+#endif
+    write(fd, &dict_info_, sizeof(dict_info_));
+}
+
+// Rewrite the score table and everything after it: syncs (when enabled)
+// and the trailing UserDictInfo. The score table follows the offset table
+// (and the predict table when enabled).
+void UserDict::write_back_score(int fd) {
+    const off_t score_pos = 4 + dict_info_.lemma_size + (dict_info_.lemma_count << 2)
+#ifdef ___PREDICT_ENABLED___
+                            + (dict_info_.lemma_count << 2)
+#endif
+        ;
+    const int seek_rc = lseek(fd, score_pos, SEEK_SET);
+    if (seek_rc == -1) return;
+
+    const size_t table_bytes = dict_info_.lemma_count << 2;
+    write(fd, scores_, table_bytes);
+#ifdef ___SYNC_ENABLED___
+    const size_t sync_bytes = dict_info_.sync_count << 2;
+    write(fd, syncs_, sync_bytes);
+#endif
+    write(fd, &dict_info_, sizeof(dict_info_));
+}
+
+// Append the lemma bytes added since load and rewrite every section that
+// follows the lemma data.
+void UserDict::write_back_lemma(int fd) {
+    int seek_rc = lseek(fd, 4, SEEK_SET);
+    if (seek_rc == -1) return;
+
+    // New lemmas are always appended, no need to write whole lemma block.
+    // The bytes to write are the part of the pre-allocated lemma space that
+    // has been consumed so far.
+    const size_t need_write = kUserDictPreAlloc * (2 + (kUserDictAverageNchar << 2)) - lemma_size_left_;
+    seek_rc = lseek(fd, dict_info_.lemma_size - need_write, SEEK_CUR);
+    if (seek_rc == -1) return;
+    write(fd, lemmas_ + dict_info_.lemma_size - need_write, need_write);
+
+    const size_t table_bytes = dict_info_.lemma_count << 2;
+    write(fd, offsets_, table_bytes);
+#ifdef ___PREDICT_ENABLED___
+    write(fd, predicts_, table_bytes);
+#endif
+    write(fd, scores_, table_bytes);
+#ifdef ___SYNC_ENABLED___
+    const size_t sync_bytes = dict_info_.sync_count << 2;
+    write(fd, syncs_, sync_bytes);
+#endif
+    write(fd, &dict_info_, sizeof(dict_info_));
+}
+
+// Rewrite the whole file body after the version word: lemma data, offsets,
+// predicts (when enabled), scores, syncs (when enabled) and UserDictInfo.
+void UserDict::write_back_all(int fd) {
+    // XXX lemma_size is handled differently in writeall
+    // and writelemma. I update lemma_size and lemma_count in different
+    // places for these two cases. Should fix it to make it consistent.
+    const int seek_rc = lseek(fd, 4, SEEK_SET);
+    if (seek_rc == -1) return;
+
+    write(fd, lemmas_, dict_info_.lemma_size);
+
+    const size_t table_bytes = dict_info_.lemma_count << 2;
+    write(fd, offsets_, table_bytes);
+#ifdef ___PREDICT_ENABLED___
+    write(fd, predicts_, table_bytes);
+#endif
+    write(fd, scores_, table_bytes);
+#ifdef ___SYNC_ENABLED___
+    const size_t sync_bytes = dict_info_.sync_count << 2;
+    write(fd, syncs_, sync_bytes);
+#endif
+    write(fd, &dict_info_, sizeof(dict_info_));
+}
+
+#ifdef ___CACHE_ENABLED___
+// Look |searchable| up in the hit cache selected by its spelling length.
+//
+// The cache is a ring buffer; entries between head and tail are compared by
+// signature. On a match the cached range is stored in |*offset| and
+// |*length| and true is returned; otherwise the outputs are left untouched.
+bool UserDict::load_cache(UserDictSearchable *searchable, uint32 *offset, uint32 *length) {
+    UserDictCache *cache = &caches_[searchable->splids_len - 1];
+    // head == tail means the ring is empty.
+    if (cache->head == cache->tail) return false;
+
+    const uint16 sig_len = kMaxLemmaSize / 4;
+    uint16 slot = cache->head;
+    do {
+        bool same = true;
+        for (uint16 k = 0; k < sig_len; k++) {
+            if (cache->signatures[slot][k] != searchable->signature[k]) {
+                same = false;
+                break;
+            }
+        }
+        if (same) {
+            *offset = cache->offsets[slot];
+            *length = cache->lengths[slot];
+            return true;
+        }
+        // Advance with wrap-around.
+        slot++;
+        if (slot >= kUserDictCacheSize) slot -= kUserDictCacheSize;
+    } while (slot != cache->tail);
+
+    return false;
+}
+
+// Store the range (|offset|, |length|) for |searchable| at the tail of the
+// hit cache selected by its spelling length. When the ring is full the
+// oldest entry (at head) is dropped.
+void UserDict::save_cache(UserDictSearchable *searchable, uint32 offset, uint32 length) {
+    UserDictCache *cache = &caches_[searchable->splids_len - 1];
+    const uint16 slot = cache->tail;
+
+    const uint16 sig_len = kMaxLemmaSize / 4;
+    for (uint16 k = 0; k < sig_len; k++) {
+        cache->signatures[slot][k] = searchable->signature[k];
+    }
+    cache->offsets[slot] = offset;
+    cache->lengths[slot] = length;
+
+    // Advance the tail with wrap-around.
+    uint16 next = slot + 1;
+    if (next >= kUserDictCacheSize) {
+        next -= kUserDictCacheSize;
+    }
+    // Tail caught up with head: push head forward to make room.
+    if (next == cache->head) {
+        cache->head++;
+        if (cache->head >= kUserDictCacheSize) {
+            cache->head -= kUserDictCacheSize;
+        }
+    }
+    cache->tail = next;
+}
+
+// Empty every hit cache by zero-filling the whole caches_ array.
+void UserDict::reset_cache() {
+    memset(caches_, 0, sizeof(caches_));
+}
+
+// Check whether |searchable| is recorded in the miss cache selected by its
+// spelling length. The cache is a ring buffer whose entries between head
+// and tail are compared by signature.
+bool UserDict::load_miss_cache(UserDictSearchable *searchable) {
+    UserDictMissCache *cache = &miss_caches_[searchable->splids_len - 1];
+    // head == tail means the ring is empty.
+    if (cache->head == cache->tail) return false;
+
+    const uint16 sig_len = kMaxLemmaSize / 4;
+    uint16 slot = cache->head;
+    do {
+        bool same = true;
+        for (uint16 k = 0; k < sig_len; k++) {
+            if (cache->signatures[slot][k] != searchable->signature[k]) {
+                same = false;
+                break;
+            }
+        }
+        if (same) {
+            return true;
+        }
+        // Advance with wrap-around.
+        slot++;
+        if (slot >= kUserDictMissCacheSize) slot -= kUserDictMissCacheSize;
+    } while (slot != cache->tail);
+
+    return false;
+}
+
+// Record |searchable| at the tail of the miss cache selected by its
+// spelling length. When the ring is full the oldest entry (at head) is
+// dropped.
+void UserDict::save_miss_cache(UserDictSearchable *searchable) {
+    UserDictMissCache *cache = &miss_caches_[searchable->splids_len - 1];
+    const uint16 slot = cache->tail;
+
+    const uint16 sig_len = kMaxLemmaSize / 4;
+    for (uint16 k = 0; k < sig_len; k++) {
+        cache->signatures[slot][k] = searchable->signature[k];
+    }
+
+    // Advance the tail with wrap-around.
+    uint16 next = slot + 1;
+    if (next >= kUserDictMissCacheSize) {
+        next -= kUserDictMissCacheSize;
+    }
+    // Tail caught up with head: push head forward to make room.
+    if (next == cache->head) {
+        cache->head++;
+        if (cache->head >= kUserDictMissCacheSize) {
+            cache->head -= kUserDictMissCacheSize;
+        }
+    }
+    cache->tail = next;
+}
+
+// Empty every miss cache by zero-filling the whole miss_caches_ array.
+void UserDict::reset_miss_cache() {
+    memset(miss_caches_, 0, sizeof(miss_caches_));
+}
+
+// Clear both lookup caches: hits first, then misses.
+void UserDict::cache_init() {
+    reset_cache();
+    reset_miss_cache();
+}
+
+// Consult both caches for |searchable|.
+//
+// A miss-cache entry counts as a hit with an empty range (offset 0,
+// length 0). Otherwise the hit cache is consulted and, when it matches,
+// fills |*offset| and |*length|. Returns false when neither cache knows
+// the key.
+bool UserDict::cache_hit(UserDictSearchable *searchable, uint32 *offset, uint32 *length) {
+    if (load_miss_cache(searchable)) {
+        *offset = 0;
+        *length = 0;
+        return true;
+    }
+    return load_cache(searchable, offset, length);
+}
+
+// Store |searchable| in the cache selected by |type|. |offset| and
+// |length| are only used for USER_DICT_CACHE; any other type is ignored.
+void UserDict::cache_push(UserDictCacheType type, UserDictSearchable *searchable, uint32 offset, uint32 length) {
+    if (type == USER_DICT_MISS_CACHE) {
+        save_miss_cache(searchable);
+    } else if (type == USER_DICT_CACHE) {
+        save_cache(searchable, offset, length);
+    }
+}
+
+#endif
+
+// Compact the dictionary after lemmas have been flagged as removed.
+//
+// Steps:
+//   1. Move removed entries of offsets_ (with their scores_ and ids_) behind
+//      the live ones, copying the REMOVE flag into the lemma data.
+//   2. Do the same for predicts_ (when enabled).
+//   3. Slide the live lemma records down over the removed ones in lemmas_
+//      and patch every stored offset accordingly.
+//   4. Renumber ids_ / offsets_by_id_ and set state_ to
+//      USER_DICT_DEFRAGMENTED.
+// Returns early, without changing state_, when no lemma record carries the
+// REMOVE flag.
+void UserDict::defragment(void) {
+#ifdef ___DEBUG_PERF___
+    DEBUG_PERF_BEGIN;
+#endif
+    if (is_valid_state() == false) return;
+    // Fixup offsets_, set REMOVE flag to lemma's flag if needed
+    size_t first_freed = 0;
+    size_t first_inuse = 0;
+    while (first_freed < dict_info_.lemma_count) {
+        // Find first freed offset.
+        // The index is tested before the array is read, so the scan never
+        // touches offsets_[lemma_count].
+        while (first_freed < dict_info_.lemma_count && (offsets_[first_freed] & kUserDictOffsetFlagRemove) == 0) {
+            first_freed++;
+        }
+        if (first_freed < dict_info_.lemma_count) {
+            // Save REMOVE flag to lemma flag
+            int off = offsets_[first_freed];
+            set_lemma_flag(off, kUserDictLemmaFlagRemove);
+        } else {
+            break;
+        }
+        // Find first inuse offset after first_freed
+        first_inuse = first_freed + 1;
+        while ((first_inuse < dict_info_.lemma_count) && (offsets_[first_inuse] & kUserDictOffsetFlagRemove)) {
+            // Save REMOVE flag to lemma flag
+            int off = offsets_[first_inuse];
+            set_lemma_flag(off, kUserDictLemmaFlagRemove);
+            first_inuse++;
+        }
+        if (first_inuse >= dict_info_.lemma_count) {
+            break;
+        }
+        // Swap offsets_
+        int tmp = offsets_[first_inuse];
+        offsets_[first_inuse] = offsets_[first_freed];
+        offsets_[first_freed] = tmp;
+        // Move scores_, no need to swap
+        tmp = scores_[first_inuse];
+        scores_[first_inuse] = scores_[first_freed];
+        scores_[first_freed] = tmp;
+        // Swap ids_
+        LemmaIdType tmpid = ids_[first_inuse];
+        ids_[first_inuse] = ids_[first_freed];
+        ids_[first_freed] = tmpid;
+        // Go on
+        first_freed++;
+    }
+#ifdef ___PREDICT_ENABLED___
+    // Fixup predicts_
+    first_freed = 0;
+    first_inuse = 0;
+    while (first_freed < dict_info_.lemma_count) {
+        // Find first freed offset (index tested before the read, as above)
+        while (first_freed < dict_info_.lemma_count && (predicts_[first_freed] & kUserDictOffsetFlagRemove) == 0) {
+            first_freed++;
+        }
+        if (first_freed >= dict_info_.lemma_count) break;
+        // Find first inuse offset after first_freed
+        first_inuse = first_freed + 1;
+        while ((first_inuse < dict_info_.lemma_count) && (predicts_[first_inuse] & kUserDictOffsetFlagRemove)) {
+            first_inuse++;
+        }
+        if (first_inuse >= dict_info_.lemma_count) {
+            break;
+        }
+        // Swap predicts_
+        int tmp = predicts_[first_inuse];
+        predicts_[first_inuse] = predicts_[first_freed];
+        predicts_[first_freed] = tmp;
+        // Go on
+        first_freed++;
+    }
+#endif
+    // first_freed is now the number of live entries.
+    dict_info_.lemma_count = first_freed;
+    // Fixup lemmas_
+    size_t begin = 0;
+    size_t end = 0;
+    size_t dst = 0;
+    int total_size = dict_info_.lemma_size + lemma_size_left_;
+    int total_count = dict_info_.lemma_count + lemma_count_left_;
+    size_t real_size = total_size - lemma_size_left_;
+    // Skip the leading run of live records; each record occupies
+    // nchar * 4 + 2 bytes.
+    while (dst < real_size) {
+        unsigned char flag = get_lemma_flag(dst);
+        unsigned char nchr = get_lemma_nchar(dst);
+        if ((flag & kUserDictLemmaFlagRemove) == 0) {
+            dst += nchr * 4 + 2;
+            continue;
+        }
+        break;
+    }
+    if (dst >= real_size) return;
+
+    // dst is the write position; [begin, end) is the next run of live
+    // records to move down to it.
+    end = dst;
+    while (end < real_size) {
+        begin = end + get_lemma_nchar(end) * 4 + 2;
+    repeat:
+        // not used any more
+        if (begin >= real_size) break;
+        unsigned char flag = get_lemma_flag(begin);
+        unsigned char nchr = get_lemma_nchar(begin);
+        if (flag & kUserDictLemmaFlagRemove) {
+            begin += nchr * 4 + 2;
+            goto repeat;
+        }
+        end = begin + nchr * 4 + 2;
+        while (end < real_size) {
+            unsigned char eflag = get_lemma_flag(end);
+            unsigned char enchr = get_lemma_nchar(end);
+            if ((eflag & kUserDictLemmaFlagRemove) == 0) {
+                end += enchr * 4 + 2;
+                continue;
+            }
+            break;
+        }
+        memmove(lemmas_ + dst, lemmas_ + begin, end - begin);
+        // Every stored offset that pointed into the moved run shifts by the
+        // same distance.
+        for (size_t j = 0; j < dict_info_.lemma_count; j++) {
+            if (offsets_[j] >= begin && offsets_[j] < end) {
+                offsets_[j] -= (begin - dst);
+                offsets_by_id_[ids_[j] - start_id_] = offsets_[j];
+            }
+#ifdef ___PREDICT_ENABLED___
+            if (predicts_[j] >= begin && predicts_[j] < end) {
+                predicts_[j] -= (begin - dst);
+            }
+#endif
+        }
+#ifdef ___SYNC_ENABLED___
+        for (size_t j = 0; j < dict_info_.sync_count; j++) {
+            if (syncs_[j] >= begin && syncs_[j] < end) {
+                syncs_[j] -= (begin - dst);
+            }
+        }
+#endif
+        dst += (end - begin);
+    }
+
+    dict_info_.free_count = 0;
+    dict_info_.free_size = 0;
+    dict_info_.lemma_size = dst;
+    lemma_size_left_ = total_size - dict_info_.lemma_size;
+    lemma_count_left_ = total_count - dict_info_.lemma_count;
+
+    // XXX Without following code,
+    // offsets_by_id_ is not reordered.
+    // That's to say, all removed lemmas' ids are not collected back.
+    // There may not be room for addition of new lemmas due to
+    // offsets_by_id_ reason, although lemma_size_left_ is fixed.
+    // By default, we do want defrag as fast as possible, because
+    // during defrag procedure, other peers can not write new lemmas
+    // to user dictionary file.
+    // XXX If write-back is invoked immediately after
+    // this defragment, no need to fix up following in-mem data.
+    for (uint32 i = 0; i < dict_info_.lemma_count; i++) {
+        ids_[i] = start_id_ + i;
+        offsets_by_id_[i] = offsets_[i];
+    }
+
+    state_ = USER_DICT_DEFRAGMENTED;
+
+#ifdef ___DEBUG_PERF___
+    DEBUG_PERF_END;
+    LOGD_PERF("defragment");
+#endif
+}
+
+#ifdef ___SYNC_ENABLED___
+// Remove the sync-list entries in [start, end).
+//
+// |end| is clamped to the current sync count. The entries after |end| are
+// moved down to |start| and the state is raised to at least
+// USER_DICT_SYNC_DIRTY. An inverted range is ignored.
+void UserDict::clear_sync_lemmas(unsigned int start, unsigned int end) {
+    if (is_valid_state() == false) return;
+    if (end > dict_info_.sync_count) end = dict_info_.sync_count;
+    // With start > end, "end - start" would wrap around and the subtraction
+    // below would increase sync_count instead of reducing it.
+    if (start > end) return;
+    memmove(syncs_ + start, syncs_ + end, (dict_info_.sync_count - end) << 2);
+    dict_info_.sync_count -= (end - start);
+    if (state_ < USER_DICT_SYNC_DIRTY) state_ = USER_DICT_SYNC_DIRTY;
+}
+
+// Number of entries in the sync list, or 0 when the dictionary state is
+// invalid.
+int UserDict::get_sync_count() {
+    if (is_valid_state()) {
+        return dict_info_.sync_count;
+    }
+    return 0;
+}
+
+// Add a lemma through _put_lemma() with syncs_ temporarily set to NULL,
+// so that _put_lemma() sees no sync list during the call.
+//
+// When the first attempt returns 0 and a configured count or size limit is
+// reached, the dictionary is reclaimed, defragmented and reloaded, and the
+// insertion is tried exactly once more.
+// Returns whatever id the last _put_lemma() call returned.
+LemmaIdType UserDict::put_lemma_no_sync(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count, uint64 lmt) {
+    LemmaIdType id;
+    bool retried = false;
+    for (;;) {
+        // Hide the sync list for the duration of the insertion.
+        uint32 *saved_syncs = syncs_;
+        syncs_ = NULL;
+        id = _put_lemma(lemma_str, splids, lemma_len, count, lmt);
+        syncs_ = saved_syncs;
+
+        if (id != 0 || retried) break;
+
+        const bool over_limit =
+            (dict_info_.limit_lemma_count > 0 && dict_info_.lemma_count >= dict_info_.limit_lemma_count) ||
+            (dict_info_.limit_lemma_size > 0 && dict_info_.lemma_size + (2 + (lemma_len << 2)) > dict_info_.limit_lemma_size);
+        if (!over_limit) break;
+
+        // XXX Always reclaim and defrag in sync code path
+        //     sync thread is background thread and ok with heavy work
+        reclaim();
+        defragment();
+        flush_cache();
+        retried = true;
+    }
+    return id;
+}
+
+/**
+ * Import lemmas from a serialized UTF-16 buffer without queueing them for sync.
+ *
+ * Each record has the form "pinyin,phrase,frequency,last_modified;" where the
+ * pinyin syllables are separated by single spaces. Parsing stops at the first
+ * record whose pinyin or phrase cannot be validated.
+ *
+ * @param lemmas Buffer holding the records.
+ * @param len    Number of char16 units available in lemmas.
+ * @return Number of records parsed and handed to put_lemma_no_sync().
+ */
+int UserDict::put_lemmas_no_sync_from_utf16le_string(char16 *lemmas, int len) {
+    int newly_added = 0;
+
+    SpellingParser *spl_parser = new SpellingParser();
+    if (!spl_parser) {
+        return 0;
+    }
+#ifdef ___DEBUG_PERF___
+    DEBUG_PERF_BEGIN;
+#endif
+    char16 *ptr = lemmas;
+
+    // Extract pinyin,words,frequence,last_mod_time
+    char16 *p = ptr, *py16 = ptr;
+    char16 *hz16 = NULL;
+    int py16_len = 0;
+    uint16 splid[kMaxLemmaSize];
+    int splid_len = 0;
+    int hz16_len = 0;
+    char16 *fr16 = NULL;
+    int fr16_len = 0;
+
+    // In every scan loop below the bound is tested before *p is read, so the
+    // unit just past the end of the buffer is never dereferenced.
+    while (p - ptr < len) {
+        // Pinyin: syllables are space separated, the field ends at ','.
+        py16 = p;
+        splid_len = 0;
+        while ((p - ptr) < len && *p != 0x2c) {
+            if (*p == 0x20) splid_len++;
+            p++;
+        }
+        splid_len++;
+        if (p - ptr == len) break;
+        py16_len = p - py16;
+        if (kMaxLemmaSize < splid_len) {
+            break;
+        }
+        bool is_pre;
+        int splidl = spl_parser->splstr16_to_idxs_f(py16, py16_len, splid, NULL, kMaxLemmaSize, is_pre);
+        if (splidl != splid_len) break;
+        // Phrase: must contain exactly one character per syllable.
+        hz16 = ++p;
+        while ((p - ptr) < len && *p != 0x2c) {
+            p++;
+        }
+        hz16_len = p - hz16;
+        if (hz16_len != splid_len) break;
+        // Frequency
+        fr16 = ++p;
+        fr16_len = 0;
+        while ((p - ptr) < len && *p != 0x2c) {
+            p++;
+        }
+        fr16_len = p - fr16;
+        uint32 intf = (uint32)utf16le_atoll(fr16, fr16_len);
+        // Last modified time, terminated by ';'
+        fr16 = ++p;
+        fr16_len = 0;
+        while ((p - ptr) < len && *p != 0x3b) {
+            p++;
+        }
+        fr16_len = p - fr16;
+        uint64 last_mod = utf16le_atoll(fr16, fr16_len);
+
+        put_lemma_no_sync(hz16, splid, splid_len, intf, last_mod);
+        newly_added++;
+
+        p++;
+    }
+
+    // Every exit from the loop reaches this point; release the parser here
+    // so it is no longer leaked on each call.
+    delete spl_parser;
+
+#ifdef ___DEBUG_PERF___
+    DEBUG_PERF_END;
+    LOGD_PERF("put_lemmas_no_sync_from_utf16le_string");
+#endif
+    return newly_added;
+}
+
+/**
+ * Serialize queued sync lemmas, starting from the head of the queue, into str.
+ *
+ * Each lemma is written as "pinyin,phrase,frequency,last_modified;" with the
+ * pinyin syllables separated by spaces. A lemma that cannot be formatted into
+ * the scratch buffer is skipped; serialization stops at the first lemma that
+ * no longer fits into the remaining output space.
+ *
+ * @param str   Output buffer.
+ * @param size  Capacity of str in char16 units.
+ * @param count Receives the number of lemmas written.
+ * @return Number of char16 units written to str.
+ */
+int UserDict::get_sync_lemmas_in_utf16le_string_from_beginning(char16 *str, int size, int *count) {
+    int len = 0;
+    *count = 0;
+
+    int left_len = size;
+
+    if (is_valid_state() == false) return len;
+
+    SpellingTrie *spl_trie = &SpellingTrie::get_instance();
+    if (!spl_trie) {
+        return 0;
+    }
+
+    // Scratch buffer for one serialized record. The end pointer is computed
+    // from the element count: sizeof(temp) alone is a byte count and would
+    // place the bound at twice the real capacity of the buffer.
+    char16 temp[256];
+    char16 *const temp_end = temp + sizeof(temp) / sizeof(temp[0]);
+
+    uint32 i;
+    for (i = 0; i < dict_info_.sync_count; i++) {
+        int offset = syncs_[i];
+        uint32 nchar = get_lemma_nchar(offset);
+        uint16 *spl = get_lemma_spell_ids(offset);
+        uint16 *wrd = get_lemma_word(offset);
+        int score = _get_lemma_score(wrd, spl, nchar);
+
+        // Without syllables there is no trailing space to drop below, and
+        // stepping ptemp back would move it in front of the buffer.
+        if (nchar == 0) continue;
+
+        char16 *ptemp = temp;
+
+        uint32 j;
+        // Add pinyin
+        bool pinyin_ok = true;
+        for (j = 0; j < nchar; j++) {
+            int ret_len = spl_trie->get_spelling_str16(spl[j], ptemp, temp_end - ptemp);
+            if (ret_len <= 0) {
+                pinyin_ok = false;
+                break;
+            }
+            ptemp += ret_len;
+            if (ptemp < temp_end - 1) {
+                *(ptemp++) = ' ';
+            } else {
+                pinyin_ok = false;
+                break;
+            }
+        }
+        if (!pinyin_ok) {
+            continue;
+        }
+        // Replace the space after the last syllable with the field separator.
+        ptemp--;
+        if (ptemp < temp_end - 1) {
+            *(ptemp++) = ',';
+        } else {
+            continue;
+        }
+        // Add phrase
+        for (j = 0; j < nchar; j++) {
+            if (ptemp < temp_end - 1) {
+                *(ptemp++) = wrd[j];
+            } else {
+                break;
+            }
+        }
+        if (j < nchar) {
+            continue;
+        }
+        if (ptemp < temp_end - 1) {
+            *(ptemp++) = ',';
+        } else {
+            continue;
+        }
+        // Add frequency
+        uint32 intf = extract_score_freq(score);
+        int ret_len = utf16le_lltoa(intf, ptemp, temp_end - ptemp);
+        if (ret_len <= 0) continue;
+        ptemp += ret_len;
+        if (ptemp < temp_end - 1) {
+            *(ptemp++) = ',';
+        } else {
+            continue;
+        }
+        // Add last modified time
+        uint64 last_mod = extract_score_lmt(score);
+        ret_len = utf16le_lltoa(last_mod, ptemp, temp_end - ptemp);
+        if (ret_len <= 0) continue;
+        ptemp += ret_len;
+        if (ptemp < temp_end - 1) {
+            *(ptemp++) = ';';
+        } else {
+            continue;
+        }
+
+        // Write to string (need_len is in char16 units, memcpy wants bytes)
+        int need_len = ptemp - temp;
+        if (need_len > left_len) break;
+        memcpy(str + len, temp, need_len * 2);
+        left_len -= need_len;
+
+        len += need_len;
+        (*count)++;
+    }
+
+    if (len > 0) {
+        if (state_ < USER_DICT_SYNC_DIRTY) state_ = USER_DICT_SYNC_DIRTY;
+    }
+    return len;
+}
+
+#endif
+
+/**
+ * Copy a snapshot of the dictionary's bookkeeping values into *stat.
+ *
+ * @param stat Destination structure; must not be NULL.
+ * @return false if the dictionary is not in a valid state or stat is NULL,
+ *         true once every field has been filled in.
+ */
+bool UserDict::state(UserDictStat *stat) {
+    if (!is_valid_state() || !stat) return false;
+
+    UserDictStat &out = *stat;
+
+    out.version = version_;
+    out.file_name = dict_file_;
+    out.load_time.tv_sec = load_time_.tv_sec;
+    out.load_time.tv_usec = load_time_.tv_usec;
+
+    // g_last_update_ is read while holding g_mutex_.
+    pthread_mutex_lock(&g_mutex_);
+    out.last_update.tv_sec = g_last_update_.tv_sec;
+    out.last_update.tv_usec = g_last_update_.tv_usec;
+    pthread_mutex_unlock(&g_mutex_);
+
+    out.disk_size = get_dict_file_size(&dict_info_);
+    out.lemma_count = dict_info_.lemma_count;
+    out.lemma_size = dict_info_.lemma_size;
+    out.delete_count = dict_info_.free_count;
+    out.delete_size = dict_info_.free_size;
+#ifdef ___SYNC_ENABLED___
+    out.sync_count = dict_info_.sync_count;
+#endif
+    out.limit_lemma_count = dict_info_.limit_lemma_count;
+    out.limit_lemma_size = dict_info_.limit_lemma_size;
+    out.reclaim_ratio = dict_info_.reclaim_ratio;
+    return true;
+}
+
+/**
+ * Store the lemma count limit, lemma size limit and reclaim ratio.
+ * A reclaim ratio above 100 is stored as 100.
+ */
+void UserDict::set_limit(uint32 max_lemma_count, uint32 max_lemma_size, uint32 reclaim_ratio) {
+    dict_info_.limit_lemma_count = max_lemma_count;
+    dict_info_.limit_lemma_size = max_lemma_size;
+    dict_info_.reclaim_ratio = (reclaim_ratio > 100) ? 100 : reclaim_ratio;
+}
+
+/**
+ * Remove the lowest-scored lemmas to make room for new ones.
+ *
+ * The number removed is lemma_count * reclaim_ratio / 100. A ratio of 0
+ * disables reclaiming; a ratio of 100 (clear everything) is not implemented.
+ * Selection keeps the rc smallest scores seen so far in a max-heap, so the
+ * heap root is always the first candidate to be displaced.
+ */
+void UserDict::reclaim() {
+    if (is_valid_state() == false) return;
+
+    switch (dict_info_.reclaim_ratio) {
+        case 0:
+            return;
+        case 100:
+            // TODO: CLEAR to be implemented
+            assert(false);
+            return;
+        default:
+            break;
+    }
+
+    // XXX Reclaim is only based on count, not size
+    uint32 count = dict_info_.lemma_count;
+    int rc = count * dict_info_.reclaim_ratio / 100;
+
+    // With nothing to reclaim the heap would be empty: malloc(0) may still
+    // return a non-NULL pointer, and the selection loop below would then read
+    // and write score_offset_pairs[0] out of bounds.
+    if (rc <= 0) return;
+
+    UserDictScoreOffsetPair *score_offset_pairs = NULL;
+    score_offset_pairs = (UserDictScoreOffsetPair *)malloc(sizeof(UserDictScoreOffsetPair) * rc);
+    if (score_offset_pairs == NULL) {
+        return;
+    }
+
+    // Seed the heap with the first rc lemmas.
+    for (int i = 0; i < rc; i++) {
+        int s = scores_[i];
+        score_offset_pairs[i].score = s;
+        score_offset_pairs[i].offset_index = i;
+    }
+
+    // Heapify.
+    for (int i = (rc + 1) / 2; i >= 0; i--) shift_down(score_offset_pairs, i, rc);
+
+    // Any remaining lemma scoring below the current root replaces it.
+    for (uint32 i = rc; i < dict_info_.lemma_count; i++) {
+        int s = scores_[i];
+        if (s < score_offset_pairs[0].score) {
+            score_offset_pairs[0].score = s;
+            score_offset_pairs[0].offset_index = i;
+            shift_down(score_offset_pairs, 0, rc);
+        }
+    }
+
+    for (int i = 0; i < rc; i++) {
+        int off = score_offset_pairs[i].offset_index;
+        remove_lemma_by_offset_index(off);
+    }
+    if (state_ < USER_DICT_OFFSET_DIRTY) state_ = USER_DICT_OFFSET_DIRTY;
+
+    free(score_offset_pairs);
+}
+
+// Exchange the score and offset_index fields of sop[i] and sop[j].
+inline void UserDict::swap(UserDictScoreOffsetPair *sop, int i, int j) {
+    UserDictScoreOffsetPair &first = sop[i];
+    UserDictScoreOffsetPair &second = sop[j];
+
+    int saved_score = first.score;
+    int saved_index = first.offset_index;
+
+    first.score = second.score;
+    first.offset_index = second.offset_index;
+
+    second.score = saved_score;
+    second.offset_index = saved_index;
+}
+
+/**
+ * Restore the max-heap property (largest score at the root) for the subtree
+ * rooted at index i of an array-backed binary heap.
+ *
+ * @param sop Heap storage.
+ * @param i   Index of the element to sift down.
+ * @param n   Number of elements in the heap.
+ */
+void UserDict::shift_down(UserDictScoreOffsetPair *sop, int i, int n) {
+    int par = i;
+    while (par < n) {
+        int left = par * 2 + 1;
+        if (left >= n) break;  // no children, so par is a leaf
+        int right = left + 1;
+
+        // Pick the larger child. On a tie the left child is used; comparing
+        // both children with strict '>' against each other would select
+        // neither and leave a smaller parent sitting above two equal children.
+        int largest = left;
+        if (right < n && sop[right].score > sop[left].score) largest = right;
+
+        if (sop[largest].score <= sop[par].score) break;
+        swap(sop, largest, par);
+        par = largest;
+    }
+}
+
+// Insert or update a lemma, stamping it with the current time as its
+// last-modified time.
+LemmaIdType UserDict::put_lemma(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count) {
+    const uint64 now = time(NULL);
+    return _put_lemma(lemma_str, splids, lemma_len, count, now);
+}
+
+/**
+ * Insert a new lemma, or overwrite the frequency and last-modified time of an
+ * existing one.
+ *
+ * @param lemma_str Characters of the phrase.
+ * @param splids    Spelling ids, one per character.
+ * @param lemma_len Number of characters / spelling ids.
+ * @param count     Frequency to store for the lemma.
+ * @param lmt       Last-modified time to store for the lemma.
+ * @return The lemma id, or 0 if the dictionary is not in a valid state, a
+ *         configured limit is reached, or appending fails.
+ */
+LemmaIdType UserDict::_put_lemma(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count, uint64 lmt) {
+#ifdef ___DEBUG_PERF___
+    DEBUG_PERF_BEGIN;
+#endif
+    if (is_valid_state() == false) return 0;
+    int32 off = locate_in_offsets(lemma_str, splids, lemma_len);
+    if (off != -1) {
+        // scores_[] holds the value produced by build_score(lmt, count), not
+        // the bare frequency, so the old frequency has to be extracted before
+        // adjusting the running total (update_lemma() does the same).
+        int delta_score = count - extract_score_freq(scores_[off]);
+        dict_info_.total_nfreq += delta_score;
+        scores_[off] = build_score(lmt, count);
+        if (state_ < USER_DICT_SCORE_DIRTY) state_ = USER_DICT_SCORE_DIRTY;
+#ifdef ___DEBUG_PERF___
+        DEBUG_PERF_END;
+        LOGD_PERF("_put_lemma(update)");
+#endif
+        return ids_[off];
+    } else {
+        if ((dict_info_.limit_lemma_count > 0 && dict_info_.lemma_count >= dict_info_.limit_lemma_count) || (dict_info_.limit_lemma_size > 0 && dict_info_.lemma_size + (2 + (lemma_len << 2)) > dict_info_.limit_lemma_size)) {
+            // XXX Don't defragment here, it's too time-consuming.
+            return 0;
+        }
+        int flushed = 0;
+        if (lemma_count_left_ == 0 || lemma_size_left_ < (size_t)(2 + (lemma_len << 2))) {
+            // XXX When there is no space for new lemma, we flush to disk
+            // flush_cache() may be called by upper user
+            // and better place should be found instead of here
+            flush_cache();
+            flushed = 1;
+            // Or simply return and do nothing
+            // return 0;
+        }
+#ifdef ___DEBUG_PERF___
+        DEBUG_PERF_END;
+        LOGD_PERF(flushed ? "_put_lemma(flush+add)" : "_put_lemma(add)");
+#endif
+        LemmaIdType id = append_a_lemma(lemma_str, splids, lemma_len, count, lmt);
+#ifdef ___SYNC_ENABLED___
+        // syncs_ is NULL while put_lemma_no_sync() is running; skip queueing.
+        if (syncs_ && id != 0) {
+            queue_lemma_for_sync(id);
+        }
+#endif
+        return id;
+    }
+    return 0;
+}
+
+#ifdef ___SYNC_ENABLED___
+// Append the storage offset of lemma `id` to the pending-sync queue, growing
+// the queue by kUserDictPreAlloc entries when it is full. If the queue cannot
+// be grown the lemma is not queued.
+void UserDict::queue_lemma_for_sync(LemmaIdType id) {
+    if (dict_info_.sync_count >= sync_count_size_) {
+        uint32 *grown = (uint32 *)realloc(syncs_, (sync_count_size_ + kUserDictPreAlloc) << 2);
+        if (!grown) return;
+        sync_count_size_ += kUserDictPreAlloc;
+        syncs_ = grown;
+    }
+    syncs_[dict_info_.sync_count++] = offsets_by_id_[id - start_id_];
+}
+#endif
+
+/**
+ * Adjust the stored frequency of an existing lemma and optionally refresh its
+ * last-modified time.
+ *
+ * @param lemma_id    Id of the lemma to update.
+ * @param delta_count Amount added to the lemma's frequency.
+ * @param selected    When true, the last-modified time is set to time(NULL).
+ * @return The lemma's id on success; 0 if the dictionary is not in a valid
+ *         state, the id is not valid, or the lemma cannot be located.
+ */
+LemmaIdType UserDict::update_lemma(LemmaIdType lemma_id, int16 delta_count, bool selected) {
+#ifdef ___DEBUG_PERF___
+    DEBUG_PERF_BEGIN;
+#endif
+    if (is_valid_state() == false) return 0;
+    if (is_valid_lemma_id(lemma_id) == false) return 0;
+    // Resolve the id to its stored record, then read the record's fields.
+    uint32 offset = offsets_by_id_[lemma_id - start_id_];
+    uint8 lemma_len = get_lemma_nchar(offset);
+    char16 *lemma_str = get_lemma_word(offset);
+    uint16 *splids = get_lemma_spell_ids(offset);
+
+    // Find the lemma's index in offsets_/scores_/ids_ from its content.
+    int32 off = locate_in_offsets(lemma_str, splids, lemma_len);
+    if (off != -1) {
+        int score = scores_[off];
+        int count = extract_score_freq(score);
+        uint64 lmt = extract_score_lmt(score);
+        // Clamp so the new frequency lands on kUserDictMaxFrequency.
+        // NOTE(review): assuming int16 is a signed 16-bit type, both operands
+        // are promoted to int, so `count + delta_count < count` holds for any
+        // negative delta_count and such a call also takes this branch.
+        // Confirm whether negative deltas are ever expected here.
+        if (count + delta_count > kUserDictMaxFrequency || count + delta_count < count) {
+            delta_count = kUserDictMaxFrequency - count;
+        }
+        count += delta_count;
+        dict_info_.total_nfreq += delta_count;
+        if (selected) {
+            lmt = time(NULL);
+        }
+        scores_[off] = build_score(lmt, count);
+        if (state_ < USER_DICT_SCORE_DIRTY) state_ = USER_DICT_SCORE_DIRTY;
+#ifdef ___DEBUG_PERF___
+        DEBUG_PERF_END;
+        LOGD_PERF("update_lemma");
+#endif
+#ifdef ___SYNC_ENABLED___
+        // NOTE(review): unlike _put_lemma(), syncs_ is not checked for NULL
+        // before queueing; confirm this cannot run while syncs_ is unset.
+        queue_lemma_for_sync(ids_[off]);
+#endif
+        return ids_[off];
+    }
+    return 0;
+}
+
+// Report the running frequency total kept in dict_info_.total_nfreq.
+size_t UserDict::get_total_lemma_count() {
+    return dict_info_.total_nfreq;
+}
+
+// Record the caller-supplied total in total_other_nfreq_.
+void UserDict::set_total_lemma_count_of_others(size_t count) {
+    total_other_nfreq_ = count;
+}
+
+/**
+ * Store a new lemma at the end of the lemma buffer and insert it into the
+ * index arrays.
+ *
+ * @param lemma_str Characters of the phrase.
+ * @param splids    Spelling ids, one per character.
+ * @param lemma_len Number of characters / spelling ids.
+ * @param count     Frequency stored with the lemma.
+ * @param lmt       Last-modified time stored with the lemma.
+ * @return The id assigned to the new lemma, or 0 if the write offset exceeds
+ *         kUserDictOffsetMask.
+ *
+ * NOTE(review): no capacity check on lemmas_ or the index arrays happens
+ * here; presumably callers guarantee room beforehand (see the flush in
+ * _put_lemma()). Confirm.
+ */
+LemmaIdType UserDict::append_a_lemma(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count, uint64 lmt) {
+    LemmaIdType id = get_max_lemma_id() + 1;
+    size_t offset = dict_info_.lemma_size;
+    if (offset > kUserDictOffsetMask) return 0;
+
+    // Record layout: one zero byte, one length byte, lemma_len spelling ids
+    // (2 bytes each), then lemma_len characters (2 bytes each).
+    lemmas_[offset] = 0;
+    lemmas_[offset + 1] = (uint8)lemma_len;
+    for (size_t i = 0; i < lemma_len; i++) {
+        *((uint16 *)&lemmas_[offset + 2 + (i << 1)]) = splids[i];
+        *((char16 *)&lemmas_[offset + 2 + (lemma_len << 1) + (i << 1)]) = lemma_str[i];
+    }
+    // Provisionally place the new entry in the last slot of each index array.
+    uint32 off = dict_info_.lemma_count;
+    offsets_[off] = offset;
+    scores_[off] = build_score(lmt, count);
+    ids_[off] = id;
+#ifdef ___PREDICT_ENABLED___
+    predicts_[off] = offset;
+#endif
+
+    offsets_by_id_[id - start_id_] = offset;
+
+    // Account for the 2 + 4 * lemma_len bytes just written.
+    dict_info_.lemma_count++;
+    dict_info_.lemma_size += (2 + (lemma_len << 2));
+    lemma_count_left_--;
+    lemma_size_left_ -= (2 + (lemma_len << 2));
+
+    // Sort: move the new entry from the last slot to the first position
+    // whose existing lemma does not compare below the new spelling ids.
+
+    UserDictSearchable searchable;
+    prepare_locate(&searchable, splids, lemma_len);
+
+    size_t i = 0;
+    while (i < off) {
+        offset = offsets_[i];
+        uint32 nchar = get_lemma_nchar(offset);
+        uint16 *spl = get_lemma_spell_ids(offset);
+
+        if (0 <= fuzzy_compare_spell_id(spl, nchar, &searchable)) break;
+        i++;
+    }
+    if (i != off) {
+        // Shift entries [i, off) up by one slot in each parallel array
+        // (4 bytes per entry) and drop the new entry into slot i.
+        uint32 temp = offsets_[off];
+        memmove(offsets_ + i + 1, offsets_ + i, (off - i) << 2);
+        offsets_[i] = temp;
+
+        temp = scores_[off];
+        memmove(scores_ + i + 1, scores_ + i, (off - i) << 2);
+        scores_[i] = temp;
+
+        temp = ids_[off];
+        memmove(ids_ + i + 1, ids_ + i, (off - i) << 2);
+        ids_[i] = temp;
+    }
+
+#ifdef ___PREDICT_ENABLED___
+    // Same insertion for predicts_, positioned by the lemma's characters.
+    uint32 j = 0;
+    uint16 *words_new = get_lemma_word(predicts_[off]);
+    j = locate_where_to_insert_in_predicts(words_new, lemma_len);
+    if (j != off) {
+        uint32 temp = predicts_[off];
+        memmove(predicts_ + j + 1, predicts_ + j, (off - j) << 2);
+        predicts_[j] = temp;
+    }
+#endif
+
+    if (state_ < USER_DICT_LEMMA_DIRTY) state_ = USER_DICT_LEMMA_DIRTY;
+
+#ifdef ___CACHE_ENABLED___
+    cache_init();
+#endif
+
+    dict_info_.total_nfreq += count;
+    return id;
+}
+}  // namespace ime_pinyin
diff --git a/tests/main.cpp b/tests/main.cpp
index 5284f0f..d5437bd 100644
--- a/tests/main.cpp
+++ b/tests/main.cpp
@@ -1,35 +1,35 @@
-#include "../src/include/pinyinime.h"
-#include <codecvt>
-#include <iostream>
-#include <locale>
-#include <string>
-
-std::string fromUtf16(const ime_pinyin::char16 *buf, size_t len) {
-    // 转换为标准 char16_t
-    std::u16string utf16Str(reinterpret_cast<const char16_t *>(buf), len);
-    std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
-    return convert.to_bytes(utf16Str);
-}
-
-int main() {
-    if (!ime_pinyin::im_open_decoder("./data/dict_pinyin.dat", "./data/user_dict.dat")) {
-        std::cout << "fany bug.\n";
-        return 0;
-    }
-
-    std::string pinyin = "ni'ma'si'le";
-    pinyin = "ni'ma'mei'si";
-    pinyin = "ni'shuo'ni'ma'ne";
-    size_t cand_cnt = ime_pinyin::im_search(pinyin.c_str(), pinyin.size());
-    ime_pinyin::char16 buf[256] = {0};
-    std::string msg;
-    for (size_t i = 0; i < 100; ++i) {
-        ime_pinyin::im_get_candidate(i, buf, 255);
-        size_t len = 0;
-        while (buf[len] != 0 && len < 255) ++len;
-        msg.append(fromUtf16(buf, len) + " ");
-    }
-    std::cout << "候选项数量: " << cand_cnt << std::endl;
-    std::cout << "候选项本体: " << msg << std::endl;
-    return 0;
-}
+#include "../src/include/pinyinime.h"
+#include <codecvt>
+#include <iostream>
+#include <locale>
+#include <string>
+
+// Convert `len` UTF-16 code units starting at `buf` into a UTF-8 std::string.
+std::string fromUtf16(const ime_pinyin::char16 *buf, size_t len) {
+    // View the IME's 16-bit units as standard char16_t.
+    const char16_t *units = reinterpret_cast<const char16_t *>(buf);
+    std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> utf8_codec;
+    return utf8_codec.to_bytes(std::u16string(units, len));
+}
+
+// Manual smoke test: open the decoder, search one pinyin string and print the
+// number of candidates followed by up to the first 100 of them.
+int main() {
+    if (!ime_pinyin::im_open_decoder("./data/dict_pinyin.dat", "./data/user_dict.dat")) {
+        std::cout << "fany bug.\n";
+        return 0;
+    }
+
+    // Earlier sample inputs are kept for reference; only the last one is used.
+    std::string pinyin = "ni'ma'si'le";
+    pinyin = "ni'ma'mei'si";
+    pinyin = "ni'shuo'ni'ma'ne";
+    size_t cand_cnt = ime_pinyin::im_search(pinyin.c_str(), pinyin.size());
+    ime_pinyin::char16 buf[256] = {0};
+    std::string msg;
+    // Fetch only candidates the search reported. The loop used to run 100
+    // times unconditionally and appended whatever was left in buf for indices
+    // beyond cand_cnt.
+    const size_t max_shown = 100;
+    const size_t shown = cand_cnt < max_shown ? cand_cnt : max_shown;
+    for (size_t i = 0; i < shown; ++i) {
+        ime_pinyin::im_get_candidate(i, buf, 255);
+        size_t len = 0;
+        while (buf[len] != 0 && len < 255) ++len;
+        msg.append(fromUtf16(buf, len) + " ");
+    }
+    std::cout << "候选项数量: " << cand_cnt << std::endl;
+    std::cout << "候选项本体: " << msg << std::endl;
+    return 0;
+}