diff --git a/.clang-format b/.clang-format index cd62434..f9dcb2c 100644 --- a/.clang-format +++ b/.clang-format @@ -1,6 +1,6 @@ --- -BasedOnStyle: llvm -IndentWidth: 2 +BasedOnStyle: Google +IndentWidth: 4 # do not sort header files SortIncludes: false diff --git a/.vscode/c_cpp_propertities.json b/.vscode/c_cpp_propertities.json index 8a66452..f10fc59 100644 --- a/.vscode/c_cpp_propertities.json +++ b/.vscode/c_cpp_propertities.json @@ -8,7 +8,7 @@ ], "intelliSenseMode": "linux-gcc-x64", "compilerPath": "/usr/bin/g++", - "cppStandard": "c++17" + "cppStandard": "c++11" } ], "version": 4 diff --git a/.vscode/launch.json b/.vscode/launch.json index eddf28a..b47d649 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -8,7 +8,7 @@ "name": "(gdb) Launch", "type": "cppdbg", "request": "launch", - "program": "${workspaceFolder}/build/bin/linuxcppdemo", + "program": "${workspaceFolder}/build/bin/pinyinime", "args": [], "stopAtEntry": false, "cwd": "${fileDirname}", diff --git a/.vscode/settings.json b/.vscode/settings.json index de186de..c2b0a7e 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,10 +1,8 @@ { - "C_Cpp.default.cppStandard": "c++17", - "editor.tabSize": 2, + "editor.tabSize": 4, "editor.indentSize": "tabSize", "[cpp]": { - "editor.tabSize": 2 + "editor.tabSize": 4 }, - "C_Cpp.formatting": "clangFormat", "cmake.generator": "Unix Makefiles" } diff --git a/CMakeLists.txt b/CMakeLists.txt index c24514f..920865b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,65 +1,57 @@ -cmake_minimum_required(VERSION 3.15) # Qt6 需要 CMake 3.15 或更高版本 +cmake_minimum_required(VERSION 3.15) -# 设置项目名称和版本 project(pinyinime VERSION 1.0 LANGUAGES CXX) -# 包含头文件路径 -include_directories(${CMAKE_CURRENT_SOURCE_DIR}) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src/include) -# 设置 C++ 标准为 C++17 set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED True) - -# 设置 C 标准为 C99 set(CMAKE_C_STANDARD 99) set(CMAKE_C_STANDARD_REQUIRED True) -# 添加头文件 
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) + set(HEADERS - atomdictbase.h - dictbuilder.h - dictdef.h - dictlist.h - dicttrie.h - lpicache.h - matrixsearch.h - mystdlib.h - ngram.h - pinyinime.h - searchutility.h - spellingtable.h - spellingtrie.h - splparser.h - sync.h - userdict.h - utf16char.h - utf16reader.h + ./src/include/atomdictbase.h + ./src/include/dictbuilder.h + ./src/include/dictdef.h + ./src/include/dictlist.h + ./src/include/dicttrie.h + ./src/include/lpicache.h + ./src/include/matrixsearch.h + ./src/include/mystdlib.h + ./src/include/ngram.h + ./src/include/pinyinime.h + ./src/include/searchutility.h + ./src/include/spellingtable.h + ./src/include/spellingtrie.h + ./src/include/splparser.h + ./src/include/sync.h + ./src/include/userdict.h + ./src/include/utf16char.h + ./src/include/utf16reader.h ) -# 添加源文件 set(SOURCES - dictbuilder.cpp - dictlist.cpp - dicttrie.cpp - lpicache.cpp - matrixsearch.cpp - mystdlib.cpp - ngram.cpp - pinyinime.cpp - searchutility.cpp - spellingtable.cpp - spellingtrie.cpp - splparser.cpp - sync.cpp - userdict.cpp - utf16char.cpp - utf16reader.cpp - main.cpp + ./src/share/dictbuilder.cpp + ./src/share/dictlist.cpp + ./src/share/dicttrie.cpp + ./src/share/lpicache.cpp + ./src/share/matrixsearch.cpp + ./src/share/mystdlib.cpp + ./src/share/ngram.cpp + ./src/share/pinyinime.cpp + ./src/share/searchutility.cpp + ./src/share/spellingtable.cpp + ./src/share/spellingtrie.cpp + ./src/share/splparser.cpp + ./src/share/sync.cpp + ./src/share/userdict.cpp + ./src/share/utf16char.cpp + ./src/share/utf16reader.cpp + ./tests/main.cpp ) -# 创建库 -add_executable(pinyinime ${SOURCES} ${HEADERS}) -# add_executable(pinyinime "./maintest.cpp") +set(MY_EXECUTABLE_NAME "pinyinime") -# 如果需要安装,可以添加 install 命令 -# install(TARGETS pinyinime DESTINATION lib) \ No newline at end of file +add_executable(pinyinime ${SOURCES} ${HEADERS}) \ No newline at end of file diff --git a/atomdictbase.h b/atomdictbase.h deleted file mode 100644 index 
0a70a51..0000000 --- a/atomdictbase.h +++ /dev/null @@ -1,269 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * This class defines AtomDictBase class which is the base class for all atom - * dictionaries. Atom dictionaries are managed by the decoder class - * MatrixSearch. - * - * When the user appends a new character to the Pinyin string, all enabled atom - * dictionaries' extend_dict() will be called at least once to get candidates - * ended in this step (the information of starting step is also given in the - * parameter). Usually, when extend_dict() is called, a MileStoneHandle object - * returned by a previous calling for a earlier step is given to speed up the - * look-up process, and a new MileStoneHandle object will be returned if - * the extension is successful. - * - * A returned MileStoneHandle object should keep alive until Function - * reset_milestones() is called and this object is noticed to be reset. - * - * Usually, the atom dictionary can use step information to manage its - * MileStoneHandle objects, or it can make the objects in ascendant order to - * make the reset easier. - * - * When the decoder loads the dictionary, it will give a starting lemma id for - * this atom dictionary to map a inner id to a global id. Global ids should be - * used when an atom dictionary talks to any component outside. 
- */ -#ifndef PINYINIME_INCLUDE_ATOMDICTBASE_H__ -#define PINYINIME_INCLUDE_ATOMDICTBASE_H__ - -#include -#include "./dictdef.h" -#include "./searchutility.h" - -namespace ime_pinyin { -class AtomDictBase { - public: - virtual ~AtomDictBase() {} - - /** - * Load an atom dictionary from a file. - * - * @param file_name The file name to load dictionary. - * @param start_id The starting id used for this atom dictionary. - * @param end_id The end id (included) which can be used for this atom - * dictionary. User dictionary will always use the last id space, so it can - * ignore this paramter. All other atom dictionaries should check this - * parameter. - * @return True if succeed. - */ - virtual bool load_dict(const char *file_name, LemmaIdType start_id, - LemmaIdType end_id) = 0; - - /** - * Close this atom dictionary. - * - * @return True if succeed. - */ - virtual bool close_dict() = 0; - - /** - * Get the total number of lemmas in this atom dictionary. - * - * @return The total number of lemmas. - */ - virtual size_t number_of_lemmas() = 0; - - /** - * This function is called by the decoder when user deletes a character from - * the input string, or begins a new input string. - * - * Different atom dictionaries may implement this function in different way. - * an atom dictionary can use one of these two parameters (or both) to reset - * its corresponding MileStoneHandle objects according its detailed - * implementation. - * - * For example, if an atom dictionary uses step information to manage its - * MileStoneHandle objects, parameter from_step can be used to identify which - * objects should be reset; otherwise, if another atom dictionary does not - * use the detailed step information, it only uses ascendant handles - * (according to step. For the same step, earlier call, smaller handle), it - * can easily reset those MileStoneHandle which are larger than from_handle. - * - * The decoder always reset the decoding state by step. 
So when it begins - * resetting, it will call reset_milestones() of its atom dictionaries with - * the step information, and the MileStoneHandle objects returned by the - * earliest calling of extend_dict() for that step. - * - * If an atom dictionary does not implement incremental search, this function - * can be totally ignored. - * - * @param from_step From which step(included) the MileStoneHandle - * objects should be reset. - * @param from_handle The ealiest MileStoneHandle object for step from_step - */ - virtual void reset_milestones(uint16 from_step, - MileStoneHandle from_handle) = 0; - - /** - * Used to extend in this dictionary. The handle returned should keep valid - * until reset_milestones() is called. - * - * @param from_handle Its previous returned extended handle without the new - * spelling id, it can be used to speed up the extending. - * @param dep The paramter used for extending. - * @param lpi_items Used to fill in the lemmas matched. - * @param lpi_max The length of the buffer - * @param lpi_num Used to return the newly added items. - * @return The new mile stone for this extending. 0 if fail. - */ - virtual MileStoneHandle extend_dict(MileStoneHandle from_handle, - const DictExtPara *dep, - LmaPsbItem *lpi_items, - size_t lpi_max, size_t *lpi_num) = 0; - - /** - * Get lemma items with scores according to a spelling id stream. - * This atom dictionary does not need to sort the returned items. - * - * @param splid_str The spelling id stream buffer. - * @param splid_str_len The length of the spelling id stream buffer. - * @param lpi_items Used to return matched lemma items with scores. - * @param lpi_max The maximum size of the buffer to return result. - * @return The number of matched items which have been filled in to lpi_items. - */ - virtual size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len, - LmaPsbItem *lpi_items, size_t lpi_max) = 0; - - /** - * Get a lemma string (The Chinese string) by the given lemma id. 
- * - * @param id_lemma The lemma id to get the string. - * @param str_buf The buffer to return the Chinese string. - * @param str_max The maximum size of the buffer. - * @return The length of the string, 0 if fail. - */ - virtual uint16 get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, - uint16 str_max) = 0; - - /** - * Get the full spelling ids for the given lemma id. - * If the given buffer is too short, return 0. - * - * @param splids Used to return the spelling ids. - * @param splids_max The maximum buffer length of splids. - * @param arg_valid Used to indicate if the incoming parameters have been - * initialized are valid. If it is true, the splids and splids_max are valid - * and there may be half ids in splids to be updated to full ids. In this - * case, splids_max is the number of valid ids in splids. - * @return The number of ids in the buffer. - */ - virtual uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids, - uint16 splids_max, bool arg_valid) = 0; - - /** - * Function used for prediction. - * No need to sort the newly added items. - * - * @param last_hzs The last n Chinese chracters(called Hanzi), its length - * should be less than or equal to kMaxPredictSize. - * @param hzs_len specifies the length(<= kMaxPredictSize) of the history. - * @param npre_items Used used to return the result. - * @param npre_max The length of the buffer to return result - * @param b4_used Number of prediction result (from npre_items[-b4_used]) - * from other atom dictionaries. A atom ditionary can just ignore it. - * @return The number of prediction result from this atom dictionary. - */ - virtual size_t predict(const char16 last_hzs[], uint16 hzs_len, - NPredictItem *npre_items, size_t npre_max, - size_t b4_used) = 0; - - /** - * Add a lemma to the dictionary. If the dictionary allows to add new - * items and this item does not exist, add it. - * - * @param lemma_str The Chinese string of the lemma. - * @param splids The spelling ids of the lemma. 
- * @param lemma_len The length of the Chinese lemma. - * @param count The frequency count for this lemma. - */ - virtual LemmaIdType put_lemma(char16 lemma_str[], uint16 splids[], - uint16 lemma_len, uint16 count) = 0; - - /** - * Update a lemma's occuring count. - * - * @param lemma_id The lemma id to update. - * @param delta_count The frequnecy count to ajust. - * @param selected Indicate whether this lemma is selected by user and - * submitted to target edit box. - * @return The id if succeed, 0 if fail. - */ - virtual LemmaIdType update_lemma(LemmaIdType lemma_id, int16 delta_count, - bool selected) = 0; - - /** - * Get the lemma id for the given lemma. - * - * @param lemma_str The Chinese string of the lemma. - * @param splids The spelling ids of the lemma. - * @param lemma_len The length of the lemma. - * @return The matched lemma id, or 0 if fail. - */ - virtual LemmaIdType get_lemma_id(char16 lemma_str[], uint16 splids[], - uint16 lemma_len) = 0; - - /** - * Get the lemma score. - * - * @param lemma_id The lemma id to get score. - * @return The score of the lemma, or 0 if fail. - */ - virtual LmaScoreType get_lemma_score(LemmaIdType lemma_id) = 0; - - /** - * Get the lemma score. - * - * @param lemma_str The Chinese string of the lemma. - * @param splids The spelling ids of the lemma. - * @param lemma_len The length of the lemma. - * @return The score of the lamm, or 0 if fail. - */ - virtual LmaScoreType get_lemma_score(char16 lemma_str[], uint16 splids[], - uint16 lemma_len) = 0; - - /** - * If the dictionary allowed, remove a lemma from it. - * - * @param lemma_id The id of the lemma to remove. - * @return True if succeed. - */ - virtual bool remove_lemma(LemmaIdType lemma_id) = 0; - - /** - * Get the total occuring count of this atom dictionary. - * - * @return The total occuring count of this atom dictionary. - */ - virtual size_t get_total_lemma_count() = 0; - - /** - * Set the total occuring count of other atom dictionaries. 
- * - * @param count The total occuring count of other atom dictionaies. - */ - virtual void set_total_lemma_count_of_others(size_t count) = 0; - - /** - * Notify this atom dictionary to flush the cached data to persistent storage - * if necessary. - */ - virtual void flush_cache() = 0; -}; -} - -#endif // PINYINIME_INCLUDE_ATOMDICTBASE_H__ diff --git a/dictbuilder.cpp b/dictbuilder.cpp deleted file mode 100644 index 623ec3d..0000000 --- a/dictbuilder.cpp +++ /dev/null @@ -1,1070 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include - -#include "./dictbuilder.h" -#include "./dicttrie.h" -#include "./mystdlib.h" -#include "./ngram.h" -#include "./searchutility.h" -#include "./spellingtable.h" -#include "./spellingtrie.h" -#include "./splparser.h" -#include "./utf16reader.h" - -namespace ime_pinyin { - -#ifdef ___BUILD_MODEL___ - -static const size_t kReadBufLen = 512; -static const size_t kSplTableHashLen = 2000; - -// Compare a SingleCharItem, first by Hanzis, then by spelling ids, then by -// frequencies. 
-int cmp_scis_hz_splid_freq(const void* p1, const void* p2) { - const SingleCharItem *s1, *s2; - s1 = static_cast(p1); - s2 = static_cast(p2); - - if (s1->hz < s2->hz) - return -1; - if (s1->hz > s2->hz) - return 1; - - if (s1->splid.half_splid < s2->splid.half_splid) - return -1; - if (s1->splid.half_splid > s2->splid.half_splid) - return 1; - - if (s1->splid.full_splid < s2->splid.full_splid) - return -1; - if (s1->splid.full_splid > s2->splid.full_splid) - return 1; - - if (s1->freq > s2->freq) - return -1; - if (s1->freq < s2->freq) - return 1; - return 0; -} - -int cmp_scis_hz_splid(const void* p1, const void* p2) { - const SingleCharItem *s1, *s2; - s1 = static_cast(p1); - s2 = static_cast(p2); - - if (s1->hz < s2->hz) - return -1; - if (s1->hz > s2->hz) - return 1; - - if (s1->splid.half_splid < s2->splid.half_splid) - return -1; - if (s1->splid.half_splid > s2->splid.half_splid) - return 1; - - if (s1->splid.full_splid < s2->splid.full_splid) - return -1; - if (s1->splid.full_splid > s2->splid.full_splid) - return 1; - - return 0; -} - -int cmp_lemma_entry_hzs(const void* p1, const void* p2) { - size_t size1 = utf16_strlen(((const LemmaEntry*)p1)->hanzi_str); - size_t size2 = utf16_strlen(((const LemmaEntry*)p2)->hanzi_str); - if (size1 < size2) - return -1; - else if (size1 > size2) - return 1; - - return utf16_strcmp(((const LemmaEntry*)p1)->hanzi_str, - ((const LemmaEntry*)p2)->hanzi_str); -} - -int compare_char16(const void* p1, const void* p2) { - if (*((const char16*)p1) < *((const char16*)p2)) - return -1; - if (*((const char16*)p1) > *((const char16*)p2)) - return 1; - return 0; -} - -int compare_py(const void* p1, const void* p2) { - int ret = utf16_strcmp(((const LemmaEntry*)p1)->spl_idx_arr, - ((const LemmaEntry*)p2)->spl_idx_arr); - - if (0 != ret) - return ret; - - return static_cast(((const LemmaEntry*)p2)->freq) - - static_cast(((const LemmaEntry*)p1)->freq); -} - -// First hanzi, if the same, then Pinyin -int cmp_lemma_entry_hzspys(const 
void* p1, const void* p2) { - size_t size1 = utf16_strlen(((const LemmaEntry*)p1)->hanzi_str); - size_t size2 = utf16_strlen(((const LemmaEntry*)p2)->hanzi_str); - if (size1 < size2) - return -1; - else if (size1 > size2) - return 1; - int ret = utf16_strcmp(((const LemmaEntry*)p1)->hanzi_str, - ((const LemmaEntry*)p2)->hanzi_str); - - if (0 != ret) - return ret; - - ret = utf16_strcmp(((const LemmaEntry*)p1)->spl_idx_arr, - ((const LemmaEntry*)p2)->spl_idx_arr); - return ret; -} - -int compare_splid2(const void* p1, const void* p2) { - int ret = utf16_strcmp(((const LemmaEntry*)p1)->spl_idx_arr, - ((const LemmaEntry*)p2)->spl_idx_arr); - return ret; -} - -DictBuilder::DictBuilder() { - lemma_arr_ = NULL; - lemma_num_ = 0; - - scis_ = NULL; - scis_num_ = 0; - - lma_nodes_le0_ = NULL; - lma_nodes_ge1_ = NULL; - - lma_nds_used_num_le0_ = 0; - lma_nds_used_num_ge1_ = 0; - - homo_idx_buf_ = NULL; - homo_idx_num_eq1_ = 0; - homo_idx_num_gt1_ = 0; - - top_lmas_ = NULL; - top_lmas_num_ = 0; - - spl_table_ = NULL; - spl_parser_ = NULL; -} - -DictBuilder::~DictBuilder() { - free_resource(); -} - -bool DictBuilder::alloc_resource(size_t lma_num) { - if (0 == lma_num) - return false; - - free_resource(); - - lemma_num_ = lma_num; - lemma_arr_ = new LemmaEntry[lemma_num_]; - - top_lmas_num_ = 0; - top_lmas_ = new LemmaEntry[kTopScoreLemmaNum]; - - // New the scis_ buffer to the possible maximum size. 
- scis_num_ = lemma_num_ * kMaxLemmaSize; - scis_ = new SingleCharItem[scis_num_]; - - // The root and first level nodes is less than kMaxSpellingNum + 1 - lma_nds_used_num_le0_ = 0; - lma_nodes_le0_ = new LmaNodeLE0[kMaxSpellingNum + 1]; - - // Other nodes is less than lemma_num - lma_nds_used_num_ge1_ = 0; - lma_nodes_ge1_ = new LmaNodeGE1[lemma_num_]; - - homo_idx_buf_ = new LemmaIdType[lemma_num_]; - spl_table_ = new SpellingTable(); - spl_parser_ = new SpellingParser(); - - if (NULL == lemma_arr_ || NULL == top_lmas_ || - NULL == scis_ || NULL == spl_table_ || - NULL == spl_parser_ || NULL == lma_nodes_le0_ || - NULL == lma_nodes_ge1_ || NULL == homo_idx_buf_) { - free_resource(); - return false; - } - - memset(lemma_arr_, 0, sizeof(LemmaEntry) * lemma_num_); - memset(scis_, 0, sizeof(SingleCharItem) * scis_num_); - memset(lma_nodes_le0_, 0, sizeof(LmaNodeLE0) * (kMaxSpellingNum + 1)); - memset(lma_nodes_ge1_, 0, sizeof(LmaNodeGE1) * lemma_num_); - memset(homo_idx_buf_, 0, sizeof(LemmaIdType) * lemma_num_); - spl_table_->init_table(kMaxPinyinSize, kSplTableHashLen, true); - - return true; -} - -char16* DictBuilder::read_valid_hanzis(const char *fn_validhzs, size_t *num) { - if (NULL == fn_validhzs || NULL == num) - return NULL; - - *num = 0; - FILE *fp = fopen(fn_validhzs, "rb"); - if (NULL == fp) - return NULL; - - char16 utf16header; - if (fread(&utf16header, sizeof(char16), 1, fp) != 1 || - 0xfeff != utf16header) { - fclose(fp); - return NULL; - } - - fseek(fp, 0, SEEK_END); - *num = ftell(fp) / sizeof(char16); - assert(*num >= 1); - *num -= 1; - - char16 *hzs = new char16[*num]; - if (NULL == hzs) { - fclose(fp); - return NULL; - } - - fseek(fp, 2, SEEK_SET); - - if (fread(hzs, sizeof(char16), *num, fp) != *num) { - fclose(fp); - delete [] hzs; - return NULL; - } - fclose(fp); - - myqsort(hzs, *num, sizeof(char16), compare_char16); - return hzs; -} - -bool DictBuilder::hz_in_hanzis_list(const char16 *hzs, size_t hzs_len, - char16 hz) { - if (NULL == hzs) - 
return false; - - char16 *found; - found = static_cast( - mybsearch(&hz, hzs, hzs_len, sizeof(char16), compare_char16)); - if (NULL == found) - return false; - - assert(*found == hz); - return true; -} - -// The caller makes sure that the parameters are valid. -bool DictBuilder::str_in_hanzis_list(const char16 *hzs, size_t hzs_len, - const char16 *str, size_t str_len) { - if (NULL == hzs || NULL == str) - return false; - - for (size_t pos = 0; pos < str_len; pos++) { - if (!hz_in_hanzis_list(hzs, hzs_len, str[pos])) - return false; - } - return true; -} - -void DictBuilder::get_top_lemmas() { - top_lmas_num_ = 0; - if (NULL == lemma_arr_) - return; - - for (size_t pos = 0; pos < lemma_num_; pos++) { - if (0 == top_lmas_num_) { - top_lmas_[0] = lemma_arr_[pos]; - top_lmas_num_ = 1; - continue; - } - - if (lemma_arr_[pos].freq > top_lmas_[top_lmas_num_ - 1].freq) { - if (kTopScoreLemmaNum > top_lmas_num_) - top_lmas_num_ += 1; - - size_t move_pos; - for (move_pos = top_lmas_num_ - 1; move_pos > 0; move_pos--) { - top_lmas_[move_pos] = top_lmas_[move_pos - 1]; - if (0 == move_pos - 1 || - (move_pos - 1 > 0 && - top_lmas_[move_pos - 2].freq > lemma_arr_[pos].freq)) { - break; - } - } - assert(move_pos > 0); - top_lmas_[move_pos - 1] = lemma_arr_[pos]; - } else if (kTopScoreLemmaNum > top_lmas_num_) { - top_lmas_[top_lmas_num_] = lemma_arr_[pos]; - top_lmas_num_ += 1; - } - } - - if (kPrintDebug0) { - printf("\n------Top Lemmas------------------\n"); - for (size_t pos = 0; pos < top_lmas_num_; pos++) { - printf("--%d, idx:%06d, score:%.5f\n", pos, top_lmas_[pos].idx_by_hz, - top_lmas_[pos].freq); - } - } -} - -void DictBuilder::free_resource() { - if (NULL != lemma_arr_) - delete [] lemma_arr_; - - if (NULL != scis_) - delete [] scis_; - - if (NULL != lma_nodes_le0_) - delete [] lma_nodes_le0_; - - if (NULL != lma_nodes_ge1_) - delete [] lma_nodes_ge1_; - - if (NULL != homo_idx_buf_) - delete [] homo_idx_buf_; - - if (NULL != spl_table_) - delete spl_table_; - - if 
(NULL != spl_parser_) - delete spl_parser_; - - lemma_arr_ = NULL; - scis_ = NULL; - lma_nodes_le0_ = NULL; - lma_nodes_ge1_ = NULL; - homo_idx_buf_ = NULL; - spl_table_ = NULL; - spl_parser_ = NULL; - - lemma_num_ = 0; - lma_nds_used_num_le0_ = 0; - lma_nds_used_num_ge1_ = 0; - homo_idx_num_eq1_ = 0; - homo_idx_num_gt1_ = 0; -} - -size_t DictBuilder::read_raw_dict(const char* fn_raw, - const char *fn_validhzs, - size_t max_item) { - if (NULL == fn_raw) return 0; - - Utf16Reader utf16_reader; - if (!utf16_reader.open(fn_raw, kReadBufLen * 10)) - return false; - - char16 read_buf[kReadBufLen]; - - // Read the number of lemmas in the file - size_t lemma_num = 240000; - - // allocate resource required - if (!alloc_resource(lemma_num)) { - utf16_reader.close(); - } - - // Read the valid Hanzi list. - char16 *valid_hzs = NULL; - size_t valid_hzs_num = 0; - valid_hzs = read_valid_hanzis(fn_validhzs, &valid_hzs_num); - - // Begin reading the lemma entries - for (size_t i = 0; i < max_item; i++) { - // read next entry - if (!utf16_reader.readline(read_buf, kReadBufLen)) { - lemma_num = i; - break; - } - - size_t token_size; - char16 *token; - char16 *to_tokenize = read_buf; - - // Get the Hanzi string - token = utf16_strtok(to_tokenize, &token_size, &to_tokenize); - if (NULL == token) { - free_resource(); - utf16_reader.close(); - return false; - } - - size_t lemma_size = utf16_strlen(token); - - if (lemma_size > kMaxLemmaSize) { - i--; - continue; - } - - if (lemma_size > 4) { - i--; - continue; - } - - // Copy to the lemma entry - utf16_strcpy(lemma_arr_[i].hanzi_str, token); - - lemma_arr_[i].hz_str_len = token_size; - - // Get the freq string - token = utf16_strtok(to_tokenize, &token_size, &to_tokenize); - if (NULL == token) { - free_resource(); - utf16_reader.close(); - return false; - } - lemma_arr_[i].freq = utf16_atof(token); - - if (lemma_size > 1 && lemma_arr_[i].freq < 60) { - i--; - continue; - } - - // Get GBK mark, if no valid Hanzi list available, all items 
which contains - // GBK characters will be discarded. Otherwise, all items which contains - // characters outside of the valid Hanzi list will be discarded. - token = utf16_strtok(to_tokenize, &token_size, &to_tokenize); - assert(NULL != token); - int gbk_flag = utf16_atoi(token); - if (NULL == valid_hzs || 0 == valid_hzs_num) { - if (0 != gbk_flag) { - i--; - continue; - } - } else { - if (!str_in_hanzis_list(valid_hzs, valid_hzs_num, - lemma_arr_[i].hanzi_str, lemma_arr_[i].hz_str_len)) { - i--; - continue; - } - } - - // Get spelling String - bool spelling_not_support = false; - for (size_t hz_pos = 0; hz_pos < (size_t)lemma_arr_[i].hz_str_len; - hz_pos++) { - // Get a Pinyin - token = utf16_strtok(to_tokenize, &token_size, &to_tokenize); - if (NULL == token) { - free_resource(); - utf16_reader.close(); - return false; - } - - assert(utf16_strlen(token) <= kMaxPinyinSize); - - utf16_strcpy_tochar(lemma_arr_[i].pinyin_str[hz_pos], token); - - format_spelling_str(lemma_arr_[i].pinyin_str[hz_pos]); - - // Put the pinyin to the spelling table - if (!spl_table_->put_spelling(lemma_arr_[i].pinyin_str[hz_pos], - lemma_arr_[i].freq)) { - spelling_not_support = true; - break; - } - } - - // The whole line must have been parsed fully, otherwise discard this one. - token = utf16_strtok(to_tokenize, &token_size, &to_tokenize); - if (spelling_not_support || NULL != token) { - i--; - continue; - } - } - - delete [] valid_hzs; - utf16_reader.close(); - - printf("read succesfully, lemma num: %d\n", lemma_num); - - return lemma_num; -} - -bool DictBuilder::build_dict(const char *fn_raw, - const char *fn_validhzs, - DictTrie *dict_trie) { - if (NULL == fn_raw || NULL == dict_trie) - return false; - - lemma_num_ = read_raw_dict(fn_raw, fn_validhzs, 240000); - if (0 == lemma_num_) - return false; - - // Arrange the spelling table, and build a spelling tree - // The size of an spelling. '\0' is included. 
If the spelling table is - // initialized to calculate the spelling scores, the last char in the - // spelling string will be score, and it is also included in spl_item_size. - size_t spl_item_size; - size_t spl_num; - const char* spl_buf; - spl_buf = spl_table_->arrange(&spl_item_size, &spl_num); - if (NULL == spl_buf) { - free_resource(); - return false; - } - - SpellingTrie &spl_trie = SpellingTrie::get_instance(); - - if (!spl_trie.construct(spl_buf, spl_item_size, spl_num, - spl_table_->get_score_amplifier(), - spl_table_->get_average_score())) { - free_resource(); - return false; - } - - printf("spelling tree construct successfully.\n"); - - // Convert the spelling string to idxs - for (size_t i = 0; i < lemma_num_; i++) { - for (size_t hz_pos = 0; hz_pos < (size_t)lemma_arr_[i].hz_str_len; - hz_pos++) { - uint16 spl_idxs[2]; - uint16 spl_start_pos[3]; - bool is_pre = true; - int spl_idx_num = - spl_parser_->splstr_to_idxs(lemma_arr_[i].pinyin_str[hz_pos], - strlen(lemma_arr_[i].pinyin_str[hz_pos]), - spl_idxs, spl_start_pos, 2, is_pre); - assert(1 == spl_idx_num); - - if (spl_trie.is_half_id(spl_idxs[0])) { - uint16 num = spl_trie.half_to_full(spl_idxs[0], spl_idxs); - assert(0 != num); - } - lemma_arr_[i].spl_idx_arr[hz_pos] = spl_idxs[0]; - } - } - - // Sort the lemma items according to the hanzi, and give each unique item a - // id - sort_lemmas_by_hz(); - - scis_num_ = build_scis(); - - // Construct the dict list - dict_trie->dict_list_ = new DictList(); - bool dl_success = dict_trie->dict_list_->init_list(scis_, scis_num_, - lemma_arr_, lemma_num_); - assert(dl_success); - - // Construct the NGram information - NGram& ngram = NGram::get_instance(); - ngram.build_unigram(lemma_arr_, lemma_num_, - lemma_arr_[lemma_num_ - 1].idx_by_hz + 1); - - // sort the lemma items according to the spelling idx string - myqsort(lemma_arr_, lemma_num_, sizeof(LemmaEntry), compare_py); - - get_top_lemmas(); - -#ifdef ___DO_STATISTICS___ - stat_init(); -#endif - - 
lma_nds_used_num_le0_ = 1; // The root node - bool dt_success = construct_subset(static_cast(lma_nodes_le0_), - lemma_arr_, 0, lemma_num_, 0); - if (!dt_success) { - free_resource(); - return false; - } - -#ifdef ___DO_STATISTICS___ - stat_print(); -#endif - - // Move the node data and homo data to the DictTrie - dict_trie->root_ = new LmaNodeLE0[lma_nds_used_num_le0_]; - dict_trie->nodes_ge1_ = new LmaNodeGE1[lma_nds_used_num_ge1_]; - size_t lma_idx_num = homo_idx_num_eq1_ + homo_idx_num_gt1_ + top_lmas_num_; - dict_trie->lma_idx_buf_ = new unsigned char[lma_idx_num * kLemmaIdSize]; - assert(NULL != dict_trie->root_); - assert(NULL != dict_trie->lma_idx_buf_); - dict_trie->lma_node_num_le0_ = lma_nds_used_num_le0_; - dict_trie->lma_node_num_ge1_ = lma_nds_used_num_ge1_; - dict_trie->lma_idx_buf_len_ = lma_idx_num * kLemmaIdSize; - dict_trie->top_lmas_num_ = top_lmas_num_; - - memcpy(dict_trie->root_, lma_nodes_le0_, - sizeof(LmaNodeLE0) * lma_nds_used_num_le0_); - memcpy(dict_trie->nodes_ge1_, lma_nodes_ge1_, - sizeof(LmaNodeGE1) * lma_nds_used_num_ge1_); - - for (size_t pos = 0; pos < homo_idx_num_eq1_ + homo_idx_num_gt1_; pos++) { - id_to_charbuf(dict_trie->lma_idx_buf_ + pos * kLemmaIdSize, - homo_idx_buf_[pos]); - } - - for (size_t pos = homo_idx_num_eq1_ + homo_idx_num_gt1_; - pos < lma_idx_num; pos++) { - LemmaIdType idx = - top_lmas_[pos - homo_idx_num_eq1_ - homo_idx_num_gt1_].idx_by_hz; - id_to_charbuf(dict_trie->lma_idx_buf_ + pos * kLemmaIdSize, idx); - } - - if (kPrintDebug0) { - printf("homo_idx_num_eq1_: %d\n", homo_idx_num_eq1_); - printf("homo_idx_num_gt1_: %d\n", homo_idx_num_gt1_); - printf("top_lmas_num_: %d\n", top_lmas_num_); - } - - free_resource(); - - if (kPrintDebug0) { - printf("Building dict succeds\n"); - } - return dt_success; -} - -void DictBuilder::id_to_charbuf(unsigned char *buf, LemmaIdType id) { - if (NULL == buf) return; - for (size_t pos = 0; pos < kLemmaIdSize; pos++) { - (buf)[pos] = (unsigned char)(id >> (pos * 8)); - } -} - 
-void DictBuilder::set_son_offset(LmaNodeGE1 *node, size_t offset) { - node->son_1st_off_l = static_cast(offset); - node->son_1st_off_h = static_cast(offset >> 16); -} - -void DictBuilder:: set_homo_id_buf_offset(LmaNodeGE1 *node, size_t offset) { - node->homo_idx_buf_off_l = static_cast(offset); - node->homo_idx_buf_off_h = static_cast(offset >> 16); - -} - -// All spelling strings will be converted to upper case, except that -// spellings started with "ZH"/"CH"/"SH" will be converted to -// "Zh"/"Ch"/"Sh" -void DictBuilder::format_spelling_str(char *spl_str) { - if (NULL == spl_str) - return; - - uint16 pos = 0; - while ('\0' != spl_str[pos]) { - if (spl_str[pos] >= 'a' && spl_str[pos] <= 'z') - spl_str[pos] = spl_str[pos] - 'a' + 'A'; - - if (1 == pos && 'H' == spl_str[pos]) { - if ('C' == spl_str[0] || 'S' == spl_str[0] || 'Z' == spl_str[0]) { - spl_str[pos] = 'h'; - } - } - pos++; - } -} - -LemmaIdType DictBuilder::sort_lemmas_by_hz() { - if (NULL == lemma_arr_ || 0 == lemma_num_) - return 0; - - myqsort(lemma_arr_, lemma_num_, sizeof(LemmaEntry), cmp_lemma_entry_hzs); - - lemma_arr_[0].idx_by_hz = 1; - LemmaIdType idx_max = 1; - for (size_t i = 1; i < lemma_num_; i++) { - if (utf16_strcmp(lemma_arr_[i].hanzi_str, lemma_arr_[i-1].hanzi_str)) { - idx_max++; - lemma_arr_[i].idx_by_hz = idx_max; - } else { - idx_max++; - lemma_arr_[i].idx_by_hz = idx_max; - } - } - return idx_max + 1; -} - -size_t DictBuilder::build_scis() { - if (NULL == scis_ || lemma_num_ * kMaxLemmaSize > scis_num_) - return 0; - - SpellingTrie &spl_trie = SpellingTrie::get_instance(); - - // This first one is blank, because id 0 is invalid. 
- scis_[0].freq = 0; - scis_[0].hz = 0; - scis_[0].splid.full_splid = 0; - scis_[0].splid.half_splid = 0; - scis_num_ = 1; - - // Copy the hanzis to the buffer - for (size_t pos = 0; pos < lemma_num_; pos++) { - size_t hz_num = lemma_arr_[pos].hz_str_len; - for (size_t hzpos = 0; hzpos < hz_num; hzpos++) { - scis_[scis_num_].hz = lemma_arr_[pos].hanzi_str[hzpos]; - scis_[scis_num_].splid.full_splid = lemma_arr_[pos].spl_idx_arr[hzpos]; - scis_[scis_num_].splid.half_splid = - spl_trie.full_to_half(scis_[scis_num_].splid.full_splid); - if (1 == hz_num) - scis_[scis_num_].freq = lemma_arr_[pos].freq; - else - scis_[scis_num_].freq = 0.000001; - scis_num_++; - } - } - - myqsort(scis_, scis_num_, sizeof(SingleCharItem), cmp_scis_hz_splid_freq); - - // Remove repeated items - size_t unique_scis_num = 1; - for (size_t pos = 1; pos < scis_num_; pos++) { - if (scis_[pos].hz == scis_[pos - 1].hz && - scis_[pos].splid.full_splid == scis_[pos - 1].splid.full_splid) - continue; - scis_[unique_scis_num] = scis_[pos]; - scis_[unique_scis_num].splid.half_splid = - spl_trie.full_to_half(scis_[pos].splid.full_splid); - unique_scis_num++; - } - - scis_num_ = unique_scis_num; - - // Update the lemma list. 
- for (size_t pos = 0; pos < lemma_num_; pos++) { - size_t hz_num = lemma_arr_[pos].hz_str_len; - for (size_t hzpos = 0; hzpos < hz_num; hzpos++) { - SingleCharItem key; - key.hz = lemma_arr_[pos].hanzi_str[hzpos]; - key.splid.full_splid = lemma_arr_[pos].spl_idx_arr[hzpos]; - key.splid.half_splid = spl_trie.full_to_half(key.splid.full_splid); - - SingleCharItem *found; - found = static_cast(mybsearch(&key, scis_, - unique_scis_num, - sizeof(SingleCharItem), - cmp_scis_hz_splid)); - - assert(found); - - lemma_arr_[pos].hanzi_scis_ids[hzpos] = - static_cast(found - scis_); - lemma_arr_[pos].spl_idx_arr[hzpos] = found->splid.full_splid; - } - } - - return scis_num_; -} - -bool DictBuilder::construct_subset(void* parent, LemmaEntry* lemma_arr, - size_t item_start, size_t item_end, - size_t level) { - if (level >= kMaxLemmaSize || item_end <= item_start) - return false; - - // 1. Scan for how many sons - size_t parent_son_num = 0; - // LemmaNode *son_1st = NULL; - // parent.num_of_son = 0; - - LemmaEntry *lma_last_start = lemma_arr_ + item_start; - uint16 spl_idx_node = lma_last_start->spl_idx_arr[level]; - - // Scan for how many sons to be allocaed - for (size_t i = item_start + 1; i< item_end; i++) { - LemmaEntry *lma_current = lemma_arr + i; - uint16 spl_idx_current = lma_current->spl_idx_arr[level]; - if (spl_idx_current != spl_idx_node) { - parent_son_num++; - spl_idx_node = spl_idx_current; - } - } - parent_son_num++; - -#ifdef ___DO_STATISTICS___ - // Use to indicate whether all nodes of this layer have no son. - bool allson_noson = true; - - assert(level < kMaxLemmaSize); - if (parent_son_num > max_sonbuf_len_[level]) - max_sonbuf_len_[level] = parent_son_num; - - total_son_num_[level] += parent_son_num; - total_sonbuf_num_[level] += 1; - - if (parent_son_num == 1) - sonbufs_num1_++; - else - sonbufs_numgt1_++; - total_lma_node_num_ += parent_son_num; -#endif - - // 2. 
Update the parent's information - // Update the parent's son list; - LmaNodeLE0 *son_1st_le0 = NULL; // only one of le0 or ge1 is used - LmaNodeGE1 *son_1st_ge1 = NULL; // only one of le0 or ge1 is used. - if (0 == level) { // the parent is root - (static_cast(parent))->son_1st_off = - lma_nds_used_num_le0_; - son_1st_le0 = lma_nodes_le0_ + lma_nds_used_num_le0_; - lma_nds_used_num_le0_ += parent_son_num; - - assert(parent_son_num <= 65535); - (static_cast(parent))->num_of_son = - static_cast(parent_son_num); - } else if (1 == level) { // the parent is a son of root - (static_cast(parent))->son_1st_off = - lma_nds_used_num_ge1_; - son_1st_ge1 = lma_nodes_ge1_ + lma_nds_used_num_ge1_; - lma_nds_used_num_ge1_ += parent_son_num; - - assert(parent_son_num <= 65535); - (static_cast(parent))->num_of_son = - static_cast(parent_son_num); - } else { - set_son_offset((static_cast(parent)), - lma_nds_used_num_ge1_); - son_1st_ge1 = lma_nodes_ge1_ + lma_nds_used_num_ge1_; - lma_nds_used_num_ge1_ += parent_son_num; - - assert(parent_son_num <= 255); - (static_cast(parent))->num_of_son = - (unsigned char)parent_son_num; - } - - // 3. 
Now begin to construct the son one by one - size_t son_pos = 0; - - lma_last_start = lemma_arr_ + item_start; - spl_idx_node = lma_last_start->spl_idx_arr[level]; - - size_t homo_num = 0; - if (lma_last_start->spl_idx_arr[level + 1] == 0) - homo_num = 1; - - size_t item_start_next = item_start; - - for (size_t i = item_start + 1; i < item_end; i++) { - LemmaEntry* lma_current = lemma_arr_ + i; - uint16 spl_idx_current = lma_current->spl_idx_arr[level]; - - if (spl_idx_current == spl_idx_node) { - if (lma_current->spl_idx_arr[level + 1] == 0) - homo_num++; - } else { - // Construct a node - LmaNodeLE0 *node_cur_le0 = NULL; // only one of them is valid - LmaNodeGE1 *node_cur_ge1 = NULL; - if (0 == level) { - node_cur_le0 = son_1st_le0 + son_pos; - node_cur_le0->spl_idx = spl_idx_node; - node_cur_le0->homo_idx_buf_off = homo_idx_num_eq1_ + homo_idx_num_gt1_; - node_cur_le0->son_1st_off = 0; - homo_idx_num_eq1_ += homo_num; - } else { - node_cur_ge1 = son_1st_ge1 + son_pos; - node_cur_ge1->spl_idx = spl_idx_node; - - set_homo_id_buf_offset(node_cur_ge1, - (homo_idx_num_eq1_ + homo_idx_num_gt1_)); - set_son_offset(node_cur_ge1, 0); - homo_idx_num_gt1_ += homo_num; - } - - if (homo_num > 0) { - LemmaIdType* idx_buf = homo_idx_buf_ + homo_idx_num_eq1_ + - homo_idx_num_gt1_ - homo_num; - if (0 == level) { - assert(homo_num <= 65535); - node_cur_le0->num_of_homo = static_cast(homo_num); - } else { - assert(homo_num <= 255); - node_cur_ge1->num_of_homo = (unsigned char)homo_num; - } - - for (size_t homo_pos = 0; homo_pos < homo_num; homo_pos++) { - idx_buf[homo_pos] = lemma_arr_[item_start_next + homo_pos].idx_by_hz; - } - -#ifdef ___DO_STATISTICS___ - if (homo_num > max_homobuf_len_[level]) - max_homobuf_len_[level] = homo_num; - - total_homo_num_[level] += homo_num; -#endif - } - - if (i - item_start_next > homo_num) { - void *next_parent; - if (0 == level) - next_parent = static_cast(node_cur_le0); - else - next_parent = static_cast(node_cur_ge1); - 
construct_subset(next_parent, lemma_arr, - item_start_next + homo_num, i, level + 1); -#ifdef ___DO_STATISTICS___ - - total_node_hasson_[level] += 1; - allson_noson = false; -#endif - } - - // for the next son - lma_last_start = lma_current; - spl_idx_node = spl_idx_current; - item_start_next = i; - homo_num = 0; - if (lma_current->spl_idx_arr[level + 1] == 0) - homo_num = 1; - - son_pos++; - } - } - - // 4. The last one to construct - LmaNodeLE0 *node_cur_le0 = NULL; // only one of them is valid - LmaNodeGE1 *node_cur_ge1 = NULL; - if (0 == level) { - node_cur_le0 = son_1st_le0 + son_pos; - node_cur_le0->spl_idx = spl_idx_node; - node_cur_le0->homo_idx_buf_off = homo_idx_num_eq1_ + homo_idx_num_gt1_; - node_cur_le0->son_1st_off = 0; - homo_idx_num_eq1_ += homo_num; - } else { - node_cur_ge1 = son_1st_ge1 + son_pos; - node_cur_ge1->spl_idx = spl_idx_node; - - set_homo_id_buf_offset(node_cur_ge1, - (homo_idx_num_eq1_ + homo_idx_num_gt1_)); - set_son_offset(node_cur_ge1, 0); - homo_idx_num_gt1_ += homo_num; - } - - if (homo_num > 0) { - LemmaIdType* idx_buf = homo_idx_buf_ + homo_idx_num_eq1_ + - homo_idx_num_gt1_ - homo_num; - if (0 == level) { - assert(homo_num <= 65535); - node_cur_le0->num_of_homo = static_cast(homo_num); - } else { - assert(homo_num <= 255); - node_cur_ge1->num_of_homo = (unsigned char)homo_num; - } - - for (size_t homo_pos = 0; homo_pos < homo_num; homo_pos++) { - idx_buf[homo_pos] = lemma_arr[item_start_next + homo_pos].idx_by_hz; - } - -#ifdef ___DO_STATISTICS___ - if (homo_num > max_homobuf_len_[level]) - max_homobuf_len_[level] = homo_num; - - total_homo_num_[level] += homo_num; -#endif - } - - if (item_end - item_start_next > homo_num) { - void *next_parent; - if (0 == level) - next_parent = static_cast(node_cur_le0); - else - next_parent = static_cast(node_cur_ge1); - construct_subset(next_parent, lemma_arr, - item_start_next + homo_num, item_end, level + 1); -#ifdef ___DO_STATISTICS___ - - total_node_hasson_[level] += 1; - allson_noson = 
false; -#endif - } - -#ifdef ___DO_STATISTICS___ - if (allson_noson) { - total_sonbuf_allnoson_[level] += 1; - total_node_in_sonbuf_allnoson_[level] += parent_son_num; - } -#endif - - assert(son_pos + 1 == parent_son_num); - return true; -} - -#ifdef ___DO_STATISTICS___ -void DictBuilder::stat_init() { - memset(max_sonbuf_len_, 0, sizeof(size_t) * kMaxLemmaSize); - memset(max_homobuf_len_, 0, sizeof(size_t) * kMaxLemmaSize); - memset(total_son_num_, 0, sizeof(size_t) * kMaxLemmaSize); - memset(total_node_hasson_, 0, sizeof(size_t) * kMaxLemmaSize); - memset(total_sonbuf_num_, 0, sizeof(size_t) * kMaxLemmaSize); - memset(total_sonbuf_allnoson_, 0, sizeof(size_t) * kMaxLemmaSize); - memset(total_node_in_sonbuf_allnoson_, 0, sizeof(size_t) * kMaxLemmaSize); - memset(total_homo_num_, 0, sizeof(size_t) * kMaxLemmaSize); - - sonbufs_num1_ = 0; - sonbufs_numgt1_ = 0; - total_lma_node_num_ = 0; -} - -void DictBuilder::stat_print() { - printf("\n------------STAT INFO-------------\n"); - printf("[root is layer -1]\n"); - printf(".. max_sonbuf_len per layer(from layer 0):\n "); - for (size_t i = 0; i < kMaxLemmaSize; i++) - printf("%d, ", max_sonbuf_len_[i]); - printf("-, \n"); - - printf(".. max_homobuf_len per layer:\n -, "); - for (size_t i = 0; i < kMaxLemmaSize; i++) - printf("%d, ", max_homobuf_len_[i]); - printf("\n"); - - printf(".. total_son_num per layer:\n "); - for (size_t i = 0; i < kMaxLemmaSize; i++) - printf("%d, ", total_son_num_[i]); - printf("-, \n"); - - printf(".. total_node_hasson per layer:\n 1, "); - for (size_t i = 0; i < kMaxLemmaSize; i++) - printf("%d, ", total_node_hasson_[i]); - printf("\n"); - - printf(".. total_sonbuf_num per layer:\n "); - for (size_t i = 0; i < kMaxLemmaSize; i++) - printf("%d, ", total_sonbuf_num_[i]); - printf("-, \n"); - - printf(".. total_sonbuf_allnoson per layer:\n "); - for (size_t i = 0; i < kMaxLemmaSize; i++) - printf("%d, ", total_sonbuf_allnoson_[i]); - printf("-, \n"); - - printf(".. 
total_node_in_sonbuf_allnoson per layer:\n "); - for (size_t i = 0; i < kMaxLemmaSize; i++) - printf("%d, ", total_node_in_sonbuf_allnoson_[i]); - printf("-, \n"); - - printf(".. total_homo_num per layer:\n 0, "); - for (size_t i = 0; i < kMaxLemmaSize; i++) - printf("%d, ", total_homo_num_[i]); - printf("\n"); - - printf(".. son buf allocation number with only 1 son: %d\n", sonbufs_num1_); - printf(".. son buf allocation number with more than 1 son: %d\n", - sonbufs_numgt1_); - printf(".. total lemma node number: %d\n", total_lma_node_num_ + 1); -} -#endif // ___DO_STATISTICS___ - -#endif // ___BUILD_MODEL___ -} // namespace ime_pinyin diff --git a/dictbuilder.h b/dictbuilder.h deleted file mode 100644 index da0d6cd..0000000 --- a/dictbuilder.h +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PINYINIME_INCLUDE_DICTBUILDER_H__ -#define PINYINIME_INCLUDE_DICTBUILDER_H__ - -#include -#include "./utf16char.h" -#include "./dictdef.h" -#include "./dictlist.h" -#include "./spellingtable.h" -#include "./spellingtrie.h" -#include "./splparser.h" - -namespace ime_pinyin { - -#ifdef ___BUILD_MODEL___ - -#define ___DO_STATISTICS___ - -class DictTrie; - -class DictBuilder { - private: - // The raw lemma array buffer. - LemmaEntry *lemma_arr_; - size_t lemma_num_; - - // Used to store all possible single char items. 
- // Two items may have the same Hanzi while their spelling ids are different. - SingleCharItem *scis_; - size_t scis_num_; - - // In the tree, root's level is -1. - // Lemma nodes for root, and level 0 - LmaNodeLE0 *lma_nodes_le0_; - - // Lemma nodes for layers whose levels are deeper than 0 - LmaNodeGE1 *lma_nodes_ge1_; - - // Number of used lemma nodes - size_t lma_nds_used_num_le0_; - size_t lma_nds_used_num_ge1_; - - // Used to store homophonies' ids. - LemmaIdType *homo_idx_buf_; - // Number of homophonies each of which only contains one Chinese character. - size_t homo_idx_num_eq1_; - // Number of homophonies each of which contains more than one character. - size_t homo_idx_num_gt1_; - - // The items with highest scores. - LemmaEntry *top_lmas_; - size_t top_lmas_num_; - - SpellingTable *spl_table_; - SpellingParser *spl_parser_; - -#ifdef ___DO_STATISTICS___ - size_t max_sonbuf_len_[kMaxLemmaSize]; - size_t max_homobuf_len_[kMaxLemmaSize]; - - size_t total_son_num_[kMaxLemmaSize]; - size_t total_node_hasson_[kMaxLemmaSize]; - size_t total_sonbuf_num_[kMaxLemmaSize]; - size_t total_sonbuf_allnoson_[kMaxLemmaSize]; - size_t total_node_in_sonbuf_allnoson_[kMaxLemmaSize]; - size_t total_homo_num_[kMaxLemmaSize]; - - size_t sonbufs_num1_; // Number of son buffer with only 1 son - size_t sonbufs_numgt1_; // Number of son buffer with more 1 son; - - size_t total_lma_node_num_; - - void stat_init(); - void stat_print(); -#endif - - public: - - DictBuilder(); - ~DictBuilder(); - - // Build dictionary trie from the file fn_raw. File fn_validhzs provides - // valid chars. If fn_validhzs is NULL, only chars in GB2312 will be - // included. - bool build_dict(const char* fn_raw, const char* fn_validhzs, - DictTrie *dict_trie); - - private: - // Fill in the buffer with id. The caller guarantees that the paramters are - // vaild. - void id_to_charbuf(unsigned char *buf, LemmaIdType id); - - // Update the offset of sons for a node. 
- void set_son_offset(LmaNodeGE1 *node, size_t offset); - - // Update the offset of homophonies' ids for a node. - void set_homo_id_buf_offset(LmaNodeGE1 *node, size_t offset); - - // Format a speling string. - void format_spelling_str(char *spl_str); - - // Sort the lemma_arr by the hanzi string, and give each of unique items - // a id. Why we need to sort the lemma list according to their Hanzi string - // is to find items started by a given prefix string to do prediction. - // Actually, the single char items are be in other order, for example, - // in spelling id order, etc. - // Return value is next un-allocated idx available. - LemmaIdType sort_lemmas_by_hz(); - - // Build the SingleCharItem list, and fill the hanzi_scis_ids in the - // lemma buffer lemma_arr_. - // This function should be called after the lemma array is ready. - // Return the number of unique SingleCharItem elements. - size_t build_scis(); - - // Construct a subtree using a subset of the spelling array (from - // item_star to item_end) - // parent is the parent node to update the necessary information - // parent can be a member of LmaNodeLE0 or LmaNodeGE1 - bool construct_subset(void* parent, LemmaEntry* lemma_arr, - size_t item_start, size_t item_end, size_t level); - - - // Read valid Chinese Hanzis from the given file. - // num is used to return number of chars. - // The return buffer is sorted and caller needs to free the returned buffer. - char16* read_valid_hanzis(const char *fn_validhzs, size_t *num); - - - // Read a raw dictionary. max_item is the maximum number of items. If there - // are more items in the ditionary, only the first max_item will be read. - // Returned value is the number of items successfully read from the file. - size_t read_raw_dict(const char* fn_raw, const char *fn_validhzs, - size_t max_item); - - // Try to find if a character is in hzs buffer. 
- bool hz_in_hanzis_list(const char16 *hzs, size_t hzs_len, char16 hz); - - // Try to find if all characters in str are in hzs buffer. - bool str_in_hanzis_list(const char16 *hzs, size_t hzs_len, - const char16 *str, size_t str_len); - - // Get these lemmas with toppest scores. - void get_top_lemmas(); - - // Allocate resource to build dictionary. - // lma_num is the number of items to be loaded - bool alloc_resource(size_t lma_num); - - // Free resource. - void free_resource(); -}; -#endif // ___BUILD_MODEL___ -} - -#endif // PINYINIME_INCLUDE_DICTBUILDER_H__ diff --git a/dictlist.cpp b/dictlist.cpp deleted file mode 100644 index 20554fc..0000000 --- a/dictlist.cpp +++ /dev/null @@ -1,446 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include "./dictlist.h" -#include "./mystdlib.h" -#include "./ngram.h" -#include "./searchutility.h" - -namespace ime_pinyin { - -DictList::DictList() { - initialized_ = false; - scis_num_ = 0; - scis_hz_ = NULL; - scis_splid_ = NULL; - buf_ = NULL; - spl_trie_ = SpellingTrie::get_cpinstance(); - - assert(kMaxLemmaSize == 8); - cmp_func_[0] = cmp_hanzis_1; - cmp_func_[1] = cmp_hanzis_2; - cmp_func_[2] = cmp_hanzis_3; - cmp_func_[3] = cmp_hanzis_4; - cmp_func_[4] = cmp_hanzis_5; - cmp_func_[5] = cmp_hanzis_6; - cmp_func_[6] = cmp_hanzis_7; - cmp_func_[7] = cmp_hanzis_8; -} - -DictList::~DictList() { - free_resource(); -} - -bool DictList::alloc_resource(size_t buf_size, size_t scis_num) { - // Allocate memory - buf_ = static_cast(malloc(buf_size * sizeof(char16))); - if (NULL == buf_) - return false; - - scis_num_ = scis_num; - - scis_hz_ = static_cast(malloc(scis_num_ * sizeof(char16))); - if (NULL == scis_hz_) - return false; - - scis_splid_ = static_cast - (malloc(scis_num_ * sizeof(SpellingId))); - - if (NULL == scis_splid_) - return false; - - return true; -} - -void DictList::free_resource() { - if (NULL != buf_) - free(buf_); - buf_ = NULL; - - if (NULL != scis_hz_) - free(scis_hz_); - scis_hz_ = NULL; - - if (NULL != scis_splid_) - free(scis_splid_); - scis_splid_ = NULL; -} - -#ifdef ___BUILD_MODEL___ -bool DictList::init_list(const SingleCharItem *scis, size_t scis_num, - const LemmaEntry *lemma_arr, size_t lemma_num) { - if (NULL == scis || 0 == scis_num || NULL == lemma_arr || 0 == lemma_num) - return false; - - initialized_ = false; - - if (NULL != buf_) - free(buf_); - - // calculate the size - size_t buf_size = calculate_size(lemma_arr, lemma_num); - if (0 == buf_size) - return false; - - if (!alloc_resource(buf_size, scis_num)) - return false; - - fill_scis(scis, scis_num); - - // Copy the related content from the array to inner buffer - fill_list(lemma_arr, lemma_num); - - initialized_ = true; - return true; -} 
- -size_t DictList::calculate_size(const LemmaEntry* lemma_arr, size_t lemma_num) { - size_t last_hz_len = 0; - size_t list_size = 0; - size_t id_num = 0; - - for (size_t i = 0; i < lemma_num; i++) { - if (0 == i) { - last_hz_len = lemma_arr[i].hz_str_len; - - assert(last_hz_len > 0); - assert(lemma_arr[0].idx_by_hz == 1); - - id_num++; - start_pos_[0] = 0; - start_id_[0] = id_num; - - last_hz_len = 1; - list_size += last_hz_len; - } else { - size_t current_hz_len = lemma_arr[i].hz_str_len; - - assert(current_hz_len >= last_hz_len); - - if (current_hz_len == last_hz_len) { - list_size += current_hz_len; - id_num++; - } else { - for (size_t len = last_hz_len; len < current_hz_len - 1; len++) { - start_pos_[len] = start_pos_[len - 1]; - start_id_[len] = start_id_[len - 1]; - } - - start_pos_[current_hz_len - 1] = list_size; - - id_num++; - start_id_[current_hz_len - 1] = id_num; - - last_hz_len = current_hz_len; - list_size += current_hz_len; - } - } - } - - for (size_t i = last_hz_len; i <= kMaxLemmaSize; i++) { - if (0 == i) { - start_pos_[0] = 0; - start_id_[0] = 1; - } else { - start_pos_[i] = list_size; - start_id_[i] = id_num; - } - } - - return start_pos_[kMaxLemmaSize]; -} - -void DictList::fill_scis(const SingleCharItem *scis, size_t scis_num) { - assert(scis_num_ == scis_num); - - for (size_t pos = 0; pos < scis_num_; pos++) { - scis_hz_[pos] = scis[pos].hz; - scis_splid_[pos] = scis[pos].splid; - } -} - -void DictList::fill_list(const LemmaEntry* lemma_arr, size_t lemma_num) { - size_t current_pos = 0; - - utf16_strncpy(buf_, lemma_arr[0].hanzi_str, - lemma_arr[0].hz_str_len); - - current_pos = lemma_arr[0].hz_str_len; - - size_t id_num = 1; - - for (size_t i = 1; i < lemma_num; i++) { - utf16_strncpy(buf_ + current_pos, lemma_arr[i].hanzi_str, - lemma_arr[i].hz_str_len); - - id_num++; - current_pos += lemma_arr[i].hz_str_len; - } - - assert(current_pos == start_pos_[kMaxLemmaSize]); - assert(id_num == start_id_[kMaxLemmaSize]); -} - -char16* 
DictList::find_pos2_startedbyhz(char16 hz_char) { - char16 *found_2w = static_cast - (mybsearch(&hz_char, buf_ + start_pos_[1], - (start_pos_[2] - start_pos_[1]) / 2, - sizeof(char16) * 2, cmp_hanzis_1)); - if (NULL == found_2w) - return NULL; - - while (found_2w > buf_ + start_pos_[1] && *found_2w == *(found_2w - 1)) - found_2w -= 2; - - return found_2w; -} -#endif // ___BUILD_MODEL___ - -char16* DictList::find_pos_startedbyhzs(const char16 last_hzs[], - size_t word_len, int (*cmp_func)(const void *, const void *)) { - char16 *found_w = static_cast - (mybsearch(last_hzs, buf_ + start_pos_[word_len - 1], - (start_pos_[word_len] - start_pos_[word_len - 1]) - / word_len, - sizeof(char16) * word_len, cmp_func)); - - if (NULL == found_w) - return NULL; - - while (found_w > buf_ + start_pos_[word_len -1] && - cmp_func(found_w, found_w - word_len) == 0) - found_w -= word_len; - - return found_w; -} - -size_t DictList::predict(const char16 last_hzs[], uint16 hzs_len, - NPredictItem *npre_items, size_t npre_max, - size_t b4_used) { - assert(hzs_len <= kMaxPredictSize && hzs_len > 0); - - // 1. Prepare work - int (*cmp_func)(const void *, const void *) = cmp_func_[hzs_len - 1]; - - NGram& ngram = NGram::get_instance(); - - size_t item_num = 0; - - // 2. 
Do prediction - for (uint16 pre_len = 1; pre_len <= kMaxPredictSize + 1 - hzs_len; - pre_len++) { - uint16 word_len = hzs_len + pre_len; - char16 *w_buf = find_pos_startedbyhzs(last_hzs, word_len, cmp_func); - if (NULL == w_buf) - continue; - while (w_buf < buf_ + start_pos_[word_len] && - cmp_func(w_buf, last_hzs) == 0 && - item_num < npre_max) { - memset(npre_items + item_num, 0, sizeof(NPredictItem)); - utf16_strncpy(npre_items[item_num].pre_hzs, w_buf + hzs_len, pre_len); - npre_items[item_num].psb = - ngram.get_uni_psb((size_t)(w_buf - buf_ - start_pos_[word_len - 1]) - / word_len + start_id_[word_len - 1]); - npre_items[item_num].his_len = hzs_len; - item_num++; - w_buf += word_len; - } - } - - size_t new_num = 0; - for (size_t i = 0; i < item_num; i++) { - // Try to find it in the existing items - size_t e_pos; - for (e_pos = 1; e_pos <= b4_used; e_pos++) { - if (utf16_strncmp((*(npre_items - e_pos)).pre_hzs, npre_items[i].pre_hzs, - kMaxPredictSize) == 0) - break; - } - if (e_pos <= b4_used) - continue; - - // If not found, append it to the buffer - npre_items[new_num] = npre_items[i]; - new_num++; - } - - return new_num; -} - -uint16 DictList::get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, - uint16 str_max) { - if (!initialized_ || id_lemma >= start_id_[kMaxLemmaSize] || NULL == str_buf - || str_max <= 1) - return 0; - - // Find the range - for (uint16 i = 0; i < kMaxLemmaSize; i++) { - if (i + 1 > str_max - 1) - return 0; - if (start_id_[i] <= id_lemma && start_id_[i + 1] > id_lemma) { - size_t id_span = id_lemma - start_id_[i]; - - uint16 *buf = buf_ + start_pos_[i] + id_span * (i + 1); - for (uint16 len = 0; len <= i; len++) { - str_buf[len] = buf[len]; - } - str_buf[i+1] = (char16)'\0'; - return i + 1; - } - } - return 0; -} - -uint16 DictList::get_splids_for_hanzi(char16 hanzi, uint16 half_splid, - uint16 *splids, uint16 max_splids) { - char16 *hz_found = static_cast - (mybsearch(&hanzi, scis_hz_, scis_num_, sizeof(char16), cmp_hanzis_1)); - 
assert(NULL != hz_found && hanzi == *hz_found); - - // Move to the first one. - while (hz_found > scis_hz_ && hanzi == *(hz_found - 1)) - hz_found--; - - // First try to found if strict comparison result is not zero. - char16 *hz_f = hz_found; - bool strict = false; - while (hz_f < scis_hz_ + scis_num_ && hanzi == *hz_f) { - uint16 pos = hz_f - scis_hz_; - if (0 == half_splid || scis_splid_[pos].half_splid == half_splid) { - strict = true; - } - hz_f++; - } - - uint16 found_num = 0; - while (hz_found < scis_hz_ + scis_num_ && hanzi == *hz_found) { - uint16 pos = hz_found - scis_hz_; - if (0 == half_splid || - (strict && scis_splid_[pos].half_splid == half_splid) || - (!strict && spl_trie_->half_full_compatible(half_splid, - scis_splid_[pos].full_splid))) { - assert(found_num + 1 < max_splids); - splids[found_num] = scis_splid_[pos].full_splid; - found_num++; - } - hz_found++; - } - - return found_num; -} - -LemmaIdType DictList::get_lemma_id(const char16 *str, uint16 str_len) { - if (NULL == str || str_len > kMaxLemmaSize) - return 0; - - char16 *found = find_pos_startedbyhzs(str, str_len, cmp_func_[str_len - 1]); - if (NULL == found) - return 0; - - assert(found > buf_); - assert(static_cast(found - buf_) >= start_pos_[str_len - 1]); - return static_cast - (start_id_[str_len - 1] + - (found - buf_ - start_pos_[str_len - 1]) / str_len); -} - -void DictList::convert_to_hanzis(char16 *str, uint16 str_len) { - assert(NULL != str); - - for (uint16 str_pos = 0; str_pos < str_len; str_pos++) { - str[str_pos] = scis_hz_[str[str_pos]]; - } -} - -void DictList::convert_to_scis_ids(char16 *str, uint16 str_len) { - assert(NULL != str); - - for (uint16 str_pos = 0; str_pos < str_len; str_pos++) { - str[str_pos] = 0x100; - } -} - -bool DictList::save_list(FILE *fp) { - if (!initialized_ || NULL == fp) - return false; - - if (NULL == buf_ || 0 == start_pos_[kMaxLemmaSize] || - NULL == scis_hz_ || NULL == scis_splid_ || 0 == scis_num_) - return false; - - if (fwrite(&scis_num_, 
sizeof(size_t), 1, fp) != 1) - return false; - - if (fwrite(start_pos_, sizeof(size_t), kMaxLemmaSize + 1, fp) != - kMaxLemmaSize + 1) - return false; - - if (fwrite(start_id_, sizeof(size_t), kMaxLemmaSize + 1, fp) != - kMaxLemmaSize + 1) - return false; - - if (fwrite(scis_hz_, sizeof(char16), scis_num_, fp) != scis_num_) - return false; - - if (fwrite(scis_splid_, sizeof(SpellingId), scis_num_, fp) != scis_num_) - return false; - - if (fwrite(buf_, sizeof(char16), start_pos_[kMaxLemmaSize], fp) != - start_pos_[kMaxLemmaSize]) - return false; - - return true; -} - -bool DictList::load_list(FILE *fp) { - if (NULL == fp) - return false; - - initialized_ = false; - - if (fread(&scis_num_, sizeof(uint32), 1, fp) != 1) - return false; - - if (fread(start_pos_, sizeof(uint32), kMaxLemmaSize + 1, fp) != - kMaxLemmaSize + 1) - return false; - - if (fread(start_id_, sizeof(uint32), kMaxLemmaSize + 1, fp) != - kMaxLemmaSize + 1) - return false; - - free_resource(); - - if (!alloc_resource(start_pos_[kMaxLemmaSize], scis_num_)) - return false; - - if (fread(scis_hz_, sizeof(char16), scis_num_, fp) != scis_num_) - return false; - - if (fread(scis_splid_, sizeof(SpellingId), scis_num_, fp) != scis_num_) - return false; - - if (fread(buf_, sizeof(char16), start_pos_[kMaxLemmaSize], fp) != - start_pos_[kMaxLemmaSize]) - return false; - - initialized_ = true; - return true; -} -} // namespace ime_pinyin diff --git a/dictlist.h b/dictlist.h deleted file mode 100644 index b0eb2d0..0000000 --- a/dictlist.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PINYINIME_INCLUDE_DICTLIST_H__ -#define PINYINIME_INCLUDE_DICTLIST_H__ - -#include -#include -#include "./dictdef.h" -#include "./searchutility.h" -#include "./spellingtrie.h" -#include "./utf16char.h" - -namespace ime_pinyin { - -class DictList { - private: - bool initialized_; - - const SpellingTrie *spl_trie_; - - // Number of SingCharItem. The first is blank, because id 0 is invalid. - size_t scis_num_; - char16 *scis_hz_; - SpellingId *scis_splid_; - - // The large memory block to store the word list. - char16 *buf_; - - // Starting position of those words whose lengths are i+1, counted in - // char16 - uint32 start_pos_[kMaxLemmaSize + 1]; - - uint32 start_id_[kMaxLemmaSize + 1]; - - int (*cmp_func_[kMaxLemmaSize])(const void *, const void *); - - bool alloc_resource(size_t buf_size, size_t scim_num); - - void free_resource(); - -#ifdef ___BUILD_MODEL___ - // Calculate the requsted memory, including the start_pos[] buffer. - size_t calculate_size(const LemmaEntry *lemma_arr, size_t lemma_num); - - void fill_scis(const SingleCharItem *scis, size_t scis_num); - - // Copy the related content to the inner buffer - // It should be called after calculate_size() - void fill_list(const LemmaEntry *lemma_arr, size_t lemma_num); - - // Find the starting position for the buffer of those 2-character Chinese word - // whose first character is the given Chinese character. - char16* find_pos2_startedbyhz(char16 hz_char); -#endif - - // Find the starting position for the buffer of those words whose lengths are - // word_len. 
The given parameter cmp_func decides how many characters from - // beginning will be used to compare. - char16* find_pos_startedbyhzs(const char16 last_hzs[], - size_t word_Len, - int (*cmp_func)(const void *, const void *)); - - public: - - DictList(); - ~DictList(); - - bool save_list(FILE *fp); - bool load_list(FILE *fp); - -#ifdef ___BUILD_MODEL___ - // Init the list from the LemmaEntry array. - // lemma_arr should have been sorted by the hanzi_str, and have been given - // ids from 1 - bool init_list(const SingleCharItem *scis, size_t scis_num, - const LemmaEntry *lemma_arr, size_t lemma_num); -#endif - - // Get the hanzi string for the given id - uint16 get_lemma_str(LemmaIdType id_hz, char16 *str_buf, uint16 str_max); - - void convert_to_hanzis(char16 *str, uint16 str_len); - - void convert_to_scis_ids(char16 *str, uint16 str_len); - - // last_hzs stores the last n Chinese characters history, its length should be - // less or equal than kMaxPredictSize. - // hzs_len specifies the length(<= kMaxPredictSize). - // predict_buf is used to store the result. - // buf_len specifies the buffer length. - // b4_used specifies how many items before predict_buf have been used. - // Returned value is the number of newly added items. - size_t predict(const char16 last_hzs[], uint16 hzs_len, - NPredictItem *npre_items, size_t npre_max, - size_t b4_used); - - // If half_splid is a valid half spelling id, return those full spelling - // ids which share this half id. 
- uint16 get_splids_for_hanzi(char16 hanzi, uint16 half_splid, - uint16 *splids, uint16 max_splids); - - LemmaIdType get_lemma_id(const char16 *str, uint16 str_len); -}; -} - -#endif // PINYINIME_INCLUDE_DICTLIST_H__ diff --git a/dicttrie.cpp b/dicttrie.cpp deleted file mode 100644 index 8ee3678..0000000 --- a/dicttrie.cpp +++ /dev/null @@ -1,941 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include "./dicttrie.h" -#include "./dictbuilder.h" -#include "./lpicache.h" -#include "./mystdlib.h" -#include "./ngram.h" - -namespace ime_pinyin { - -DictTrie::DictTrie() { - spl_trie_ = SpellingTrie::get_cpinstance(); - - root_ = NULL; - splid_le0_index_ = NULL; - lma_node_num_le0_ = 0; - nodes_ge1_ = NULL; - lma_node_num_ge1_ = 0; - lma_idx_buf_ = NULL; - lma_idx_buf_len_ = 0; - total_lma_num_ = 0; - top_lmas_num_ = 0; - dict_list_ = NULL; - - parsing_marks_ = NULL; - mile_stones_ = NULL; - reset_milestones(0, kFirstValidMileStoneHandle); -} - -DictTrie::~DictTrie() { - free_resource(true); -} - -void DictTrie::free_resource(bool free_dict_list) { - if (NULL != root_) - free(root_); - root_ = NULL; - - if (NULL != splid_le0_index_) - free(splid_le0_index_); - splid_le0_index_ = NULL; - - if (NULL != nodes_ge1_) - free(nodes_ge1_); - nodes_ge1_ = NULL; - - if (NULL != nodes_ge1_) - free(nodes_ge1_); - nodes_ge1_ = NULL; - - if (free_dict_list) { - if (NULL != 
dict_list_) { - delete dict_list_; - } - dict_list_ = NULL; - } - - if (parsing_marks_) - delete [] parsing_marks_; - parsing_marks_ = NULL; - - if (mile_stones_) - delete [] mile_stones_; - mile_stones_ = NULL; - - reset_milestones(0, kFirstValidMileStoneHandle); -} - -inline size_t DictTrie::get_son_offset(const LmaNodeGE1 *node) { - return ((size_t)node->son_1st_off_l + ((size_t)node->son_1st_off_h << 16)); -} - -inline size_t DictTrie::get_homo_idx_buf_offset(const LmaNodeGE1 *node) { - return ((size_t)node->homo_idx_buf_off_l + - ((size_t)node->homo_idx_buf_off_h << 16)); -} - -inline LemmaIdType DictTrie::get_lemma_id(size_t id_offset) { - LemmaIdType id = 0; - for (uint16 pos = kLemmaIdSize - 1; pos > 0; pos--) - id = (id << 8) + lma_idx_buf_[id_offset * kLemmaIdSize + pos]; - id = (id << 8) + lma_idx_buf_[id_offset * kLemmaIdSize]; - return id; -} - -#ifdef ___BUILD_MODEL___ -bool DictTrie::build_dict(const char* fn_raw, const char* fn_validhzs) { - DictBuilder* dict_builder = new DictBuilder(); - - free_resource(true); - - return dict_builder->build_dict(fn_raw, fn_validhzs, this); -} - -bool DictTrie::save_dict(FILE *fp) { - if (NULL == fp) - return false; - - if (fwrite(&lma_node_num_le0_, sizeof(uint32), 1, fp) != 1) - return false; - - if (fwrite(&lma_node_num_ge1_, sizeof(uint32), 1, fp) != 1) - return false; - - if (fwrite(&lma_idx_buf_len_, sizeof(uint32), 1, fp) != 1) - return false; - - if (fwrite(&top_lmas_num_, sizeof(uint32), 1, fp) != 1) - return false; - - if (fwrite(root_, sizeof(LmaNodeLE0), lma_node_num_le0_, fp) - != lma_node_num_le0_) - return false; - - if (fwrite(nodes_ge1_, sizeof(LmaNodeGE1), lma_node_num_ge1_, fp) - != lma_node_num_ge1_) - return false; - - if (fwrite(lma_idx_buf_, sizeof(unsigned char), lma_idx_buf_len_, fp) != - lma_idx_buf_len_) - return false; - - return true; -} - -bool DictTrie::save_dict(const char *filename) { - if (NULL == filename) - return false; - - if (NULL == root_ || NULL == dict_list_) - return 
false; - - SpellingTrie &spl_trie = SpellingTrie::get_instance(); - NGram &ngram = NGram::get_instance(); - - FILE *fp = fopen(filename, "wb"); - if (NULL == fp) - return false; - - if (!spl_trie.save_spl_trie(fp) || !dict_list_->save_list(fp) || - !save_dict(fp) || !ngram.save_ngram(fp)) { - fclose(fp); - return false; - } - - fclose(fp); - return true; -} -#endif // ___BUILD_MODEL___ - -bool DictTrie::load_dict(FILE *fp) { - if (NULL == fp) - return false; - if (fread(&lma_node_num_le0_, sizeof(uint32), 1, fp) != 1) - return false; - - if (fread(&lma_node_num_ge1_, sizeof(uint32), 1, fp) != 1) - return false; - - if (fread(&lma_idx_buf_len_, sizeof(uint32), 1, fp) != 1) - return false; - - if (fread(&top_lmas_num_, sizeof(uint32), 1, fp) != 1 || - top_lmas_num_ >= lma_idx_buf_len_) - return false; - - free_resource(false); - - root_ = static_cast - (malloc(lma_node_num_le0_ * sizeof(LmaNodeLE0))); - nodes_ge1_ = static_cast - (malloc(lma_node_num_ge1_ * sizeof(LmaNodeGE1))); - lma_idx_buf_ = (unsigned char*)malloc(lma_idx_buf_len_); - total_lma_num_ = lma_idx_buf_len_ / kLemmaIdSize; - - size_t buf_size = SpellingTrie::get_instance().get_spelling_num() + 1; - assert(lma_node_num_le0_ <= buf_size); - splid_le0_index_ = static_cast(malloc(buf_size * sizeof(uint16))); - - // Init the space for parsing. 
- parsing_marks_ = new ParsingMark[kMaxParsingMark]; - mile_stones_ = new MileStone[kMaxMileStone]; - reset_milestones(0, kFirstValidMileStoneHandle); - - if (NULL == root_ || NULL == nodes_ge1_ || NULL == lma_idx_buf_ || - NULL == splid_le0_index_ || NULL == parsing_marks_ || - NULL == mile_stones_) { - free_resource(false); - return false; - } - - if (fread(root_, sizeof(LmaNodeLE0), lma_node_num_le0_, fp) - != lma_node_num_le0_) - return false; - - if (fread(nodes_ge1_, sizeof(LmaNodeGE1), lma_node_num_ge1_, fp) - != lma_node_num_ge1_) - return false; - - if (fread(lma_idx_buf_, sizeof(unsigned char), lma_idx_buf_len_, fp) != - lma_idx_buf_len_) - return false; - - // The quick index for the first level sons - uint16 last_splid = kFullSplIdStart; - size_t last_pos = 0; - for (size_t i = 1; i < lma_node_num_le0_; i++) { - for (uint16 splid = last_splid; splid < root_[i].spl_idx; splid++) - splid_le0_index_[splid - kFullSplIdStart] = last_pos; - - splid_le0_index_[root_[i].spl_idx - kFullSplIdStart] = - static_cast(i); - last_splid = root_[i].spl_idx; - last_pos = i; - } - - for (uint16 splid = last_splid + 1; - splid < buf_size + kFullSplIdStart; splid++) { - assert(static_cast(splid - kFullSplIdStart) < buf_size); - splid_le0_index_[splid - kFullSplIdStart] = last_pos + 1; - } - - return true; -} - -bool DictTrie::load_dict(const char *filename, LemmaIdType start_id, - LemmaIdType end_id) { - if (NULL == filename || end_id <= start_id) - return false; - - FILE *fp = fopen(filename, "rb"); - if (NULL == fp) - return false; - - free_resource(true); - - dict_list_ = new DictList(); - if (NULL == dict_list_) { - fclose(fp); - return false; - } - - SpellingTrie &spl_trie = SpellingTrie::get_instance(); - NGram &ngram = NGram::get_instance(); - - if (!spl_trie.load_spl_trie(fp) || !dict_list_->load_list(fp) || - !load_dict(fp) || !ngram.load_ngram(fp) || - total_lma_num_ > end_id - start_id + 1) { - free_resource(true); - fclose(fp); - return false; - } - - 
fclose(fp); - return true; -} - -bool DictTrie::load_dict_fd(int sys_fd, long start_offset, - long length, LemmaIdType start_id, - LemmaIdType end_id) { - if (start_offset < 0 || length <= 0 || end_id <= start_id) - return false; - - FILE *fp = fdopen(sys_fd, "rb"); - if (NULL == fp) - return false; - - if (-1 == fseek(fp, start_offset, SEEK_SET)) { - fclose(fp); - return false; - } - - free_resource(true); - - dict_list_ = new DictList(); - if (NULL == dict_list_) { - fclose(fp); - return false; - } - - SpellingTrie &spl_trie = SpellingTrie::get_instance(); - NGram &ngram = NGram::get_instance(); - - if (!spl_trie.load_spl_trie(fp) || !dict_list_->load_list(fp) || - !load_dict(fp) || !ngram.load_ngram(fp) || - ftell(fp) < start_offset + length || - total_lma_num_ > end_id - start_id + 1) { - free_resource(true); - fclose(fp); - return false; - } - - fclose(fp); - return true; -} - -size_t DictTrie::fill_lpi_buffer(LmaPsbItem lpi_items[], size_t lpi_max, - LmaNodeLE0 *node) { - size_t lpi_num = 0; - NGram& ngram = NGram::get_instance(); - for (size_t homo = 0; homo < (size_t)node->num_of_homo; homo++) { - lpi_items[lpi_num].id = get_lemma_id(node->homo_idx_buf_off + - homo); - lpi_items[lpi_num].lma_len = 1; - lpi_items[lpi_num].psb = - static_cast(ngram.get_uni_psb(lpi_items[lpi_num].id)); - lpi_num++; - if (lpi_num >= lpi_max) - break; - } - - return lpi_num; -} - -size_t DictTrie::fill_lpi_buffer(LmaPsbItem lpi_items[], size_t lpi_max, - size_t homo_buf_off, LmaNodeGE1 *node, - uint16 lma_len) { - size_t lpi_num = 0; - NGram& ngram = NGram::get_instance(); - for (size_t homo = 0; homo < (size_t)node->num_of_homo; homo++) { - lpi_items[lpi_num].id = get_lemma_id(homo_buf_off + homo); - lpi_items[lpi_num].lma_len = lma_len; - lpi_items[lpi_num].psb = - static_cast(ngram.get_uni_psb(lpi_items[lpi_num].id)); - lpi_num++; - if (lpi_num >= lpi_max) - break; - } - - return lpi_num; -} - -void DictTrie::reset_milestones(uint16 from_step, MileStoneHandle from_handle) { - 
if (0 == from_step) { - parsing_marks_pos_ = 0; - mile_stones_pos_ = kFirstValidMileStoneHandle; - } else { - if (from_handle > 0 && from_handle < mile_stones_pos_) { - mile_stones_pos_ = from_handle; - - MileStone *mile_stone = mile_stones_ + from_handle; - parsing_marks_pos_ = mile_stone->mark_start; - } - } -} - -MileStoneHandle DictTrie::extend_dict(MileStoneHandle from_handle, - const DictExtPara *dep, - LmaPsbItem *lpi_items, size_t lpi_max, - size_t *lpi_num) { - if (NULL == dep) - return 0; - - // from LmaNodeLE0 (root) to LmaNodeLE0 - if (0 == from_handle) { - assert(0 == dep->splids_extended); - return extend_dict0(from_handle, dep, lpi_items, lpi_max, lpi_num); - } - - // from LmaNodeLE0 to LmaNodeGE1 - if (1 == dep->splids_extended) - return extend_dict1(from_handle, dep, lpi_items, lpi_max, lpi_num); - - // From LmaNodeGE1 to LmaNodeGE1 - return extend_dict2(from_handle, dep, lpi_items, lpi_max, lpi_num); -} - -MileStoneHandle DictTrie::extend_dict0(MileStoneHandle from_handle, - const DictExtPara *dep, - LmaPsbItem *lpi_items, - size_t lpi_max, size_t *lpi_num) { - assert(NULL != dep && 0 == from_handle); - *lpi_num = 0; - MileStoneHandle ret_handle = 0; - - uint16 splid = dep->splids[dep->splids_extended]; - uint16 id_start = dep->id_start; - uint16 id_num = dep->id_num; - - LpiCache& lpi_cache = LpiCache::get_instance(); - bool cached = lpi_cache.is_cached(splid); - - // 2. 
Begin exgtending - // 2.1 Get the LmaPsbItem list - LmaNodeLE0 *node = root_; - size_t son_start = splid_le0_index_[id_start - kFullSplIdStart]; - size_t son_end = splid_le0_index_[id_start + id_num - kFullSplIdStart]; - for (size_t son_pos = son_start; son_pos < son_end; son_pos++) { - assert(1 == node->son_1st_off); - LmaNodeLE0 *son = root_ + son_pos; - assert(son->spl_idx >= id_start && son->spl_idx < id_start + id_num); - - if (!cached && *lpi_num < lpi_max) { - bool need_lpi = true; - if (spl_trie_->is_half_id_yunmu(splid) && son_pos != son_start) - need_lpi = false; - - if (need_lpi) - *lpi_num += fill_lpi_buffer(lpi_items + (*lpi_num), - lpi_max - *lpi_num, son); - } - - // If necessary, fill in a new mile stone. - if (son->spl_idx == id_start) { - if (mile_stones_pos_ < kMaxMileStone && - parsing_marks_pos_ < kMaxParsingMark) { - parsing_marks_[parsing_marks_pos_].node_offset = son_pos; - parsing_marks_[parsing_marks_pos_].node_num = id_num; - mile_stones_[mile_stones_pos_].mark_start = parsing_marks_pos_; - mile_stones_[mile_stones_pos_].mark_num = 1; - ret_handle = mile_stones_pos_; - parsing_marks_pos_++; - mile_stones_pos_++; - } - } - - if (son->spl_idx >= id_start + id_num -1) - break; - } - - // printf("----- parsing marks: %d, mile stone: %d \n", parsing_marks_pos_, - // mile_stones_pos_); - return ret_handle; -} - -MileStoneHandle DictTrie::extend_dict1(MileStoneHandle from_handle, - const DictExtPara *dep, - LmaPsbItem *lpi_items, - size_t lpi_max, size_t *lpi_num) { - assert(NULL != dep && from_handle > 0 && from_handle < mile_stones_pos_); - - MileStoneHandle ret_handle = 0; - - // 1. If this is a half Id, get its corresponding full starting Id and - // number of full Id. - size_t ret_val = 0; - - uint16 id_start = dep->id_start; - uint16 id_num = dep->id_num; - - // 2. Begin extending. 
- MileStone *mile_stone = mile_stones_ + from_handle; - - for (uint16 h_pos = 0; h_pos < mile_stone->mark_num; h_pos++) { - ParsingMark p_mark = parsing_marks_[mile_stone->mark_start + h_pos]; - uint16 ext_num = p_mark.node_num; - for (uint16 ext_pos = 0; ext_pos < ext_num; ext_pos++) { - LmaNodeLE0 *node = root_ + p_mark.node_offset + ext_pos; - size_t found_start = 0; - size_t found_num = 0; - for (size_t son_pos = 0; son_pos < (size_t)node->num_of_son; son_pos++) { - assert(node->son_1st_off <= lma_node_num_ge1_); - LmaNodeGE1 *son = nodes_ge1_ + node->son_1st_off + son_pos; - if (son->spl_idx >= id_start - && son->spl_idx < id_start + id_num) { - if (*lpi_num < lpi_max) { - size_t homo_buf_off = get_homo_idx_buf_offset(son); - *lpi_num += fill_lpi_buffer(lpi_items + (*lpi_num), - lpi_max - *lpi_num, homo_buf_off, son, - 2); - } - - // If necessary, fill in the new DTMI - if (0 == found_num) { - found_start = son_pos; - } - found_num++; - } - if (son->spl_idx >= id_start + id_num - 1 || son_pos == - (size_t)node->num_of_son - 1) { - if (found_num > 0) { - if (mile_stones_pos_ < kMaxMileStone && - parsing_marks_pos_ < kMaxParsingMark) { - parsing_marks_[parsing_marks_pos_].node_offset = - node->son_1st_off + found_start; - parsing_marks_[parsing_marks_pos_].node_num = found_num; - if (0 == ret_val) - mile_stones_[mile_stones_pos_].mark_start = - parsing_marks_pos_; - parsing_marks_pos_++; - } - - ret_val++; - } - break; - } // for son_pos - } // for ext_pos - } // for h_pos - } - - if (ret_val > 0) { - mile_stones_[mile_stones_pos_].mark_num = ret_val; - ret_handle = mile_stones_pos_; - mile_stones_pos_++; - ret_val = 1; - } - - // printf("----- parsing marks: %d, mile stone: %d \n", parsing_marks_pos_, - // mile_stones_pos_); - return ret_handle; -} - -MileStoneHandle DictTrie::extend_dict2(MileStoneHandle from_handle, - const DictExtPara *dep, - LmaPsbItem *lpi_items, - size_t lpi_max, size_t *lpi_num) { - assert(NULL != dep && from_handle > 0 && from_handle < 
mile_stones_pos_); - - MileStoneHandle ret_handle = 0; - - // 1. If this is a half Id, get its corresponding full starting Id and - // number of full Id. - size_t ret_val = 0; - - uint16 id_start = dep->id_start; - uint16 id_num = dep->id_num; - - // 2. Begin extending. - MileStone *mile_stone = mile_stones_ + from_handle; - - for (uint16 h_pos = 0; h_pos < mile_stone->mark_num; h_pos++) { - ParsingMark p_mark = parsing_marks_[mile_stone->mark_start + h_pos]; - uint16 ext_num = p_mark.node_num; - for (uint16 ext_pos = 0; ext_pos < ext_num; ext_pos++) { - LmaNodeGE1 *node = nodes_ge1_ + p_mark.node_offset + ext_pos; - size_t found_start = 0; - size_t found_num = 0; - - for (size_t son_pos = 0; son_pos < (size_t)node->num_of_son; son_pos++) { - assert(node->son_1st_off_l > 0 || node->son_1st_off_h > 0); - LmaNodeGE1 *son = nodes_ge1_ + get_son_offset(node) + son_pos; - if (son->spl_idx >= id_start - && son->spl_idx < id_start + id_num) { - if (*lpi_num < lpi_max) { - size_t homo_buf_off = get_homo_idx_buf_offset(son); - *lpi_num += fill_lpi_buffer(lpi_items + (*lpi_num), - lpi_max - *lpi_num, homo_buf_off, son, - dep->splids_extended + 1); - } - - // If necessary, fill in the new DTMI - if (0 == found_num) { - found_start = son_pos; - } - found_num++; - } - if (son->spl_idx >= id_start + id_num - 1 || son_pos == - (size_t)node->num_of_son - 1) { - if (found_num > 0) { - if (mile_stones_pos_ < kMaxMileStone && - parsing_marks_pos_ < kMaxParsingMark) { - parsing_marks_[parsing_marks_pos_].node_offset = - get_son_offset(node) + found_start; - parsing_marks_[parsing_marks_pos_].node_num = found_num; - if (0 == ret_val) - mile_stones_[mile_stones_pos_].mark_start = - parsing_marks_pos_; - parsing_marks_pos_++; - } - - ret_val++; - } - break; - } - } // for son_pos - } // for ext_pos - } // for h_pos - - if (ret_val > 0) { - mile_stones_[mile_stones_pos_].mark_num = ret_val; - ret_handle = mile_stones_pos_; - mile_stones_pos_++; - } - - // printf("----- parsing marks: %d, 
mile stone: %d \n", parsing_marks_pos_, - // mile_stones_pos_); - return ret_handle; -} - -bool DictTrie::try_extend(const uint16 *splids, uint16 splid_num, - LemmaIdType id_lemma) { - if (0 == splid_num || NULL == splids) - return false; - - void *node = root_ + splid_le0_index_[splids[0] - kFullSplIdStart]; - - for (uint16 pos = 1; pos < splid_num; pos++) { - if (1 == pos) { - LmaNodeLE0 *node_le0 = reinterpret_cast(node); - LmaNodeGE1 *node_son; - uint16 son_pos; - for (son_pos = 0; son_pos < static_cast(node_le0->num_of_son); - son_pos++) { - assert(node_le0->son_1st_off <= lma_node_num_ge1_); - node_son = nodes_ge1_ + node_le0->son_1st_off - + son_pos; - if (node_son->spl_idx == splids[pos]) - break; - } - if (son_pos < node_le0->num_of_son) - node = reinterpret_cast(node_son); - else - return false; - } else { - LmaNodeGE1 *node_ge1 = reinterpret_cast(node); - LmaNodeGE1 *node_son; - uint16 son_pos; - for (son_pos = 0; son_pos < static_cast(node_ge1->num_of_son); - son_pos++) { - assert(node_ge1->son_1st_off_l > 0 || node_ge1->son_1st_off_h > 0); - node_son = nodes_ge1_ + get_son_offset(node_ge1) + son_pos; - if (node_son->spl_idx == splids[pos]) - break; - } - if (son_pos < node_ge1->num_of_son) - node = reinterpret_cast(node_son); - else - return false; - } - } - - if (1 == splid_num) { - LmaNodeLE0* node_le0 = reinterpret_cast(node); - size_t num_of_homo = (size_t)node_le0->num_of_homo; - for (size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++) { - LemmaIdType id_this = get_lemma_id(node_le0->homo_idx_buf_off + homo_pos); - char16 str[2]; - get_lemma_str(id_this, str, 2); - if (id_this == id_lemma) - return true; - } - } else { - LmaNodeGE1* node_ge1 = reinterpret_cast(node); - size_t num_of_homo = (size_t)node_ge1->num_of_homo; - for (size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++) { - size_t node_homo_off = get_homo_idx_buf_offset(node_ge1); - if (get_lemma_id(node_homo_off + homo_pos) == id_lemma) - return true; - } - } - - return false; 
-} - -size_t DictTrie::get_lpis(const uint16* splid_str, uint16 splid_str_len, - LmaPsbItem* lma_buf, size_t max_lma_buf) { - if (splid_str_len > kMaxLemmaSize) - return 0; - -#define MAX_EXTENDBUF_LEN 200 - - size_t* node_buf1[MAX_EXTENDBUF_LEN]; // use size_t for data alignment - size_t* node_buf2[MAX_EXTENDBUF_LEN]; - LmaNodeLE0** node_fr_le0 = - reinterpret_cast(node_buf1); // Nodes from. - LmaNodeLE0** node_to_le0 = - reinterpret_cast(node_buf2); // Nodes to. - LmaNodeGE1** node_fr_ge1 = NULL; - LmaNodeGE1** node_to_ge1 = NULL; - size_t node_fr_num = 1; - size_t node_to_num = 0; - node_fr_le0[0] = root_; - if (NULL == node_fr_le0[0]) - return 0; - - size_t spl_pos = 0; - - while (spl_pos < splid_str_len) { - uint16 id_num = 1; - uint16 id_start = splid_str[spl_pos]; - // If it is a half id - if (spl_trie_->is_half_id(splid_str[spl_pos])) { - id_num = spl_trie_->half_to_full(splid_str[spl_pos], &id_start); - assert(id_num > 0); - } - - // Extend the nodes - if (0 == spl_pos) { // From LmaNodeLE0 (root) to LmaNodeLE0 nodes - for (size_t node_fr_pos = 0; node_fr_pos < node_fr_num; node_fr_pos++) { - LmaNodeLE0 *node = node_fr_le0[node_fr_pos]; - assert(node == root_ && 1 == node_fr_num); - size_t son_start = splid_le0_index_[id_start - kFullSplIdStart]; - size_t son_end = - splid_le0_index_[id_start + id_num - kFullSplIdStart]; - for (size_t son_pos = son_start; son_pos < son_end; son_pos++) { - assert(1 == node->son_1st_off); - LmaNodeLE0 *node_son = root_ + son_pos; - assert(node_son->spl_idx >= id_start - && node_son->spl_idx < id_start + id_num); - if (node_to_num < MAX_EXTENDBUF_LEN) { - node_to_le0[node_to_num] = node_son; - node_to_num++; - } - // id_start + id_num - 1 is the last one, which has just been - // recorded. 
- if (node_son->spl_idx >= id_start + id_num - 1) - break; - } - } - - spl_pos++; - if (spl_pos >= splid_str_len || node_to_num == 0) - break; - // Prepare the nodes for next extending - // next time, from LmaNodeLE0 to LmaNodeGE1 - LmaNodeLE0** node_tmp = node_fr_le0; - node_fr_le0 = node_to_le0; - node_to_le0 = NULL; - node_to_ge1 = reinterpret_cast(node_tmp); - } else if (1 == spl_pos) { // From LmaNodeLE0 to LmaNodeGE1 nodes - for (size_t node_fr_pos = 0; node_fr_pos < node_fr_num; node_fr_pos++) { - LmaNodeLE0 *node = node_fr_le0[node_fr_pos]; - for (size_t son_pos = 0; son_pos < (size_t)node->num_of_son; - son_pos++) { - assert(node->son_1st_off <= lma_node_num_ge1_); - LmaNodeGE1 *node_son = nodes_ge1_ + node->son_1st_off - + son_pos; - if (node_son->spl_idx >= id_start - && node_son->spl_idx < id_start + id_num) { - if (node_to_num < MAX_EXTENDBUF_LEN) { - node_to_ge1[node_to_num] = node_son; - node_to_num++; - } - } - // id_start + id_num - 1 is the last one, which has just been - // recorded. 
- if (node_son->spl_idx >= id_start + id_num - 1) - break; - } - } - - spl_pos++; - if (spl_pos >= splid_str_len || node_to_num == 0) - break; - // Prepare the nodes for next extending - // next time, from LmaNodeGE1 to LmaNodeGE1 - node_fr_ge1 = node_to_ge1; - node_to_ge1 = reinterpret_cast(node_fr_le0); - node_fr_le0 = NULL; - node_to_le0 = NULL; - } else { // From LmaNodeGE1 to LmaNodeGE1 nodes - for (size_t node_fr_pos = 0; node_fr_pos < node_fr_num; node_fr_pos++) { - LmaNodeGE1 *node = node_fr_ge1[node_fr_pos]; - for (size_t son_pos = 0; son_pos < (size_t)node->num_of_son; - son_pos++) { - assert(node->son_1st_off_l > 0 || node->son_1st_off_h > 0); - LmaNodeGE1 *node_son = nodes_ge1_ - + get_son_offset(node) + son_pos; - if (node_son->spl_idx >= id_start - && node_son->spl_idx < id_start + id_num) { - if (node_to_num < MAX_EXTENDBUF_LEN) { - node_to_ge1[node_to_num] = node_son; - node_to_num++; - } - } - // id_start + id_num - 1 is the last one, which has just been - // recorded. - if (node_son->spl_idx >= id_start + id_num - 1) - break; - } - } - - spl_pos++; - if (spl_pos >= splid_str_len || node_to_num == 0) - break; - // Prepare the nodes for next extending - // next time, from LmaNodeGE1 to LmaNodeGE1 - LmaNodeGE1 **node_tmp = node_fr_ge1; - node_fr_ge1 = node_to_ge1; - node_to_ge1 = node_tmp; - } - - // The number of node for next extending - node_fr_num = node_to_num; - node_to_num = 0; - } // while - - if (0 == node_to_num) - return 0; - - NGram &ngram = NGram::get_instance(); - size_t lma_num = 0; - - // If the length is 1, and the splid is a one-char Yunmu like 'a', 'o', 'e', - // only those candidates for the full matched one-char id will be returned. - if (1 == splid_str_len && spl_trie_->is_half_id_yunmu(splid_str[0])) - node_to_num = node_to_num > 0 ? 
1 : 0; - - for (size_t node_pos = 0; node_pos < node_to_num; node_pos++) { - size_t num_of_homo = 0; - if (spl_pos <= 1) { // Get from LmaNodeLE0 nodes - LmaNodeLE0* node_le0 = node_to_le0[node_pos]; - num_of_homo = (size_t)node_le0->num_of_homo; - for (size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++) { - size_t ch_pos = lma_num + homo_pos; - lma_buf[ch_pos].id = - get_lemma_id(node_le0->homo_idx_buf_off + homo_pos); - lma_buf[ch_pos].lma_len = 1; - lma_buf[ch_pos].psb = - static_cast(ngram.get_uni_psb(lma_buf[ch_pos].id)); - - if (lma_num + homo_pos >= max_lma_buf - 1) - break; - } - } else { // Get from LmaNodeGE1 nodes - LmaNodeGE1* node_ge1 = node_to_ge1[node_pos]; - num_of_homo = (size_t)node_ge1->num_of_homo; - for (size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++) { - size_t ch_pos = lma_num + homo_pos; - size_t node_homo_off = get_homo_idx_buf_offset(node_ge1); - lma_buf[ch_pos].id = get_lemma_id(node_homo_off + homo_pos); - lma_buf[ch_pos].lma_len = splid_str_len; - lma_buf[ch_pos].psb = - static_cast(ngram.get_uni_psb(lma_buf[ch_pos].id)); - - if (lma_num + homo_pos >= max_lma_buf - 1) - break; - } - } - - lma_num += num_of_homo; - if (lma_num >= max_lma_buf) { - lma_num = max_lma_buf; - break; - } - } - return lma_num; -} - -uint16 DictTrie::get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, - uint16 str_max) { - return dict_list_->get_lemma_str(id_lemma, str_buf, str_max); -} - -uint16 DictTrie::get_lemma_splids(LemmaIdType id_lemma, uint16 *splids, - uint16 splids_max, bool arg_valid) { - char16 lma_str[kMaxLemmaSize + 1]; - uint16 lma_len = get_lemma_str(id_lemma, lma_str, kMaxLemmaSize + 1); - assert((!arg_valid && splids_max >= lma_len) || lma_len == splids_max); - - uint16 spl_mtrx[kMaxLemmaSize * 5]; - uint16 spl_start[kMaxLemmaSize + 1]; - spl_start[0] = 0; - uint16 try_num = 1; - - for (uint16 pos = 0; pos < lma_len; pos++) { - uint16 cand_splids_this = 0; - if (arg_valid && spl_trie_->is_full_id(splids[pos])) { - 
spl_mtrx[spl_start[pos]] = splids[pos]; - cand_splids_this = 1; - } else { - cand_splids_this = dict_list_->get_splids_for_hanzi(lma_str[pos], - arg_valid ? splids[pos] : 0, spl_mtrx + spl_start[pos], - kMaxLemmaSize * 5 - spl_start[pos]); - assert(cand_splids_this > 0); - } - spl_start[pos + 1] = spl_start[pos] + cand_splids_this; - try_num *= cand_splids_this; - } - - for (uint16 try_pos = 0; try_pos < try_num; try_pos++) { - uint16 mod = 1; - for (uint16 pos = 0; pos < lma_len; pos++) { - uint16 radix = spl_start[pos + 1] - spl_start[pos]; - splids[pos] = spl_mtrx[ spl_start[pos] + try_pos / mod % radix]; - mod *= radix; - } - - if (try_extend(splids, lma_len, id_lemma)) - return lma_len; - } - - return 0; -} - -void DictTrie::set_total_lemma_count_of_others(size_t count) { - NGram& ngram = NGram::get_instance(); - ngram.set_total_freq_none_sys(count); -} - -void DictTrie::convert_to_hanzis(char16 *str, uint16 str_len) { - return dict_list_->convert_to_hanzis(str, str_len); -} - -void DictTrie::convert_to_scis_ids(char16 *str, uint16 str_len) { - return dict_list_->convert_to_scis_ids(str, str_len); -} - -LemmaIdType DictTrie::get_lemma_id(const char16 lemma_str[], uint16 lemma_len) { - if (NULL == lemma_str || lemma_len > kMaxLemmaSize) - return 0; - - return dict_list_->get_lemma_id(lemma_str, lemma_len); -} - -size_t DictTrie::predict_top_lmas(size_t his_len, NPredictItem *npre_items, - size_t npre_max, size_t b4_used) { - NGram &ngram = NGram::get_instance(); - - size_t item_num = 0; - size_t top_lmas_id_offset = lma_idx_buf_len_ / kLemmaIdSize - top_lmas_num_; - size_t top_lmas_pos = 0; - while (item_num < npre_max && top_lmas_pos < top_lmas_num_) { - memset(npre_items + item_num, 0, sizeof(NPredictItem)); - LemmaIdType top_lma_id = get_lemma_id(top_lmas_id_offset + top_lmas_pos); - top_lmas_pos += 1; - if (dict_list_->get_lemma_str(top_lma_id, - npre_items[item_num].pre_hzs, - kMaxLemmaSize - 1) == 0) { - continue; - } - npre_items[item_num].psb = 
ngram.get_uni_psb(top_lma_id); - npre_items[item_num].his_len = his_len; - item_num++; - } - return item_num; -} - -size_t DictTrie::predict(const char16 *last_hzs, uint16 hzs_len, - NPredictItem *npre_items, size_t npre_max, - size_t b4_used) { - return dict_list_->predict(last_hzs, hzs_len, npre_items, npre_max, b4_used); -} -} // namespace ime_pinyin diff --git a/dicttrie.h b/dicttrie.h deleted file mode 100644 index 3819fe0..0000000 --- a/dicttrie.h +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PINYINIME_INCLUDE_DICTTRIE_H__ -#define PINYINIME_INCLUDE_DICTTRIE_H__ - -#include -#include "./atomdictbase.h" -#include "./dictdef.h" -#include "./dictlist.h" -#include "./searchutility.h" - -namespace ime_pinyin { - -class DictTrie : AtomDictBase { - private: - struct ParsingMark { - size_t node_offset:24; - size_t node_num:8; // Number of nodes with this spelling id given - // by spl_id. If spl_id is a Shengmu, for nodes - // in the first layer of DictTrie, it equals to - // SpellingTrie::shm2full_num(); but for those - // nodes which are not in the first layer, - // node_num < SpellingTrie::shm2full_num(). - // For a full spelling id, node_num = 1; - }; - - // Used to indicate an extended mile stone. - // An extended mile stone is used to mark a partial match in the dictionary - // trie to speed up further potential extending. 
- // For example, when the user inputs "w", a mile stone is created to mark the - // partial match status, so that when user inputs another char 'm', it will be - // faster to extend search space based on this mile stone. - // - // For partial match status of "wm", there can be more than one sub mile - // stone, for example, "wm" can be matched to "wanm", "wom", ..., etc, so - // there may be more one parsing mark used to mark these partial matchings. - // A mile stone records the starting position in the mark list and number of - // marks. - struct MileStone { - uint16 mark_start; - uint16 mark_num; - }; - - DictList* dict_list_; - - const SpellingTrie *spl_trie_; - - LmaNodeLE0* root_; // Nodes for root and the first layer. - LmaNodeGE1* nodes_ge1_; // Nodes for other layers. - - // An quick index from spelling id to the LmaNodeLE0 node buffer, or - // to the root_ buffer. - // Index length: - // SpellingTrie::get_instance().get_spelling_num() + 1. The last one is used - // to get the end. - // All Shengmu ids are not indexed because they will be converted into - // corresponding full ids. - // So, given an id splid, the son is: - // root_[splid_le0_index_[splid - kFullSplIdStart]] - uint16 *splid_le0_index_; - - size_t lma_node_num_le0_; - size_t lma_node_num_ge1_; - - // The first part is for homophnies, and the last top_lma_num_ items are - // lemmas with highest scores. - unsigned char *lma_idx_buf_; - size_t lma_idx_buf_len_; // The total size of lma_idx_buf_ in byte. - size_t total_lma_num_; // Total number of lemmas in this dictionary. - size_t top_lmas_num_; // Number of lemma with highest scores. - - // Parsing mark list used to mark the detailed extended statuses. - ParsingMark *parsing_marks_; - // The position for next available mark. - uint16 parsing_marks_pos_; - - // Mile stone list used to mark the extended status. - MileStone *mile_stones_; - // The position for the next available mile stone. We use positions (except 0) - // as handles. 
- MileStoneHandle mile_stones_pos_; - - // Get the offset of sons for a node. - inline size_t get_son_offset(const LmaNodeGE1 *node); - - // Get the offset of homonious ids for a node. - inline size_t get_homo_idx_buf_offset(const LmaNodeGE1 *node); - - // Get the lemma id by the offset. - inline LemmaIdType get_lemma_id(size_t id_offset); - - void free_resource(bool free_dict_list); - - bool load_dict(FILE *fp); - - // Given a LmaNodeLE0 node, extract the lemmas specified by it, and fill - // them into the lpi_items buffer. - // This function is called by the search engine. - size_t fill_lpi_buffer(LmaPsbItem lpi_items[], size_t max_size, - LmaNodeLE0 *node); - - // Given a LmaNodeGE1 node, extract the lemmas specified by it, and fill - // them into the lpi_items buffer. - // This function is called by inner functions extend_dict0(), extend_dict1() - // and extend_dict2(). - size_t fill_lpi_buffer(LmaPsbItem lpi_items[], size_t max_size, - size_t homo_buf_off, LmaNodeGE1 *node, - uint16 lma_len); - - // Extend in the trie from level 0. - MileStoneHandle extend_dict0(MileStoneHandle from_handle, - const DictExtPara *dep, LmaPsbItem *lpi_items, - size_t lpi_max, size_t *lpi_num); - - // Extend in the trie from level 1. - MileStoneHandle extend_dict1(MileStoneHandle from_handle, - const DictExtPara *dep, LmaPsbItem *lpi_items, - size_t lpi_max, size_t *lpi_num); - - // Extend in the trie from level 2. - MileStoneHandle extend_dict2(MileStoneHandle from_handle, - const DictExtPara *dep, LmaPsbItem *lpi_items, - size_t lpi_max, size_t *lpi_num); - - // Try to extend the given spelling id buffer, and if the given id_lemma can - // be successfully gotten, return true; - // The given spelling ids are all valid full ids. 
- bool try_extend(const uint16 *splids, uint16 splid_num, LemmaIdType id_lemma); - -#ifdef ___BUILD_MODEL___ - bool save_dict(FILE *fp); -#endif // ___BUILD_MODEL___ - - static const int kMaxMileStone = 100; - static const int kMaxParsingMark = 600; - static const MileStoneHandle kFirstValidMileStoneHandle = 1; - - friend class DictParser; - friend class DictBuilder; - - public: - - DictTrie(); - ~DictTrie(); - -#ifdef ___BUILD_MODEL___ - // Construct the tree from the file fn_raw. - // fn_validhzs provide the valid hanzi list. If fn_validhzs is - // NULL, only chars in GB2312 will be included. - bool build_dict(const char *fn_raw, const char *fn_validhzs); - - // Save the binary dictionary - // Actually, the SpellingTrie/DictList instance will be also saved. - bool save_dict(const char *filename); -#endif // ___BUILD_MODEL___ - - void convert_to_hanzis(char16 *str, uint16 str_len); - - void convert_to_scis_ids(char16 *str, uint16 str_len); - - // Load a binary dictionary - // The SpellingTrie instance/DictList will be also loaded - bool load_dict(const char *filename, LemmaIdType start_id, - LemmaIdType end_id); - bool load_dict_fd(int sys_fd, long start_offset, long length, - LemmaIdType start_id, LemmaIdType end_id); - bool close_dict() {return true;} - size_t number_of_lemmas() {return 0;} - - void reset_milestones(uint16 from_step, MileStoneHandle from_handle); - - MileStoneHandle extend_dict(MileStoneHandle from_handle, - const DictExtPara *dep, - LmaPsbItem *lpi_items, - size_t lpi_max, size_t *lpi_num); - - size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len, - LmaPsbItem *lpi_items, size_t lpi_max); - - uint16 get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, uint16 str_max); - - uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids, - uint16 splids_max, bool arg_valid); - - size_t predict(const char16 *last_hzs, uint16 hzs_len, - NPredictItem *npre_items, size_t npre_max, - size_t b4_used); - - LemmaIdType put_lemma(char16 
/*lemma_str*/[], uint16 /*splids*/[], - uint16 /*lemma_len*/, uint16 /*count*/) {return 0;} - - LemmaIdType update_lemma(LemmaIdType /*lemma_id*/, int16 /*delta_count*/, - bool /*selected*/) {return 0;} - - LemmaIdType get_lemma_id(char16 /*lemma_str*/[], uint16 /*splids*/[], - uint16 /*lemma_len*/) {return 0;} - - LmaScoreType get_lemma_score(LemmaIdType /*lemma_id*/) {return 0;} - - LmaScoreType get_lemma_score(char16 /*lemma_str*/[], uint16 /*splids*/[], - uint16 /*lemma_len*/) {return 0;} - - bool remove_lemma(LemmaIdType /*lemma_id*/) {return false;} - - size_t get_total_lemma_count() {return 0;} - void set_total_lemma_count_of_others(size_t count); - - void flush_cache() {} - - LemmaIdType get_lemma_id(const char16 lemma_str[], uint16 lemma_len); - - // Fill the lemmas with highest scores to the prediction buffer. - // his_len is the history length to fill in the prediction buffer. - size_t predict_top_lmas(size_t his_len, NPredictItem *npre_items, - size_t npre_max, size_t b4_used); -}; -} - -#endif // PINYINIME_INCLUDE_DICTTRIE_H__ diff --git a/lpicache.cpp b/lpicache.cpp deleted file mode 100644 index ff5041d..0000000 --- a/lpicache.cpp +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include "./lpicache.h" - -namespace ime_pinyin { - -LpiCache* LpiCache::instance_ = NULL; - -LpiCache::LpiCache() { - lpi_cache_ = new LmaPsbItem[kFullSplIdStart * kMaxLpiCachePerId]; - lpi_cache_len_ = new uint16[kFullSplIdStart]; - assert(NULL != lpi_cache_); - assert(NULL != lpi_cache_len_); - for (uint16 id = 0; id < kFullSplIdStart; id++) - lpi_cache_len_[id] = 0; -} - -LpiCache::~LpiCache() { - if (NULL != lpi_cache_) - delete [] lpi_cache_; - - if (NULL != lpi_cache_len_) - delete [] lpi_cache_len_; -} - -LpiCache& LpiCache::get_instance() { - if (NULL == instance_) { - instance_ = new LpiCache(); - assert(NULL != instance_); - } - return *instance_; -} - -bool LpiCache::is_cached(uint16 splid) { - if (splid >= kFullSplIdStart) - return false; - return lpi_cache_len_[splid] != 0; -} - -size_t LpiCache::put_cache(uint16 splid, LmaPsbItem lpi_items[], - size_t lpi_num) { - uint16 num = kMaxLpiCachePerId; - if (num > lpi_num) - num = static_cast(lpi_num); - - LmaPsbItem *lpi_cache_this = lpi_cache_ + splid * kMaxLpiCachePerId; - for (uint16 pos = 0; pos < num; pos++) - lpi_cache_this[pos] = lpi_items[pos]; - - lpi_cache_len_[splid] = num; - return num; -} - -size_t LpiCache::get_cache(uint16 splid, LmaPsbItem lpi_items[], - size_t lpi_max) { - if (lpi_max > lpi_cache_len_[splid]) - lpi_max = lpi_cache_len_[splid]; - - LmaPsbItem *lpi_cache_this = lpi_cache_ + splid * kMaxLpiCachePerId; - for (uint16 pos = 0; pos < lpi_max; pos++) { - lpi_items[pos] = lpi_cache_this[pos]; - } - return lpi_max; -} - -} // namespace ime_pinyin diff --git a/lpicache.h b/lpicache.h deleted file mode 100644 index 6073597..0000000 --- a/lpicache.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PINYINIME_ANDPY_INCLUDE_LPICACHE_H__ -#define PINYINIME_ANDPY_INCLUDE_LPICACHE_H__ - -#include -#include "./searchutility.h" -#include "./spellingtrie.h" - -namespace ime_pinyin { - -// Used to cache LmaPsbItem list for half spelling ids. -class LpiCache { - private: - static LpiCache *instance_; - static const int kMaxLpiCachePerId = 15; - - LmaPsbItem *lpi_cache_; - uint16 *lpi_cache_len_; - - public: - LpiCache(); - ~LpiCache(); - - static LpiCache& get_instance(); - - // Test if the LPI list of the given splid has been cached. - // If splid is a full spelling id, it returns false, because we only cache - // list for half ids. - bool is_cached(uint16 splid); - - // Put LPI list to cahce. If the length of the list, lpi_num, is longer than - // the cache buffer. the list will be truncated, and function returns the - // maximum length of the cache buffer. - // Note: splid must be a half id, and lpi_items must be not NULL. The - // caller of this function should guarantee this. - size_t put_cache(uint16 splid, LmaPsbItem lpi_items[], size_t lpi_num); - - // Get the cached list for the given half id. - // Return the length of the cached buffer. - // Note: splid must be a half id, and lpi_items must be not NULL. The - // caller of this function should guarantee this. 
- size_t get_cache(uint16 splid, LmaPsbItem lpi_items[], size_t lpi_max); -}; - -} // namespace - -#endif // PINYINIME_ANDPY_INCLUDE_LPICACHE_H__ diff --git a/matrixsearch.cpp b/matrixsearch.cpp deleted file mode 100644 index 7d72372..0000000 --- a/matrixsearch.cpp +++ /dev/null @@ -1,1958 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include "./lpicache.h" -#include "./matrixsearch.h" -#include "./mystdlib.h" -#include "./ngram.h" -#include "./userdict.h" - -namespace ime_pinyin { - -#define PRUMING_SCORE 8000.0 - -MatrixSearch::MatrixSearch() { - inited_ = false; - spl_trie_ = SpellingTrie::get_cpinstance(); - - reset_pointers_to_null(); - - pys_decoded_len_ = 0; - mtrx_nd_pool_used_ = 0; - dmi_pool_used_ = 0; - xi_an_enabled_ = false; - dmi_c_phrase_ = false; - - assert(kMaxSearchSteps > 0); - max_sps_len_ = kMaxSearchSteps - 1; - max_hzs_len_ = kMaxSearchSteps; -} - -MatrixSearch::~MatrixSearch() { - free_resource(); -} - -void MatrixSearch::reset_pointers_to_null() { - dict_trie_ = NULL; - user_dict_ = NULL; - spl_parser_ = NULL; - - share_buf_ = NULL; - - // The following four buffers are used for decoding, and they are based on - // share_buf_, no need to delete them. - mtrx_nd_pool_ = NULL; - dmi_pool_ = NULL; - matrix_ = NULL; - dep_ = NULL; - - // Based on share_buf_, no need to delete them. 
- npre_items_ = NULL; -} - -bool MatrixSearch::alloc_resource() { - free_resource(); - - dict_trie_ = new DictTrie(); - user_dict_ = static_cast(new UserDict()); - spl_parser_ = new SpellingParser(); - - size_t mtrx_nd_size = sizeof(MatrixNode) * kMtrxNdPoolSize; - mtrx_nd_size = align_to_size_t(mtrx_nd_size) / sizeof(size_t); - size_t dmi_size = sizeof(DictMatchInfo) * kDmiPoolSize; - dmi_size = align_to_size_t(dmi_size) / sizeof(size_t); - size_t matrix_size = sizeof(MatrixRow) * kMaxRowNum; - matrix_size = align_to_size_t(matrix_size) / sizeof(size_t); - size_t dep_size = sizeof(DictExtPara); - dep_size = align_to_size_t(dep_size) / sizeof(size_t); - - // share_buf's size is determined by the buffers for search. - share_buf_ = new size_t[mtrx_nd_size + dmi_size + matrix_size + dep_size]; - - if (NULL == dict_trie_ || NULL == user_dict_ || NULL == spl_parser_ || - NULL == share_buf_) - return false; - - // The buffers for search are based on the share buffer - mtrx_nd_pool_ = reinterpret_cast(share_buf_); - dmi_pool_ = reinterpret_cast(share_buf_ + mtrx_nd_size); - matrix_ = reinterpret_cast(share_buf_ + mtrx_nd_size + dmi_size); - dep_ = reinterpret_cast - (share_buf_ + mtrx_nd_size + dmi_size + matrix_size); - - // The prediction buffer is also based on the share buffer. 
- npre_items_ = reinterpret_cast(share_buf_); - npre_items_len_ = (mtrx_nd_size + dmi_size + matrix_size + dep_size) * - sizeof(size_t) / sizeof(NPredictItem); - return true; -} - -void MatrixSearch::free_resource() { - if (NULL != dict_trie_) - delete dict_trie_; - - if (NULL != user_dict_) - delete user_dict_; - - if (NULL != spl_parser_) - delete spl_parser_; - - if (NULL != share_buf_) - delete [] share_buf_; - - reset_pointers_to_null(); -} - -bool MatrixSearch::init(const char *fn_sys_dict, const char *fn_usr_dict) { - if (NULL == fn_sys_dict || NULL == fn_usr_dict) - return false; - - if (!alloc_resource()) - return false; - - if (!dict_trie_->load_dict(fn_sys_dict, 1, kSysDictIdEnd)) - return false; - - // If engine fails to load the user dictionary, reset the user dictionary - // to NULL. - if (!user_dict_->load_dict(fn_usr_dict, kUserDictIdStart, kUserDictIdEnd)) { - delete user_dict_; - user_dict_ = NULL; - } else{ - user_dict_->set_total_lemma_count_of_others(NGram::kSysDictTotalFreq); - } - - reset_search0(); - - inited_ = true; - return true; -} - -bool MatrixSearch::init_fd(int sys_fd, long start_offset, long length, - const char *fn_usr_dict) { - if (NULL == fn_usr_dict) - return false; - - if (!alloc_resource()) - return false; - - if (!dict_trie_->load_dict_fd(sys_fd, start_offset, length, 1, kSysDictIdEnd)) - return false; - - if (!user_dict_->load_dict(fn_usr_dict, kUserDictIdStart, kUserDictIdEnd)) { - delete user_dict_; - user_dict_ = NULL; - } else { - user_dict_->set_total_lemma_count_of_others(NGram::kSysDictTotalFreq); - } - - reset_search0(); - - inited_ = true; - return true; -} - -void MatrixSearch::set_max_lens(size_t max_sps_len, size_t max_hzs_len) { - if (0 != max_sps_len) - max_sps_len_ = max_sps_len; - if (0 != max_hzs_len) - max_hzs_len_ = max_hzs_len; -} - -void MatrixSearch::close() { - flush_cache(); - free_resource(); - inited_ = false; -} - -void MatrixSearch::flush_cache() { - if (NULL != user_dict_) - 
user_dict_->flush_cache(); -} - -void MatrixSearch::set_xi_an_switch(bool xi_an_enabled) { - xi_an_enabled_ = xi_an_enabled; -} - -bool MatrixSearch::get_xi_an_switch() { - return xi_an_enabled_; -} - -bool MatrixSearch::reset_search() { - if (!inited_) - return false; - return reset_search0(); -} - -bool MatrixSearch::reset_search0() { - if (!inited_) - return false; - - pys_decoded_len_ = 0; - mtrx_nd_pool_used_ = 0; - dmi_pool_used_ = 0; - - // Get a MatrixNode from the pool - matrix_[0].mtrx_nd_pos = mtrx_nd_pool_used_; - matrix_[0].mtrx_nd_num = 1; - mtrx_nd_pool_used_ += 1; - - // Update the node, and make it to be a starting node - MatrixNode *node = mtrx_nd_pool_ + matrix_[0].mtrx_nd_pos; - node->id = 0; - node->score = 0; - node->from = NULL; - node->step = 0; - node->dmi_fr = (PoolPosType)-1; - - matrix_[0].dmi_pos = 0; - matrix_[0].dmi_num = 0; - matrix_[0].dmi_has_full_id = 1; - matrix_[0].mtrx_nd_fixed = node; - - lma_start_[0] = 0; - fixed_lmas_ = 0; - spl_start_[0] = 0; - fixed_hzs_ = 0; - - dict_trie_->reset_milestones(0, 0); - if (NULL != user_dict_) - user_dict_->reset_milestones(0, 0); - - return true; -} - -bool MatrixSearch::reset_search(size_t ch_pos, bool clear_fixed_this_step, - bool clear_dmi_this_step, - bool clear_mtrx_this_step) { - if (!inited_ || ch_pos > pys_decoded_len_ || ch_pos >= kMaxRowNum) - return false; - - if (0 == ch_pos) { - reset_search0(); - } else { - // Prepare mile stones of this step to clear. - MileStoneHandle *dict_handles_to_clear = NULL; - if (clear_dmi_this_step && matrix_[ch_pos].dmi_num > 0) { - dict_handles_to_clear = dmi_pool_[matrix_[ch_pos].dmi_pos].dict_handles; - } - - // If there are more steps, and this step is not allowed to clear, find - // milestones of next step. 
- if (pys_decoded_len_ > ch_pos && !clear_dmi_this_step) { - dict_handles_to_clear = NULL; - if (matrix_[ch_pos + 1].dmi_num > 0) { - dict_handles_to_clear = - dmi_pool_[matrix_[ch_pos + 1].dmi_pos].dict_handles; - } - } - - if (NULL != dict_handles_to_clear) { - dict_trie_->reset_milestones(ch_pos, dict_handles_to_clear[0]); - if (NULL != user_dict_) - user_dict_->reset_milestones(ch_pos, dict_handles_to_clear[1]); - } - - pys_decoded_len_ = ch_pos; - - if (clear_dmi_this_step) { - dmi_pool_used_ = matrix_[ch_pos - 1].dmi_pos - + matrix_[ch_pos - 1].dmi_num; - matrix_[ch_pos].dmi_num = 0; - } else { - dmi_pool_used_ = matrix_[ch_pos].dmi_pos + matrix_[ch_pos].dmi_num; - } - - if (clear_mtrx_this_step) { - mtrx_nd_pool_used_ = matrix_[ch_pos - 1].mtrx_nd_pos - + matrix_[ch_pos - 1].mtrx_nd_num; - matrix_[ch_pos].mtrx_nd_num = 0; - } else { - mtrx_nd_pool_used_ = matrix_[ch_pos].mtrx_nd_pos - + matrix_[ch_pos].mtrx_nd_num; - } - - // Modify fixed_hzs_ - if (fixed_hzs_ > 0 && - ((kLemmaIdComposing != lma_id_[0]) || - (kLemmaIdComposing == lma_id_[0] && - spl_start_[c_phrase_.length] <= ch_pos))) { - size_t fixed_ch_pos = ch_pos; - if (clear_fixed_this_step) - fixed_ch_pos = fixed_ch_pos > 0 ? fixed_ch_pos - 1 : 0; - while (NULL == matrix_[fixed_ch_pos].mtrx_nd_fixed && fixed_ch_pos > 0) - fixed_ch_pos--; - - fixed_lmas_ = 0; - fixed_hzs_ = 0; - if (fixed_ch_pos > 0) { - while (spl_start_[fixed_hzs_] < fixed_ch_pos) - fixed_hzs_++; - assert(spl_start_[fixed_hzs_] == fixed_ch_pos); - - while (lma_start_[fixed_lmas_] < fixed_hzs_) - fixed_lmas_++; - assert(lma_start_[fixed_lmas_] == fixed_hzs_); - } - - // Re-search the Pinyin string for the unlocked lemma - // which was previously fixed. - // - // Prepare mile stones of this step to clear. 
- MileStoneHandle *dict_handles_to_clear = NULL; - if (clear_dmi_this_step && ch_pos == fixed_ch_pos && - matrix_[fixed_ch_pos].dmi_num > 0) { - dict_handles_to_clear = dmi_pool_[matrix_[fixed_ch_pos].dmi_pos].dict_handles; - } - - // If there are more steps, and this step is not allowed to clear, find - // milestones of next step. - if (pys_decoded_len_ > fixed_ch_pos && !clear_dmi_this_step) { - dict_handles_to_clear = NULL; - if (matrix_[fixed_ch_pos + 1].dmi_num > 0) { - dict_handles_to_clear = - dmi_pool_[matrix_[fixed_ch_pos + 1].dmi_pos].dict_handles; - } - } - - if (NULL != dict_handles_to_clear) { - dict_trie_->reset_milestones(fixed_ch_pos, dict_handles_to_clear[0]); - if (NULL != user_dict_) - user_dict_->reset_milestones(fixed_ch_pos, dict_handles_to_clear[1]); - } - - - pys_decoded_len_ = fixed_ch_pos; - - if (clear_dmi_this_step && ch_pos == fixed_ch_pos) { - dmi_pool_used_ = matrix_[fixed_ch_pos - 1].dmi_pos - + matrix_[fixed_ch_pos - 1].dmi_num; - matrix_[fixed_ch_pos].dmi_num = 0; - } else { - dmi_pool_used_ = matrix_[fixed_ch_pos].dmi_pos + - matrix_[fixed_ch_pos].dmi_num; - } - - if (clear_mtrx_this_step && ch_pos == fixed_ch_pos) { - mtrx_nd_pool_used_ = matrix_[fixed_ch_pos - 1].mtrx_nd_pos - + matrix_[fixed_ch_pos - 1].mtrx_nd_num; - matrix_[fixed_ch_pos].mtrx_nd_num = 0; - } else { - mtrx_nd_pool_used_ = matrix_[fixed_ch_pos].mtrx_nd_pos - + matrix_[fixed_ch_pos].mtrx_nd_num; - } - - for (uint16 re_pos = fixed_ch_pos; re_pos < ch_pos; re_pos++) { - add_char(pys_[re_pos]); - } - } else if (fixed_hzs_ > 0 && kLemmaIdComposing == lma_id_[0]) { - for (uint16 subpos = 0; subpos < c_phrase_.sublma_num; subpos++) { - uint16 splpos_begin = c_phrase_.sublma_start[subpos]; - uint16 splpos_end = c_phrase_.sublma_start[subpos + 1]; - for (uint16 splpos = splpos_begin; splpos < splpos_end; splpos++) { - // If ch_pos is in this spelling - uint16 spl_start = c_phrase_.spl_start[splpos]; - uint16 spl_end = c_phrase_.spl_start[splpos + 1]; - if (ch_pos >= 
spl_start && ch_pos < spl_end) { - // Clear everything after this position - c_phrase_.chn_str[splpos] = static_cast('\0'); - c_phrase_.sublma_start[subpos + 1] = splpos; - c_phrase_.sublma_num = subpos + 1; - c_phrase_.length = splpos; - - if (splpos == splpos_begin) { - c_phrase_.sublma_num = subpos; - } - } - } - } - - // Extend the composing phrase. - reset_search0(); - dmi_c_phrase_ = true; - uint16 c_py_pos = 0; - while (c_py_pos < spl_start_[c_phrase_.length]) { - bool b_ac_tmp = add_char(pys_[c_py_pos]); - assert(b_ac_tmp); - c_py_pos++; - } - dmi_c_phrase_ = false; - - lma_id_num_ = 1; - fixed_lmas_ = 1; - fixed_lmas_no1_[0] = 0; // A composing string is always modified. - fixed_hzs_ = c_phrase_.length; - lma_start_[1] = fixed_hzs_; - lma_id_[0] = kLemmaIdComposing; - matrix_[spl_start_[fixed_hzs_]].mtrx_nd_fixed = mtrx_nd_pool_ + - matrix_[spl_start_[fixed_hzs_]].mtrx_nd_pos; - } - } - - return true; -} - -void MatrixSearch::del_in_pys(size_t start, size_t len) { - while (start < kMaxRowNum - len && '\0' != pys_[start]) { - pys_[start] = pys_[start + len]; - start++; - } -} - -size_t MatrixSearch::search(const char *py, size_t py_len) { - if (!inited_ || NULL == py) - return 0; - - // If the search Pinyin string is too long, it will be truncated. - if (py_len > kMaxRowNum - 1) - py_len = kMaxRowNum - 1; - - // Compare the new string with the previous one. Find their prefix to - // increase search efficiency. - size_t ch_pos = 0; - for (ch_pos = 0; ch_pos < pys_decoded_len_; ch_pos++) { - if ('\0' == py[ch_pos] || py[ch_pos] != pys_[ch_pos]) - break; - } - - bool clear_fix = true; - if (ch_pos == pys_decoded_len_) - clear_fix = false; - - reset_search(ch_pos, clear_fix, false, false); - - memcpy(pys_ + ch_pos, py + ch_pos, py_len - ch_pos); - pys_[py_len] = '\0'; - - while ('\0' != pys_[ch_pos]) { - if (!add_char(py[ch_pos])) { - pys_decoded_len_ = ch_pos; - break; - } - ch_pos++; - } - - // Get spelling ids and starting positions. 
- get_spl_start_id(); - - // If there are too many spellings, remove the last letter until the spelling - // number is acceptable. - while (spl_id_num_ > 9) { - py_len--; - reset_search(py_len, false, false, false); - pys_[py_len] = '\0'; - get_spl_start_id(); - } - - prepare_candidates(); - - if (kPrintDebug0) { - printf("--Matrix Node Pool Used: %d\n", mtrx_nd_pool_used_); - printf("--DMI Pool Used: %d\n", dmi_pool_used_); - - if (kPrintDebug1) { - for (PoolPosType pos = 0; pos < dmi_pool_used_; pos++) { - debug_print_dmi(pos, 1); - } - } - } - - return ch_pos; -} - -size_t MatrixSearch::delsearch(size_t pos, bool is_pos_in_splid, - bool clear_fixed_this_step) { - if (!inited_) - return 0; - - size_t reset_pos = pos; - - // Out of range for both Pinyin mode and Spelling id mode. - if (pys_decoded_len_ <= pos) { - del_in_pys(pos, 1); - - reset_pos = pys_decoded_len_; - // Decode the string after the un-decoded position - while ('\0' != pys_[reset_pos]) { - if (!add_char(pys_[reset_pos])) { - pys_decoded_len_ = reset_pos; - break; - } - reset_pos++; - } - get_spl_start_id(); - prepare_candidates(); - return pys_decoded_len_; - } - - // Spelling id mode, but out of range. - if (is_pos_in_splid && spl_id_num_ <= pos) - return pys_decoded_len_; - - // Begin to handle two modes respectively. - // Pinyin mode by default - size_t c_py_len = 0; // The length of composing phrase's Pinyin - size_t del_py_len = 1; - if (!is_pos_in_splid) { - // Pinyin mode is only allowed to delete beyond the fixed lemmas. - if (fixed_lmas_ > 0 && pos < spl_start_[lma_start_[fixed_lmas_]]) - return pys_decoded_len_; - - del_in_pys(pos, 1); - - // If the deleted character is just the one after the last fixed lemma - if (pos == spl_start_[lma_start_[fixed_lmas_]]) { - // If all fixed lemmas have been merged, and the caller of the function - // request to unlock the last fixed lemma. 
- if (kLemmaIdComposing == lma_id_[0] && clear_fixed_this_step) { - // Unlock the last sub lemma in the composing phrase. Because it is not - // easy to unlock it directly. Instead, we re-decode the modified - // composing phrase. - c_phrase_.sublma_num--; - c_phrase_.length = c_phrase_.sublma_start[c_phrase_.sublma_num]; - reset_pos = spl_start_[c_phrase_.length]; - c_py_len = reset_pos; - } - } - } else { - del_py_len = spl_start_[pos + 1] - spl_start_[pos]; - - del_in_pys(spl_start_[pos], del_py_len); - - if (pos >= lma_start_[fixed_lmas_]) { - c_py_len = 0; - reset_pos = spl_start_[pos + 1] - del_py_len; - } else { - c_py_len = spl_start_[lma_start_[fixed_lmas_]] - del_py_len; - reset_pos = c_py_len; - if (c_py_len > 0) - merge_fixed_lmas(pos); - } - } - - if (c_py_len > 0) { - assert(c_phrase_.length > 0 && c_py_len == - c_phrase_.spl_start[c_phrase_.sublma_start[c_phrase_.sublma_num]]); - // The composing phrase is valid, reset all search space, - // and begin a new search which will only extend the composing - // phrase. - reset_search0(); - - dmi_c_phrase_ = true; - // Extend the composing phrase. - uint16 c_py_pos = 0; - while (c_py_pos < c_py_len) { - bool b_ac_tmp = add_char(pys_[c_py_pos]); - assert(b_ac_tmp); - c_py_pos++; - } - dmi_c_phrase_ = false; - - // Fixd the composing phrase as the first choice. - lma_id_num_ = 1; - fixed_lmas_ = 1; - fixed_lmas_no1_[0] = 0; // A composing string is always modified. - fixed_hzs_ = c_phrase_.length; - lma_start_[1] = fixed_hzs_; - lma_id_[0] = kLemmaIdComposing; - matrix_[spl_start_[fixed_hzs_]].mtrx_nd_fixed = mtrx_nd_pool_ + - matrix_[spl_start_[fixed_hzs_]].mtrx_nd_pos; - } else { - // Reseting search only clear pys_decoded_len_, but the string is kept. - reset_search(reset_pos, clear_fixed_this_step, false, false); - } - - // Decode the string after the delete position. 
- while ('\0' != pys_[reset_pos]) { - if (!add_char(pys_[reset_pos])) { - pys_decoded_len_ = reset_pos; - break; - } - reset_pos++; - } - - get_spl_start_id(); - prepare_candidates(); - return pys_decoded_len_; -} - -size_t MatrixSearch::get_candidate_num() { - if (!inited_ || 0 == pys_decoded_len_ || - 0 == matrix_[pys_decoded_len_].mtrx_nd_num) - return 0; - - return 1 + lpi_total_; -} - -char16* MatrixSearch::get_candidate(size_t cand_id, char16 *cand_str, - size_t max_len) { - if (!inited_ || 0 == pys_decoded_len_ || NULL == cand_str) - return NULL; - - if (0 == cand_id) { - return get_candidate0(cand_str, max_len, NULL, false); - } else { - cand_id--; - } - - // For this case: the current sentence is a word only, and the user fixed it, - // so the result will be fixed to the sentence space, and - // lpi_total_ will be set to 0. - if (0 == lpi_total_) { - return get_candidate0(cand_str, max_len, NULL, false); - } - - LemmaIdType id = lpi_items_[cand_id].id; - char16 s[kMaxLemmaSize + 1]; - - uint16 s_len = lpi_items_[cand_id].lma_len; - if (s_len > 1) { - s_len = get_lemma_str(id, s, kMaxLemmaSize + 1); - } else { - // For a single character, Hanzi is ready. - s[0] = lpi_items_[cand_id].hanzi; - s[1] = static_cast(0); - } - - if (s_len > 0 && max_len > s_len) { - utf16_strncpy(cand_str, s, s_len); - cand_str[s_len] = (char16)'\0'; - return cand_str; - } - - return NULL; -} - -void MatrixSearch::update_dict_freq() { - if (NULL != user_dict_) { - // Update the total frequency of all lemmas, including system lemmas and - // user dictionary lemmas. 
- size_t total_freq = user_dict_->get_total_lemma_count(); - dict_trie_->set_total_lemma_count_of_others(total_freq); - } -} - -bool MatrixSearch::add_lma_to_userdict(uint16 lma_fr, uint16 lma_to, - float score) { - if (lma_to - lma_fr <= 1 || NULL == user_dict_) - return false; - - char16 word_str[kMaxLemmaSize + 1]; - uint16 spl_ids[kMaxLemmaSize]; - - uint16 spl_id_fr = 0; - - for (uint16 pos = lma_fr; pos < lma_to; pos++) { - LemmaIdType lma_id = lma_id_[pos]; - if (is_user_lemma(lma_id)) { - user_dict_->update_lemma(lma_id, 1, true); - } - uint16 lma_len = lma_start_[pos + 1] - lma_start_[pos]; - utf16_strncpy(spl_ids + spl_id_fr, spl_id_ + lma_start_[pos], lma_len); - - uint16 tmp = get_lemma_str(lma_id, word_str + spl_id_fr, - kMaxLemmaSize + 1 - spl_id_fr); - assert(tmp == lma_len); - - tmp = get_lemma_splids(lma_id, spl_ids + spl_id_fr, lma_len, true); - if (tmp != lma_len) { - return false; - } - - spl_id_fr += lma_len; - } - - assert(spl_id_fr <= kMaxLemmaSize); - - return user_dict_->put_lemma(static_cast(word_str), spl_ids, - spl_id_fr, 1); -} - -void MatrixSearch::debug_print_dmi(PoolPosType dmi_pos, uint16 nest_level) { - if (dmi_pos >= dmi_pool_used_) return; - - DictMatchInfo *dmi = dmi_pool_ + dmi_pos; - - if (1 == nest_level) { - printf("-----------------%d\'th DMI node begin----------->\n", dmi_pos); - } - if (dmi->dict_level > 1) { - debug_print_dmi(dmi->dmi_fr, nest_level + 1); - } - printf("---%d\n", dmi->dict_level); - printf(" MileStone: %x, %x\n", dmi->dict_handles[0], dmi->dict_handles[1]); - printf(" Spelling : %s, %d\n", SpellingTrie::get_instance(). 
- get_spelling_str(dmi->spl_id), dmi->spl_id); - printf(" Total Pinyin Len: %d\n", dmi->splstr_len); - if (1 == nest_level) { - printf("<----------------%d\'th DMI node end--------------\n\n", dmi_pos); - } -} - -bool MatrixSearch::try_add_cand0_to_userdict() { - size_t new_cand_num = get_candidate_num(); - if (fixed_hzs_ > 0 && 1 == new_cand_num) { - float score_from = 0; - uint16 lma_id_from = 0; - uint16 pos = 0; - bool modified = false; - while (pos < fixed_lmas_) { - if (lma_start_[pos + 1] - lma_start_[lma_id_from] > - static_cast(kMaxLemmaSize)) { - float score_to_add = - mtrx_nd_pool_[matrix_[spl_start_[lma_start_[pos]]] - .mtrx_nd_pos].score - score_from; - if (modified) { - score_to_add += 1.0; - if (score_to_add > NGram::kMaxScore) { - score_to_add = NGram::kMaxScore; - } - add_lma_to_userdict(lma_id_from, pos, score_to_add); - } - lma_id_from = pos; - score_from += score_to_add; - - // Clear the flag for next user lemma. - modified = false; - } - - if (0 == fixed_lmas_no1_[pos]) { - modified = true; - } - pos++; - } - - // Single-char word is not allowed to add to userdict. - if (lma_start_[pos] - lma_start_[lma_id_from] > 1) { - float score_to_add = - mtrx_nd_pool_[matrix_[spl_start_[lma_start_[pos]]] - .mtrx_nd_pos].score - score_from; - if (modified) { - score_to_add += 1.0; - if (score_to_add > NGram::kMaxScore) { - score_to_add = NGram::kMaxScore; - } - add_lma_to_userdict(lma_id_from, pos, score_to_add); - } - } - } - return true; -} - -// Choose a candidate, and give new candidates for next step. -// If user finishes selection, we will try to communicate with user dictionary -// to add new items or update score of some existing items. -// -// Basic rule: -// 1. If user selects the first choice: -// 1.1. If the first choice is not a sentence, instead, it is a lemma: -// 1.1.1. If the first choice is a user lemma, notify the user -// dictionary that a user lemma is hit, and add occuring count -// by 1. -// 1.1.2. 
If the first choice is a system lemma, do nothing. -// 1.2. If the first choice is a sentence containing more than one lemma: -// 1.2.1. The whole sentence will be added as a user lemma. If the -// sentence contains user lemmas, -> hit, and add occuring count -// by 1. -size_t MatrixSearch::choose(size_t cand_id) { - if (!inited_ || 0 == pys_decoded_len_) - return 0; - - if (0 == cand_id) { - fixed_hzs_ = spl_id_num_; - matrix_[spl_start_[fixed_hzs_]].mtrx_nd_fixed = mtrx_nd_pool_ + - matrix_[spl_start_[fixed_hzs_]].mtrx_nd_pos; - for (size_t pos = fixed_lmas_; pos < lma_id_num_; pos++) { - fixed_lmas_no1_[pos] = 1; - } - fixed_lmas_ = lma_id_num_; - lpi_total_ = 0; // Clean all other candidates. - - // 1. It is the first choice - if (1 == lma_id_num_) { - // 1.1. The first choice is not a sentence but a lemma - if (is_user_lemma(lma_id_[0])) { - // 1.1.1. The first choice is a user lemma, notify the user dictionary - // that it is hit. - if (NULL != user_dict_) - user_dict_->update_lemma(lma_id_[0], 1, true); - } else { - // 1.1.2. do thing for a system lemma. - } - } else { - // 1.2. The first choice is a sentence. - // 1.2.1 Try to add the whole sentence to user dictionary, the whole - // sentence may be splitted into many items. - if (NULL != user_dict_) { - try_add_cand0_to_userdict(); - } - } - update_dict_freq(); - return 1; - } else { - cand_id--; - } - - // 2. It is not the full sentence candidate. - // Find the length of the candidate. - LemmaIdType id_chosen = lpi_items_[cand_id].id; - LmaScoreType score_chosen = lpi_items_[cand_id].psb; - size_t cand_len = lpi_items_[cand_id].lma_len; - - assert(cand_len > 0); - - // Notify the atom dictionary that this item is hit. - if (is_user_lemma(id_chosen)) { - if (NULL != user_dict_) { - user_dict_->update_lemma(id_chosen, 1, true); - } - update_dict_freq(); - } - - // 3. Fixed the chosen item. - // 3.1 Get the steps number. 
- size_t step_fr = spl_start_[fixed_hzs_]; - size_t step_to = spl_start_[fixed_hzs_ + cand_len]; - - // 3.2 Save the length of the original string. - size_t pys_decoded_len = pys_decoded_len_; - - // 3.2 Reset the space of the fixed part. - reset_search(step_to, false, false, true); - - // 3.3 For the last character of the fixed part, the previous DMI - // information will be kept, while the MTRX information will be re-extended, - // and only one node will be extended. - matrix_[step_to].mtrx_nd_num = 0; - - LmaPsbItem lpi_item; - lpi_item.psb = score_chosen; - lpi_item.id = id_chosen; - - PoolPosType step_to_dmi_fr = match_dmi(step_to, - spl_id_ + fixed_hzs_, cand_len); - assert(step_to_dmi_fr != static_cast(-1)); - - extend_mtrx_nd(matrix_[step_fr].mtrx_nd_fixed, &lpi_item, 1, - step_to_dmi_fr, step_to); - - matrix_[step_to].mtrx_nd_fixed = mtrx_nd_pool_ + matrix_[step_to].mtrx_nd_pos; - mtrx_nd_pool_used_ = matrix_[step_to].mtrx_nd_pos + - matrix_[step_to].mtrx_nd_num; - - if (id_chosen == lma_id_[fixed_lmas_]) - fixed_lmas_no1_[fixed_lmas_] = 1; - else - fixed_lmas_no1_[fixed_lmas_] = 0; - lma_id_[fixed_lmas_] = id_chosen; - lma_start_[fixed_lmas_ + 1] = lma_start_[fixed_lmas_] + cand_len; - fixed_lmas_++; - fixed_hzs_ = fixed_hzs_ + cand_len; - - while (step_to != pys_decoded_len) { - bool b = add_char(pys_[step_to]); - assert(b); - step_to++; - } - - if (fixed_hzs_ < spl_id_num_) { - prepare_candidates(); - } else { - lpi_total_ = 0; - if (NULL != user_dict_) { - try_add_cand0_to_userdict(); - } - } - - return get_candidate_num(); -} - -size_t MatrixSearch::cancel_last_choice() { - if (!inited_ || 0 == pys_decoded_len_) - return 0; - - size_t step_start = 0; - if (fixed_hzs_ > 0) { - size_t step_end = spl_start_[fixed_hzs_]; - MatrixNode *end_node = matrix_[step_end].mtrx_nd_fixed; - assert(NULL != end_node); - - step_start = end_node->from->step; - - if (step_start > 0) { - DictMatchInfo *dmi = dmi_pool_ + end_node->dmi_fr; - fixed_hzs_ -= dmi->dict_level; - 
} else { - fixed_hzs_ = 0; - } - - reset_search(step_start, false, false, false); - - while (pys_[step_start] != '\0') { - bool b = add_char(pys_[step_start]); - assert(b); - step_start++; - } - - prepare_candidates(); - } - return get_candidate_num(); -} - -size_t MatrixSearch::get_fixedlen() { - if (!inited_ || 0 == pys_decoded_len_) - return 0; - return fixed_hzs_; -} - -bool MatrixSearch::prepare_add_char(char ch) { - if (pys_decoded_len_ >= kMaxRowNum - 1 || - (!spl_parser_->is_valid_to_parse(ch) && ch != '\'')) - return false; - - if (dmi_pool_used_ >= kDmiPoolSize) return false; - - pys_[pys_decoded_len_] = ch; - pys_decoded_len_++; - - MatrixRow *mtrx_this_row = matrix_ + pys_decoded_len_; - mtrx_this_row->mtrx_nd_pos = mtrx_nd_pool_used_; - mtrx_this_row->mtrx_nd_num = 0; - mtrx_this_row->dmi_pos = dmi_pool_used_; - mtrx_this_row->dmi_num = 0; - mtrx_this_row->dmi_has_full_id = 0; - - return true; -} - -bool MatrixSearch::is_split_at(uint16 pos) { - return !spl_parser_->is_valid_to_parse(pys_[pos - 1]); -} - -void MatrixSearch::fill_dmi(DictMatchInfo *dmi, MileStoneHandle *handles, - PoolPosType dmi_fr, uint16 spl_id, - uint16 node_num, unsigned char dict_level, - bool splid_end_split, unsigned char splstr_len, - unsigned char all_full_id) { - dmi->dict_handles[0] = handles[0]; - dmi->dict_handles[1] = handles[1]; - dmi->dmi_fr = dmi_fr; - dmi->spl_id = spl_id; - dmi->dict_level = dict_level; - dmi->splid_end_split = splid_end_split ? 1 : 0; - dmi->splstr_len = splstr_len; - dmi->all_full_id = all_full_id; - dmi->c_phrase = 0; -} - -bool MatrixSearch::add_char(char ch) { - if (!prepare_add_char(ch)) - return false; - return add_char_qwerty(); -} - -bool MatrixSearch::add_char_qwerty() { - matrix_[pys_decoded_len_].mtrx_nd_num = 0; - - bool spl_matched = false; - uint16 longest_ext = 0; - // Extend the search matrix, from the oldest unfixed row. ext_len means - // extending length. 
- for (uint16 ext_len = kMaxPinyinSize + 1; ext_len > 0; ext_len--) { - if (ext_len > pys_decoded_len_ - spl_start_[fixed_hzs_]) - continue; - - // Refer to the declaration of the variable dmi_has_full_id for the - // explanation of this piece of code. In one word, it is used to prevent - // from the unwise extending of "shoud ou" but allow the reasonable - // extending of "heng ao", "lang a", etc. - if (ext_len > 1 && 0 != longest_ext && - 0 == matrix_[pys_decoded_len_ - ext_len].dmi_has_full_id) { - if (xi_an_enabled_) - continue; - else - break; - } - - uint16 oldrow = pys_decoded_len_ - ext_len; - - // 0. If that row is before the last fixed step, ignore. - if (spl_start_[fixed_hzs_] > oldrow) - continue; - - // 1. Check if that old row has valid MatrixNode. If no, means that row is - // not a boundary, either a word boundary or a spelling boundary. - // If it is for extending composing phrase, it's OK to ignore the 0. - if (0 == matrix_[oldrow].mtrx_nd_num && !dmi_c_phrase_) - continue; - - // 2. Get spelling id(s) for the last ext_len chars. - uint16 spl_idx; - bool is_pre = false; - spl_idx = spl_parser_->get_splid_by_str(pys_ + oldrow, - ext_len, &is_pre); - if (is_pre) - spl_matched = true; - - if (0 == spl_idx) - continue; - - bool splid_end_split = is_split_at(oldrow + ext_len); - - // 3. Extend the DMI nodes of that old row - // + 1 is to extend an extra node from the root - for (PoolPosType dmi_pos = matrix_[oldrow].dmi_pos; - dmi_pos < matrix_[oldrow].dmi_pos + matrix_[oldrow].dmi_num + 1; - dmi_pos++) { - DictMatchInfo *dmi = dmi_pool_ + dmi_pos; - if (dmi_pos == matrix_[oldrow].dmi_pos + matrix_[oldrow].dmi_num) { - dmi = NULL; // The last one, NULL means extending from the root. - } else { - // If the dmi is covered by the fixed arrange, ignore it. 
- if (fixed_hzs_ > 0 && - pys_decoded_len_ - ext_len - dmi->splstr_len < - spl_start_[fixed_hzs_]) { - continue; - } - // If it is not in mode for composing phrase, and the source DMI node - // is marked for composing phrase, ignore this node. - if (dmi->c_phrase != 0 && !dmi_c_phrase_) { - continue; - } - } - - // For example, if "gao" is extended, "g ao" is not allowed. - // or "zh" has been passed, "z h" is not allowed. - // Both word and word-connection will be prevented. - if (longest_ext > ext_len) { - if (NULL == dmi && 0 == matrix_[oldrow].dmi_has_full_id) { - continue; - } - - // "z h" is not allowed. - if (NULL != dmi && spl_trie_->is_half_id(dmi->spl_id)) { - continue; - } - } - - dep_->splids_extended = 0; - if (NULL != dmi) { - uint16 prev_ids_num = dmi->dict_level; - if ((!dmi_c_phrase_ && prev_ids_num >= kMaxLemmaSize) || - (dmi_c_phrase_ && prev_ids_num >= kMaxRowNum)) { - continue; - } - - DictMatchInfo *d = dmi; - while (d) { - dep_->splids[--prev_ids_num] = d->spl_id; - if ((PoolPosType)-1 == d->dmi_fr) - break; - d = dmi_pool_ + d->dmi_fr; - } - assert(0 == prev_ids_num); - dep_->splids_extended = dmi->dict_level; - } - dep_->splids[dep_->splids_extended] = spl_idx; - dep_->ext_len = ext_len; - dep_->splid_end_split = splid_end_split; - - dep_->id_num = 1; - dep_->id_start = spl_idx; - if (spl_trie_->is_half_id(spl_idx)) { - // Get the full id list - dep_->id_num = spl_trie_->half_to_full(spl_idx, &(dep_->id_start)); - assert(dep_->id_num > 0); - } - - uint16 new_dmi_num; - - new_dmi_num = extend_dmi(dep_, dmi); - - if (new_dmi_num > 0) { - if (dmi_c_phrase_) { - dmi_pool_[dmi_pool_used_].c_phrase = 1; - } - matrix_[pys_decoded_len_].dmi_num += new_dmi_num; - dmi_pool_used_ += new_dmi_num; - - if (!spl_trie_->is_half_id(spl_idx)) - matrix_[pys_decoded_len_].dmi_has_full_id = 1; - } - - // If get candiate lemmas, try to extend the path - if (lpi_total_ > 0) { - uint16 fr_row; - if (NULL == dmi) { - fr_row = oldrow; - } else { - assert(oldrow >= 
dmi->splstr_len); - fr_row = oldrow - dmi->splstr_len; - } - for (PoolPosType mtrx_nd_pos = matrix_[fr_row].mtrx_nd_pos; - mtrx_nd_pos < matrix_[fr_row].mtrx_nd_pos + - matrix_[fr_row].mtrx_nd_num; - mtrx_nd_pos++) { - MatrixNode *mtrx_nd = mtrx_nd_pool_ + mtrx_nd_pos; - - extend_mtrx_nd(mtrx_nd, lpi_items_, lpi_total_, - dmi_pool_used_ - new_dmi_num, pys_decoded_len_); - if (longest_ext == 0) - longest_ext = ext_len; - } - } - } // for dmi_pos - } // for ext_len - mtrx_nd_pool_used_ += matrix_[pys_decoded_len_].mtrx_nd_num; - - if (dmi_c_phrase_) - return true; - - return (matrix_[pys_decoded_len_].mtrx_nd_num != 0 || spl_matched); -} - -void MatrixSearch::prepare_candidates() { - // Get candiates from the first un-fixed step. - uint16 lma_size_max = kMaxLemmaSize; - if (lma_size_max > spl_id_num_ - fixed_hzs_) - lma_size_max = spl_id_num_ - fixed_hzs_; - - uint16 lma_size = lma_size_max; - - // If the full sentense candidate's unfixed part may be the same with a normal - // lemma. Remove the lemma candidate in this case. - char16 fullsent[kMaxLemmaSize + 1]; - char16 *pfullsent = NULL; - uint16 sent_len; - pfullsent = get_candidate0(fullsent, kMaxLemmaSize + 1, &sent_len, true); - - // If the unfixed part contains more than one ids, it is not necessary to - // check whether a lemma's string is the same to the unfixed part of the full - // sentence candidate, so, set it to NULL; - if (sent_len > kMaxLemmaSize) - pfullsent = NULL; - - lpi_total_ = 0; - size_t lpi_num_full_match = 0; // Number of items which are fully-matched. - while (lma_size > 0) { - size_t lma_num; - lma_num = get_lpis(spl_id_ + fixed_hzs_, lma_size, - lpi_items_ + lpi_total_, - size_t(kMaxLmaPsbItems - lpi_total_), - pfullsent, lma_size == lma_size_max); - - if (lma_num > 0) { - lpi_total_ += lma_num; - // For next lemma candidates which are not the longest, it is not - // necessary to compare with the full sentence candiate. 
- pfullsent = NULL; - } - if (lma_size == lma_size_max) { - lpi_num_full_match = lpi_total_; - } - lma_size--; - } - - // Sort those partially-matched items by their unified scores. - myqsort(lpi_items_ + lpi_num_full_match, lpi_total_ - lpi_num_full_match, - sizeof(LmaPsbItem), cmp_lpi_with_unified_psb); - - if (kPrintDebug0) { - printf("-----Prepare candidates, score:\n"); - for (size_t a = 0; a < lpi_total_; a++) { - printf("[%03d]%d ", a, lpi_items_[a].psb); - if ((a + 1) % 6 == 0) printf("\n"); - } - printf("\n"); - } - - if (kPrintDebug0) { - printf("--- lpi_total_ = %d\n", lpi_total_); - } -} - -const char* MatrixSearch::get_pystr(size_t *decoded_len) { - if (!inited_ || NULL == decoded_len) - return NULL; - - *decoded_len = pys_decoded_len_; - return pys_; -} - -void MatrixSearch::merge_fixed_lmas(size_t del_spl_pos) { - if (fixed_lmas_ == 0) - return; - // Update spelling segmentation information first. - spl_id_num_ -= 1; - uint16 del_py_len = spl_start_[del_spl_pos + 1] - spl_start_[del_spl_pos]; - for (size_t pos = del_spl_pos; pos <= spl_id_num_; pos++) { - spl_start_[pos] = spl_start_[pos + 1] - del_py_len; - if (pos == spl_id_num_) - break; - spl_id_[pos] = spl_id_[pos + 1]; - } - - // Begin to merge. - uint16 phrase_len = 0; - - // Update the spelling ids to the composing phrase. - // We need to convert these ids into full id in the future. - memcpy(c_phrase_.spl_ids, spl_id_, spl_id_num_ * sizeof(uint16)); - memcpy(c_phrase_.spl_start, spl_start_, (spl_id_num_ + 1) * sizeof(uint16)); - - // If composing phrase has not been created, first merge all fixed - // lemmas into a composing phrase without deletion. - if (fixed_lmas_ > 1 || kLemmaIdComposing != lma_id_[0]) { - uint16 bp = 1; // Begin position of real fixed lemmas. - // There is no existing composing phrase. 
- if (kLemmaIdComposing != lma_id_[0]) { - c_phrase_.sublma_num = 0; - bp = 0; - } - - uint16 sub_num = c_phrase_.sublma_num; - for (uint16 pos = bp; pos <= fixed_lmas_; pos++) { - c_phrase_.sublma_start[sub_num + pos - bp] = lma_start_[pos]; - if (lma_start_[pos] > del_spl_pos) { - c_phrase_.sublma_start[sub_num + pos - bp] -= 1; - } - - if (pos == fixed_lmas_) - break; - - uint16 lma_len; - char16 *lma_str = c_phrase_.chn_str + - c_phrase_.sublma_start[sub_num] + phrase_len; - - lma_len = get_lemma_str(lma_id_[pos], lma_str, kMaxRowNum - phrase_len); - assert(lma_len == lma_start_[pos + 1] - lma_start_[pos]); - phrase_len += lma_len; - } - assert(phrase_len == lma_start_[fixed_lmas_]); - c_phrase_.length = phrase_len; // will be deleted by 1 - c_phrase_.sublma_num += fixed_lmas_ - bp; - } else { - for (uint16 pos = 0; pos <= c_phrase_.sublma_num; pos++) { - if (c_phrase_.sublma_start[pos] > del_spl_pos) { - c_phrase_.sublma_start[pos] -= 1; - } - } - phrase_len = c_phrase_.length; - } - - assert(phrase_len > 0); - if (1 == phrase_len) { - // After the only one is deleted, nothing will be left. - fixed_lmas_ = 0; - return; - } - - // Delete the Chinese character in the merged phrase. - // The corresponding elements in spl_ids and spl_start of the - // phrase have been deleted. - char16 *chn_str = c_phrase_.chn_str + del_spl_pos; - for (uint16 pos = 0; - pos < c_phrase_.sublma_start[c_phrase_.sublma_num] - del_spl_pos; - pos++) { - chn_str[pos] = chn_str[pos + 1]; - } - c_phrase_.length -= 1; - - // If the deleted spelling id is in a sub lemma which contains more than - // one id, del_a_sub will be false; but if the deleted id is in a sub lemma - // which only contains 1 id, the whole sub lemma needs to be deleted, so - // del_a_sub will be true. 
- bool del_a_sub = false; - for (uint16 pos = 1; pos <= c_phrase_.sublma_num; pos++) { - if (c_phrase_.sublma_start[pos - 1] == - c_phrase_.sublma_start[pos]) { - del_a_sub = true; - } - if (del_a_sub) { - c_phrase_.sublma_start[pos - 1] = - c_phrase_.sublma_start[pos]; - } - } - if (del_a_sub) - c_phrase_.sublma_num -= 1; - - return; -} - -void MatrixSearch::get_spl_start_id() { - lma_id_num_ = 0; - lma_start_[0] = 0; - - spl_id_num_ = 0; - spl_start_[0] = 0; - if (!inited_ || 0 == pys_decoded_len_ || - 0 == matrix_[pys_decoded_len_].mtrx_nd_num) - return; - - // Calculate number of lemmas and spellings - // Only scan those part which is not fixed. - lma_id_num_ = fixed_lmas_; - spl_id_num_ = fixed_hzs_; - - MatrixNode *mtrx_nd = mtrx_nd_pool_ + matrix_[pys_decoded_len_].mtrx_nd_pos; - while (mtrx_nd != mtrx_nd_pool_) { - if (fixed_hzs_ > 0) { - if (mtrx_nd->step <= spl_start_[fixed_hzs_]) - break; - } - - // Update the spelling segamentation information - unsigned char word_splstr_len = 0; - PoolPosType dmi_fr = mtrx_nd->dmi_fr; - if ((PoolPosType)-1 != dmi_fr) - word_splstr_len = dmi_pool_[dmi_fr].splstr_len; - - while ((PoolPosType)-1 != dmi_fr) { - spl_start_[spl_id_num_ + 1] = mtrx_nd->step - - (word_splstr_len - dmi_pool_[dmi_fr].splstr_len); - spl_id_[spl_id_num_] = dmi_pool_[dmi_fr].spl_id; - spl_id_num_++; - dmi_fr = dmi_pool_[dmi_fr].dmi_fr; - } - - // Update the lemma segmentation information - lma_start_[lma_id_num_ + 1] = spl_id_num_; - lma_id_[lma_id_num_] = mtrx_nd->id; - lma_id_num_++; - - mtrx_nd = mtrx_nd->from; - } - - // Reverse the result of spelling info - for (size_t pos = fixed_hzs_; - pos < fixed_hzs_ + (spl_id_num_ - fixed_hzs_ + 1) / 2; pos++) { - if (spl_id_num_ + fixed_hzs_ - pos != pos + 1) { - spl_start_[pos + 1] ^= spl_start_[spl_id_num_ - pos + fixed_hzs_]; - spl_start_[spl_id_num_ - pos + fixed_hzs_] ^= spl_start_[pos + 1]; - spl_start_[pos + 1] ^= spl_start_[spl_id_num_ - pos + fixed_hzs_]; - - spl_id_[pos] ^= spl_id_[spl_id_num_ 
+ fixed_hzs_ - pos - 1]; - spl_id_[spl_id_num_ + fixed_hzs_- pos - 1] ^= spl_id_[pos]; - spl_id_[pos] ^= spl_id_[spl_id_num_ + fixed_hzs_- pos - 1]; - } - } - - // Reverse the result of lemma info - for (size_t pos = fixed_lmas_; - pos < fixed_lmas_ + (lma_id_num_ - fixed_lmas_ + 1) / 2; pos++) { - assert(lma_id_num_ + fixed_lmas_ - pos - 1 >= pos); - - if (lma_id_num_ + fixed_lmas_ - pos > pos + 1) { - lma_start_[pos + 1] ^= lma_start_[lma_id_num_ - pos + fixed_lmas_]; - lma_start_[lma_id_num_ - pos + fixed_lmas_] ^= lma_start_[pos + 1]; - lma_start_[pos + 1] ^= lma_start_[lma_id_num_ - pos + fixed_lmas_]; - - lma_id_[pos] ^= lma_id_[lma_id_num_ - 1 - pos + fixed_lmas_]; - lma_id_[lma_id_num_ - 1 - pos + fixed_lmas_] ^= lma_id_[pos]; - lma_id_[pos] ^= lma_id_[lma_id_num_ - 1 - pos + fixed_lmas_]; - } - } - - for (size_t pos = fixed_lmas_ + 1; pos <= lma_id_num_; pos++) { - if (pos < lma_id_num_) - lma_start_[pos] = lma_start_[pos - 1] + - (lma_start_[pos] - lma_start_[pos + 1]); - else - lma_start_[pos] = lma_start_[pos - 1] + lma_start_[pos] - - lma_start_[fixed_lmas_]; - } - - // Find the last fixed position - fixed_hzs_ = 0; - for (size_t pos = spl_id_num_; pos > 0; pos--) { - if (NULL != matrix_[spl_start_[pos]].mtrx_nd_fixed) { - fixed_hzs_ = pos; - break; - } - } - - return; -} - -size_t MatrixSearch::get_spl_start(const uint16 *&spl_start) { - get_spl_start_id(); - spl_start = spl_start_; - return spl_id_num_; -} - -size_t MatrixSearch::extend_dmi(DictExtPara *dep, DictMatchInfo *dmi_s) { - if (dmi_pool_used_ >= kDmiPoolSize) return 0; - - if (dmi_c_phrase_) - return extend_dmi_c(dep, dmi_s); - - LpiCache& lpi_cache = LpiCache::get_instance(); - uint16 splid = dep->splids[dep->splids_extended]; - - bool cached = false; - if (0 == dep->splids_extended) - cached = lpi_cache.is_cached(splid); - - // 1. If this is a half Id, get its corresponding full starting Id and - // number of full Id. 
- size_t ret_val = 0; - PoolPosType mtrx_dmi_fr = (PoolPosType)-1; // From which dmi node - - lpi_total_ = 0; - - MileStoneHandle from_h[3]; - from_h[0] = 0; - from_h[1] = 0; - - if (0 != dep->splids_extended) { - from_h[0] = dmi_s->dict_handles[0]; - from_h[1] = dmi_s->dict_handles[1]; - } - - // 2. Begin exgtending in the system dictionary - size_t lpi_num = 0; - MileStoneHandle handles[2]; - handles[0] = handles[1] = 0; - if (from_h[0] > 0 || NULL == dmi_s) { - handles[0] = dict_trie_->extend_dict(from_h[0], dep, lpi_items_, - kMaxLmaPsbItems, &lpi_num); - } - if (handles[0] > 0) - lpi_total_ = lpi_num; - - if (NULL == dmi_s) { // from root - assert(0 != handles[0]); - mtrx_dmi_fr = dmi_pool_used_; - } - - // 3. Begin extending in the user dictionary - if (NULL != user_dict_ && (from_h[1] > 0 || NULL == dmi_s)) { - handles[1] = user_dict_->extend_dict(from_h[1], dep, - lpi_items_ + lpi_total_, - kMaxLmaPsbItems - lpi_total_, - &lpi_num); - if (handles[1] > 0) { - if (kPrintDebug0) { - for (size_t t = 0; t < lpi_num; t++) { - printf("--Extend in user dict: uid:%d uscore:%d\n", lpi_items_[lpi_total_ + t].id, - lpi_items_[lpi_total_ + t].psb); - } - } - lpi_total_ += lpi_num; - } - } - - if (0 != handles[0] || 0 != handles[1]) { - if (dmi_pool_used_ >= kDmiPoolSize) return 0; - - DictMatchInfo *dmi_add = dmi_pool_ + dmi_pool_used_; - if (NULL == dmi_s) { - fill_dmi(dmi_add, handles, - (PoolPosType)-1, splid, - 1, 1, dep->splid_end_split, dep->ext_len, - spl_trie_->is_half_id(splid) ? 0 : 1); - } else { - fill_dmi(dmi_add, handles, - dmi_s - dmi_pool_, splid, 1, - dmi_s->dict_level + 1, dep->splid_end_split, - dmi_s->splstr_len + dep->ext_len, - spl_trie_->is_half_id(splid) ? 
0 : dmi_s->all_full_id); - } - - ret_val = 1; - } - - if (!cached) { - if (0 == lpi_total_) - return ret_val; - - if (kPrintDebug0) { - printf("--- lpi_total_ = %d\n", lpi_total_); - } - - myqsort(lpi_items_, lpi_total_, sizeof(LmaPsbItem), cmp_lpi_with_psb); - if (NULL == dmi_s && spl_trie_->is_half_id(splid)) - lpi_total_ = lpi_cache.put_cache(splid, lpi_items_, lpi_total_); - } else { - assert(spl_trie_->is_half_id(splid)); - lpi_total_ = lpi_cache.get_cache(splid, lpi_items_, kMaxLmaPsbItems); - } - - return ret_val; -} - -size_t MatrixSearch::extend_dmi_c(DictExtPara *dep, DictMatchInfo *dmi_s) { - lpi_total_ = 0; - - uint16 pos = dep->splids_extended; - assert(dmi_c_phrase_); - if (pos >= c_phrase_.length) - return 0; - - uint16 splid = dep->splids[pos]; - if (splid == c_phrase_.spl_ids[pos]) { - DictMatchInfo *dmi_add = dmi_pool_ + dmi_pool_used_; - MileStoneHandle handles[2]; // Actually never used. - if (NULL == dmi_s) - fill_dmi(dmi_add, handles, - (PoolPosType)-1, splid, - 1, 1, dep->splid_end_split, dep->ext_len, - spl_trie_->is_half_id(splid) ? 0 : 1); - else - fill_dmi(dmi_add, handles, - dmi_s - dmi_pool_, splid, 1, - dmi_s->dict_level + 1, dep->splid_end_split, - dmi_s->splstr_len + dep->ext_len, - spl_trie_->is_half_id(splid) ? 0 : dmi_s->all_full_id); - - if (pos == c_phrase_.length - 1) { - lpi_items_[0].id = kLemmaIdComposing; - lpi_items_[0].psb = 0; // 0 is bigger than normal lemma score. - lpi_total_ = 1; - } - return 1; - } - return 0; -} - -size_t MatrixSearch::extend_mtrx_nd(MatrixNode *mtrx_nd, LmaPsbItem lpi_items[], - size_t lpi_num, PoolPosType dmi_fr, - size_t res_row) { - assert(NULL != mtrx_nd); - matrix_[res_row].mtrx_nd_fixed = NULL; - - if (mtrx_nd_pool_used_ >= kMtrxNdPoolSize - kMaxNodeARow) - return 0; - - if (0 == mtrx_nd->step) { - // Because the list is sorted, if the source step is 0, it is only - // necessary to pick up the first kMaxNodeARow items. 
- if (lpi_num > kMaxNodeARow) - lpi_num = kMaxNodeARow; - } - - MatrixNode *mtrx_nd_res_min = mtrx_nd_pool_ + matrix_[res_row].mtrx_nd_pos; - for (size_t pos = 0; pos < lpi_num; pos++) { - float score = mtrx_nd->score + lpi_items[pos].psb; - if (pos > 0 && score - PRUMING_SCORE > mtrx_nd_res_min->score) - break; - - // Try to add a new node - size_t mtrx_nd_num = matrix_[res_row].mtrx_nd_num; - MatrixNode *mtrx_nd_res = mtrx_nd_res_min + mtrx_nd_num; - bool replace = false; - // Find its position - while (mtrx_nd_res > mtrx_nd_res_min && score < (mtrx_nd_res - 1)->score) { - if (static_cast(mtrx_nd_res - mtrx_nd_res_min) < kMaxNodeARow) - *mtrx_nd_res = *(mtrx_nd_res - 1); - mtrx_nd_res--; - replace = true; - } - if (replace || (mtrx_nd_num < kMaxNodeARow && - matrix_[res_row].mtrx_nd_pos + mtrx_nd_num < kMtrxNdPoolSize)) { - mtrx_nd_res->id = lpi_items[pos].id; - mtrx_nd_res->score = score; - mtrx_nd_res->from = mtrx_nd; - mtrx_nd_res->dmi_fr = dmi_fr; - mtrx_nd_res->step = res_row; - if (matrix_[res_row].mtrx_nd_num < kMaxNodeARow) - matrix_[res_row].mtrx_nd_num++; - } - } - return matrix_[res_row].mtrx_nd_num; -} - -PoolPosType MatrixSearch::match_dmi(size_t step_to, uint16 spl_ids[], - uint16 spl_id_num) { - if (pys_decoded_len_ < step_to || 0 == matrix_[step_to].dmi_num) { - return static_cast(-1); - } - - for (PoolPosType dmi_pos = 0; dmi_pos < matrix_[step_to].dmi_num; dmi_pos++) { - DictMatchInfo *dmi = dmi_pool_ + matrix_[step_to].dmi_pos + dmi_pos; - - if (dmi->dict_level != spl_id_num) - continue; - - bool matched = true; - for (uint16 spl_pos = 0; spl_pos < spl_id_num; spl_pos++) { - if (spl_ids[spl_id_num - spl_pos - 1] != dmi->spl_id) { - matched = false; - break; - } - - dmi = dmi_pool_ + dmi->dmi_fr; - } - if (matched) { - return matrix_[step_to].dmi_pos + dmi_pos; - } - } - - return static_cast(-1); -} - -char16* MatrixSearch::get_candidate0(char16 *cand_str, size_t max_len, - uint16 *retstr_len, - bool only_unfixed) { - if (pys_decoded_len_ == 0 
|| - matrix_[pys_decoded_len_].mtrx_nd_num == 0) - return NULL; - - LemmaIdType idxs[kMaxRowNum]; - size_t id_num = 0; - - MatrixNode *mtrx_nd = mtrx_nd_pool_ + matrix_[pys_decoded_len_].mtrx_nd_pos; - - if (kPrintDebug0) { - printf("--- sentence score: %f\n", mtrx_nd->score); - } - - if (kPrintDebug1) { - printf("==============Sentence DMI (reverse order) begin===========>>\n"); - } - - while (mtrx_nd != NULL) { - idxs[id_num] = mtrx_nd->id; - id_num++; - - if (kPrintDebug1) { - printf("---MatrixNode [step: %d, lma_idx: %d, total score:%.5f]\n", - mtrx_nd->step, mtrx_nd->id, mtrx_nd->score); - debug_print_dmi(mtrx_nd->dmi_fr, 1); - } - - mtrx_nd = mtrx_nd->from; - } - - if (kPrintDebug1) { - printf("<<==============Sentence DMI (reverse order) end=============\n"); - } - - size_t ret_pos = 0; - do { - id_num--; - if (0 == idxs[id_num]) - continue; - - char16 str[kMaxLemmaSize + 1]; - uint16 str_len = get_lemma_str(idxs[id_num], str, kMaxLemmaSize + 1); - if (str_len > 0 && ((!only_unfixed && max_len - ret_pos > str_len) || - (only_unfixed && max_len - ret_pos + fixed_hzs_ > str_len))) { - if (!only_unfixed) - utf16_strncpy(cand_str + ret_pos, str, str_len); - else if (ret_pos >= fixed_hzs_) - utf16_strncpy(cand_str + ret_pos - fixed_hzs_, str, str_len); - - ret_pos += str_len; - } else { - return NULL; - } - } while (id_num != 0); - - if (!only_unfixed) { - if (NULL != retstr_len) - *retstr_len = ret_pos; - cand_str[ret_pos] = (char16)'\0'; - } else { - if (NULL != retstr_len) - *retstr_len = ret_pos - fixed_hzs_; - cand_str[ret_pos - fixed_hzs_] = (char16)'\0'; - } - return cand_str; -} - -size_t MatrixSearch::get_lpis(const uint16* splid_str, size_t splid_str_len, - LmaPsbItem* lma_buf, size_t max_lma_buf, - const char16 *pfullsent, bool sort_by_psb) { - if (splid_str_len > kMaxLemmaSize) - return 0; - - size_t num1 = dict_trie_->get_lpis(splid_str, splid_str_len, - lma_buf, max_lma_buf); - size_t num2 = 0; - if (NULL != user_dict_) { - num2 = 
user_dict_->get_lpis(splid_str, splid_str_len, - lma_buf + num1, max_lma_buf - num1); - } - - size_t num = num1 + num2; - - if (0 == num) - return 0; - - // Remove repeated items. - if (splid_str_len > 1) { - LmaPsbStrItem *lpsis = reinterpret_cast(lma_buf + num); - size_t lpsi_num = (max_lma_buf - num) * sizeof(LmaPsbItem) / - sizeof(LmaPsbStrItem); - assert(lpsi_num > num); - if (num > lpsi_num) num = lpsi_num; - lpsi_num = num; - - for (size_t pos = 0; pos < lpsi_num; pos++) { - lpsis[pos].lpi = lma_buf[pos]; - get_lemma_str(lma_buf[pos].id, lpsis[pos].str, kMaxLemmaSize + 1); - } - - myqsort(lpsis, lpsi_num, sizeof(LmaPsbStrItem), cmp_lpsi_with_str); - - size_t remain_num = 0; - for (size_t pos = 0; pos < lpsi_num; pos++) { - if (pos > 0 && utf16_strcmp(lpsis[pos].str, lpsis[pos - 1].str) == 0) { - if (lpsis[pos].lpi.psb < lpsis[pos - 1].lpi.psb) { - assert(remain_num > 0); - lma_buf[remain_num - 1] = lpsis[pos].lpi; - } - continue; - } - if (NULL != pfullsent && utf16_strcmp(lpsis[pos].str, pfullsent) == 0) - continue; - - lma_buf[remain_num] = lpsis[pos].lpi; - remain_num++; - } - - // Update the result number - num = remain_num; - } else { - // For single character, some characters have more than one spelling, for - // example, "de" and "di" are all valid for a Chinese character, so when - // the user input "d", repeated items are generated. 
- // For single character lemmas, Hanzis will be gotten - for (size_t pos = 0; pos < num; pos++) { - char16 hanzis[2]; - get_lemma_str(lma_buf[pos].id, hanzis, 2); - lma_buf[pos].hanzi = hanzis[0]; - } - - myqsort(lma_buf, num, sizeof(LmaPsbItem), cmp_lpi_with_hanzi); - - size_t remain_num = 0; - for (size_t pos = 0; pos < num; pos++) { - if (pos > 0 && lma_buf[pos].hanzi == lma_buf[pos - 1].hanzi) { - if (NULL != pfullsent && - static_cast(0) == pfullsent[1] && - lma_buf[pos].hanzi == pfullsent[0]) - continue; - - if (lma_buf[pos].psb < lma_buf[pos - 1].psb) { - assert(remain_num > 0); - assert(lma_buf[remain_num - 1].hanzi == lma_buf[pos].hanzi); - lma_buf[remain_num - 1] = lma_buf[pos]; - } - continue; - } - if (NULL != pfullsent && - static_cast(0) == pfullsent[1] && - lma_buf[pos].hanzi == pfullsent[0]) - continue; - - lma_buf[remain_num] = lma_buf[pos]; - remain_num++; - } - - num = remain_num; - } - - if (sort_by_psb) { - myqsort(lma_buf, num, sizeof(LmaPsbItem), cmp_lpi_with_psb); - } - return num; -} - -uint16 MatrixSearch::get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, - uint16 str_max) { - uint16 str_len = 0; - - if (is_system_lemma(id_lemma)) { - str_len = dict_trie_->get_lemma_str(id_lemma, str_buf, str_max); - } else if (is_user_lemma(id_lemma)) { - if (NULL != user_dict_) { - str_len = user_dict_->get_lemma_str(id_lemma, str_buf, str_max); - } else { - str_len = 0; - str_buf[0] = static_cast('\0'); - } - } else if (is_composing_lemma(id_lemma)) { - if (str_max <= 1) - return 0; - str_len = c_phrase_.sublma_start[c_phrase_.sublma_num]; - if (str_len > str_max - 1) - str_len = str_max - 1; - utf16_strncpy(str_buf, c_phrase_.chn_str, str_len); - str_buf[str_len] = (char16)'\0'; - return str_len; - } - - return str_len; -} - -uint16 MatrixSearch::get_lemma_splids(LemmaIdType id_lemma, uint16 *splids, - uint16 splids_max, bool arg_valid) { - uint16 splid_num = 0; - - if (arg_valid) { - for (splid_num = 0; splid_num < splids_max; splid_num++) { - if 
(spl_trie_->is_half_id(splids[splid_num])) - break; - } - if (splid_num == splids_max) - return splid_num; - } - - if (is_system_lemma(id_lemma)) { - splid_num = dict_trie_->get_lemma_splids(id_lemma, splids, splids_max, - arg_valid); - } else if (is_user_lemma(id_lemma)) { - if (NULL != user_dict_) { - splid_num = user_dict_->get_lemma_splids(id_lemma, splids, splids_max, - arg_valid); - } else { - splid_num = 0; - } - } else if (is_composing_lemma(id_lemma)) { - if (c_phrase_.length > splids_max) { - return 0; - } - for (uint16 pos = 0; pos < c_phrase_.length; pos++) { - splids[pos] = c_phrase_.spl_ids[pos]; - if (spl_trie_->is_half_id(splids[pos])) { - return 0; - } - } - } - return splid_num; -} - -size_t MatrixSearch::inner_predict(const char16 *fixed_buf, uint16 fixed_len, - char16 predict_buf[][kMaxPredictSize + 1], - size_t buf_len) { - size_t res_total = 0; - memset(npre_items_, 0, sizeof(NPredictItem) * npre_items_len_); - // In order to shorten the comments, j-character candidates predicted by - // i-character prefix are called P(i,j). All candiates predicted by - // i-character prefix are called P(i,*) - // Step 1. Get P(kMaxPredictSize, *) and sort them, here - // P(kMaxPredictSize, *) == P(kMaxPredictSize, 1) - for (size_t len = fixed_len; len >0; len--) { - // How many blank items are available - size_t this_max = npre_items_len_ - res_total; - size_t res_this; - // If the history is longer than 1, and we can not get prediction from - // lemmas longer than 2, in this case, we will add lemmas with - // highest scores as the prediction result. - if (fixed_len > 1 && 1 == len && 0 == res_total) { - // Try to find if recent n (n>1) characters can be a valid lemma in system - // dictionary. - bool nearest_n_word = false; - for (size_t nlen = 2; nlen <= fixed_len; nlen++) { - if (dict_trie_->get_lemma_id(fixed_buf + fixed_len - nlen, nlen) > 0) { - nearest_n_word = true; - break; - } - } - res_this = dict_trie_->predict_top_lmas(nearest_n_word ? 
len : 0, - npre_items_ + res_total, - this_max, res_total); - res_total += res_this; - } - - // How many blank items are available - this_max = npre_items_len_ - res_total; - res_this = 0; - if (!kOnlyUserDictPredict) { - res_this = - dict_trie_->predict(fixed_buf + fixed_len - len, len, - npre_items_ + res_total, this_max, - res_total); - } - - if (NULL != user_dict_) { - res_this = res_this + - user_dict_->predict(fixed_buf + fixed_len - len, len, - npre_items_ + res_total + res_this, - this_max - res_this, res_total + res_this); - } - - if (kPredictLimitGt1) { - myqsort(npre_items_ + res_total, res_this, sizeof(NPredictItem), - cmp_npre_by_score); - - if (len > 3) { - if (res_this > kMaxPredictNumByGt3) - res_this = kMaxPredictNumByGt3; - } else if (3 == len) { - if (res_this > kMaxPredictNumBy3) - res_this = kMaxPredictNumBy3; - } else if (2 == len) { - if (res_this > kMaxPredictNumBy2) - res_this = kMaxPredictNumBy2; - } - } - - res_total += res_this; - } - - res_total = remove_duplicate_npre(npre_items_, res_total); - - if (kPreferLongHistoryPredict) { - myqsort(npre_items_, res_total, sizeof(NPredictItem), - cmp_npre_by_hislen_score); - } else { - myqsort(npre_items_, res_total, sizeof(NPredictItem), - cmp_npre_by_score); - } - - if (buf_len < res_total) { - res_total = buf_len; - } - - if (kPrintDebug2) { - printf("/////////////////Predicted Items Begin////////////////////>>\n"); - for (size_t i = 0; i < res_total; i++) { - printf("---"); - for (size_t j = 0; j < kMaxPredictSize; j++) { - printf("%d ", npre_items_[i].pre_hzs[j]); - } - printf("\n"); - } - printf("< kMaxPredictSize || 0 == buf_len) - return 0; - - return inner_predict(fixed_buf, fixed_len, predict_buf, buf_len); -} - -} // namespace ime_pinyin diff --git a/matrixsearch.h b/matrixsearch.h deleted file mode 100644 index f581d30..0000000 --- a/matrixsearch.h +++ /dev/null @@ -1,456 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, 
Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PINYINIME_ANDPY_INCLUDE_MATRIXSEARCH_H__ -#define PINYINIME_ANDPY_INCLUDE_MATRIXSEARCH_H__ - -#include -#include "./atomdictbase.h" -#include "./dicttrie.h" -#include "./searchutility.h" -#include "./spellingtrie.h" -#include "./splparser.h" - -namespace ime_pinyin { - -static const size_t kMaxRowNum = kMaxSearchSteps; - -typedef struct { - // MileStoneHandle objects for the system and user dictionaries. - MileStoneHandle dict_handles[2]; - // From which DMI node. -1 means it's from root. - PoolPosType dmi_fr; - // The spelling id for the Pinyin string from the previous DMI to this node. - // If it is a half id like Shengmu, the node pointed by dict_node is the first - // node with this Shengmu, - uint16 spl_id; - // What's the level of the dict node. Level of root is 0, but root is never - // recorded by dict_node. - unsigned char dict_level:7; - // If this node is for composing phrase, this bit is 1. - unsigned char c_phrase:1; - // Whether the spl_id is parsed with a split character at the end. - unsigned char splid_end_split:1; - // What's the length of the spelling string for this match, for the whole - // word. - unsigned char splstr_len:7; - // Used to indicate whether all spelling ids from the root are full spelling - // ids. This information is useful for keymapping mode(not finished). Because - // in this mode, there is no clear boundaries, we prefer those results which - // have full spelling ids. 
- unsigned char all_full_id:1; -} DictMatchInfo, *PDictMatchInfo; - -typedef struct MatrixNode { - LemmaIdType id; - float score; - MatrixNode *from; - // From which DMI node. Used to trace the spelling segmentation. - PoolPosType dmi_fr; - uint16 step; -} MatrixNode, *PMatrixNode; - -typedef struct { - // The MatrixNode position in the matrix pool - PoolPosType mtrx_nd_pos; - // The DictMatchInfo position in the DictMatchInfo pool. - PoolPosType dmi_pos; - uint16 mtrx_nd_num; - uint16 dmi_num:15; - // Used to indicate whether there are dmi nodes in this step with full - // spelling id. This information is used to decide whether a substring of a - // valid Pinyin should be extended. - // - // Example1: shoudao - // When the last char 'o' is added, the parser will find "dao" is a valid - // Pinyin, and because all dmi nodes at location 'd' (including those for - // "shoud", and those for "d") have Shengmu id only, so it is not necessary - // to extend "ao", otherwise the result may be "shoud ao", that is not - // reasonable. - // - // Example2: hengao - // When the last 'o' is added, the parser finds "gao" is a valid Pinyin. - // Because some dmi nodes at 'g' has Shengmu ids (hen'g and g), but some dmi - // nodes at 'g' has full ids ('heng'), so it is necessary to extend "ao", thus - // "heng ao" can also be the result. - // - // Similarly, "ganga" is expanded to "gang a". - // - // For Pinyin string "xian", because "xian" is a valid Pinyin, because all dmi - // nodes at 'x' only have Shengmu ids, the parser will not try "x ian" (and it - // is not valid either). If the parser uses break in the loop, the result - // always be "xian"; but if the parser uses continue in the loop, "xi an" will - // also be tried. This behaviour can be set via the function - // set_xi_an_switch(). - uint16 dmi_has_full_id:1; - // Points to a MatrixNode of the current step to indicate which choice the - // user selects. 
- MatrixNode *mtrx_nd_fixed; -} MatrixRow, *PMatrixRow; - -// When user inputs and selects candidates, the fixed lemma ids are stored in -// lma_id_ of class MatrixSearch, and fixed_lmas_ is used to indicate how many -// lemmas from the beginning are fixed. If user deletes Pinyin characters one -// by one from the end, these fixed lemmas can be unlocked one by one when -// necessary. Whenever user deletes a Chinese character and its spelling string -// in these fixed lemmas, all fixed lemmas will be merged together into a unit -// named ComposingPhrase with a lemma id kLemmaIdComposing, and this composing -// phrase will be the first lemma in the sentence. Because it contains some -// modified lemmas (by deleting a character), these merged lemmas are called -// sub lemmas (sublma), and each of them are represented individually, so that -// when user deletes Pinyin characters from the end, these sub lemmas can also -// be unlocked one by one. -typedef struct { - uint16 spl_ids[kMaxRowNum]; - uint16 spl_start[kMaxRowNum]; - char16 chn_str[kMaxRowNum]; // Chinese string. - uint16 sublma_start[kMaxRowNum]; // Counted in Chinese characters. - size_t sublma_num; - uint16 length; // Counted in Chinese characters. -} ComposingPhrase, *TComposingPhrase; - -class MatrixSearch { - private: - // If it is true, prediction list by string whose length is greater than 1 - // will be limited to a reasonable number. - static const bool kPredictLimitGt1 = false; - - // If it is true, the engine will prefer long history based prediction, - // for example, when user inputs "BeiJing", we prefer "DaXue", etc., which are - // based on the two-character history. - static const bool kPreferLongHistoryPredict = true; - - // If it is true, prediction will only be based on user dictionary. this flag - // is for debug purpose. - static const bool kOnlyUserDictPredict = false; - - // The maximum buffer to store LmaPsbItems. 
- static const size_t kMaxLmaPsbItems = 1450; - - // How many rows for each step. - static const size_t kMaxNodeARow = 5; - - // The maximum length of the sentence candidates counted in chinese - // characters - static const size_t kMaxSentenceLength = 16; - - // The size of the matrix node pool. - static const size_t kMtrxNdPoolSize = 200; - - // The size of the DMI node pool. - static const size_t kDmiPoolSize = 800; - - // Used to indicate whether this object has been initialized. - bool inited_; - - // Spelling trie. - const SpellingTrie *spl_trie_; - - // Used to indicate this switcher status: when "xian" is parseed, should - // "xi an" also be extended. Default is false. - // These cases include: xia, xian, xiang, zhuan, jiang..., etc. The string - // should be valid for a FULL spelling, or a combination of two spellings, - // first of which is a FULL id too. So even it is true, "da" will never be - // split into "d a", because "d" is not a full spelling id. - bool xi_an_enabled_; - - // System dictionary. - DictTrie* dict_trie_; - - // User dictionary. - AtomDictBase* user_dict_; - - // Spelling parser. - SpellingParser* spl_parser_; - - // The maximum allowed length of spelling string (such as a Pinyin string). - size_t max_sps_len_; - - // The maximum allowed length of a result Chinese string. - size_t max_hzs_len_; - - // Pinyin string. Max length: kMaxRowNum - 1 - char pys_[kMaxRowNum]; - - // The length of the string that has been decoded successfully. - size_t pys_decoded_len_; - - // Shared buffer for multiple purposes. - size_t *share_buf_; - - MatrixNode *mtrx_nd_pool_; - PoolPosType mtrx_nd_pool_used_; // How many nodes used in the pool - DictMatchInfo *dmi_pool_; - PoolPosType dmi_pool_used_; // How many items used in the pool - - MatrixRow *matrix_; // The first row is for starting - - DictExtPara *dep_; // Parameter used to extend DMI nodes. 
- - NPredictItem *npre_items_; // Used to do prediction - size_t npre_items_len_; - - // The starting positions and lemma ids for the full sentence candidate. - size_t lma_id_num_; - uint16 lma_start_[kMaxRowNum]; // Counted in spelling ids. - LemmaIdType lma_id_[kMaxRowNum]; - size_t fixed_lmas_; - - // If fixed_lmas_ is bigger than i, Element i is used to indicate whether - // the i'th lemma id in lma_id_ is the first candidate for that step. - // If all candidates are the first one for that step, the whole string can be - // decoded by the engine automatically, so no need to add it to user - // dictionary. (We are considering to add it to user dictionary in the - // future). - uint8 fixed_lmas_no1_[kMaxRowNum]; - - // Composing phrase - ComposingPhrase c_phrase_; - - // If dmi_c_phrase_ is true, the decoder will try to match the - // composing phrase (And definitely it will match successfully). If it - // is false, the decoder will try to match lemmas items in dictionaries. - bool dmi_c_phrase_; - - // The starting positions and spelling ids for the first full sentence - // candidate. - size_t spl_id_num_; // Number of splling ids - uint16 spl_start_[kMaxRowNum]; // Starting positions - uint16 spl_id_[kMaxRowNum]; // Spelling ids - // Used to remember the last fixed position, counted in Hanzi. - size_t fixed_hzs_; - - // Lemma Items with possibility score, two purposes: - // 1. In Viterbi decoding, this buffer is used to get all possible candidates - // for current step; - // 2. When the search is done, this buffer is used to get candiates from the - // first un-fixed step and show them to the user. - LmaPsbItem lpi_items_[kMaxLmaPsbItems]; - size_t lpi_total_; - - // Assign the pointers with NULL. The caller makes sure that all pointers are - // not valid before calling it. This function only will be called in the - // construction function and free_resource(). 
- void reset_pointers_to_null(); - - bool alloc_resource(); - - void free_resource(); - - // Reset the search space totally. - bool reset_search0(); - - // Reset the search space from ch_pos step. For example, if the original - // input Pinyin is "an", reset_search(1) will reset the search space to the - // result of "a". If the given position is out of range, return false. - // if clear_fixed_this_step is true, and the ch_pos step is a fixed step, - // clear its fixed status. if clear_dmi_his_step is true, clear the DMI nodes. - // If clear_mtrx_this_sTep is true, clear the mtrx nodes of this step. - // The DMI nodes will be kept. - // - // Note: this function should not destroy content of pys_. - bool reset_search(size_t ch_pos, bool clear_fixed_this_step, - bool clear_dmi_this_step, bool clear_mtrx_this_step); - - // Delete a part of the content in pys_. - void del_in_pys(size_t start, size_t len); - - // Delete a spelling id and its corresponding Chinese character, and merge - // the fixed lemmas into the composing phrase. - // del_spl_pos indicates which spelling id needs to be delete. - // This function will update the lemma and spelling segmentation information. - // The caller guarantees that fixed_lmas_ > 0 and del_spl_pos is within - // the fixed lemmas. - void merge_fixed_lmas(size_t del_spl_pos); - - // Get spelling start posistions and ids. The result will be stored in - // spl_id_num_, spl_start_[], spl_id_[]. - // fixed_hzs_ will be also assigned. - void get_spl_start_id(); - - // Get all lemma ids with match the given spelling id stream(shorter than the - // maximum length of a word). - // If pfullsent is not NULL, means the full sentence candidate may be the - // same with the coming lemma string, if so, remove that lemma. - // The result is sorted in descendant order by the frequency score. 
- size_t get_lpis(const uint16* splid_str, size_t splid_str_len, - LmaPsbItem* lma_buf, size_t max_lma_buf, - const char16 *pfullsent, bool sort_by_psb); - - uint16 get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, uint16 str_max); - - uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids, - uint16 splids_max, bool arg_valid); - - - // Extend a DMI node with a spelling id. ext_len is the length of the rows - // to extend, actually, it is the size of the spelling string of splid. - // return value can be 1 or 0. - // 1 means a new DMI is filled in (dmi_pool_used_ is the next blank DMI in - // the pool). - // 0 means either the dmi node can not be extended with splid, or the splid - // is a Shengmu id, which is only used to get lpi_items, or the result node - // in DictTrie has no son, it is not nccessary to keep the new DMI. - // - // This function modifies the content of lpi_items_ and lpi_total_. - // lpi_items_ is used to get the LmaPsbItem list, lpi_total_ returns the size. - // The function's returned value has no relation with the value of lpi_num. - // - // If dmi == NULL, this function will extend the root node of DictTrie - // - // This function will not change dmi_nd_pool_used_. Please change it after - // calling this function if necessary. - // - // The caller should guarantees that NULL != dep. - size_t extend_dmi(DictExtPara *dep, DictMatchInfo *dmi_s); - - // Extend dmi for the composing phrase. - size_t extend_dmi_c(DictExtPara *dep, DictMatchInfo *dmi_s); - - // Extend a MatrixNode with the give LmaPsbItem list. - // res_row is the destination row number. - // This function does not change mtrx_nd_pool_used_. Please change it after - // calling this function if necessary. - // return 0 always. - size_t extend_mtrx_nd(MatrixNode *mtrx_nd, LmaPsbItem lpi_items[], - size_t lpi_num, PoolPosType dmi_fr, size_t res_row); - - - // Try to find a dmi node at step_to position, and the found dmi node should - // match the given spelling id strings. 
- PoolPosType match_dmi(size_t step_to, uint16 spl_ids[], uint16 spl_id_num); - - bool add_char(char ch); - bool prepare_add_char(char ch); - - // Called after prepare_add_char, so the input char has been saved. - bool add_char_qwerty(); - - // Prepare candidates from the last fixed hanzi position. - void prepare_candidates(); - - // Is the character in step pos a splitter character? - // The caller guarantees that the position is valid. - bool is_split_at(uint16 pos); - - void fill_dmi(DictMatchInfo *dmi, MileStoneHandle *handles, - PoolPosType dmi_fr, - uint16 spl_id, uint16 node_num, unsigned char dict_level, - bool splid_end_split, unsigned char splstr_len, - unsigned char all_full_id); - - size_t inner_predict(const char16 fixed_scis_ids[], uint16 scis_num, - char16 predict_buf[][kMaxPredictSize + 1], - size_t buf_len); - - // Add the first candidate to the user dictionary. - bool try_add_cand0_to_userdict(); - - // Add a user lemma to the user dictionary. This lemma is a subset of - // candidate 0. lma_from is from which lemma in lma_ids_, lma_num is the - // number of lemmas to be combined together as a new lemma. The caller - // gurantees that the combined new lemma's length is less or equal to - // kMaxLemmaSize. - bool add_lma_to_userdict(uint16 lma_from, uint16 lma_num, float score); - - // Update dictionary frequencies. - void update_dict_freq(); - - void debug_print_dmi(PoolPosType dmi_pos, uint16 nest_level); - - public: - MatrixSearch(); - ~MatrixSearch(); - - bool init(const char *fn_sys_dict, const char *fn_usr_dict); - - bool init_fd(int sys_fd, long start_offset, long length, - const char *fn_usr_dict); - - void set_max_lens(size_t max_sps_len, size_t max_hzs_len); - - void close(); - - void flush_cache(); - - void set_xi_an_switch(bool xi_an_enabled); - - bool get_xi_an_switch(); - - // Reset the search space. Equivalent to reset_search(0). - // If inited, always return true; - bool reset_search(); - - // Search a Pinyin string. 
- // Return value is the position successfully parsed. - size_t search(const char *py, size_t py_len); - - // Used to delete something in the Pinyin string kept by the engine, and do - // a re-search. - // Return value is the new length of Pinyin string kept by the engine which - // is parsed successfully. - // If is_pos_in_splid is false, pos is used to indicate that pos-th Pinyin - // character needs to be deleted. If is_pos_in_splid is true, all Pinyin - // characters for pos-th spelling id needs to be deleted. - // If the deleted character(s) is just after a fixed lemma or sub lemma in - // composing phrase, clear_fixed_this_step indicates whether we needs to - // unlock the last fixed lemma or sub lemma. - // If is_pos_in_splid is false, and pos-th character is in the range for the - // fixed lemmas or composing string, this function will do nothing and just - // return the result of the previous search. - size_t delsearch(size_t pos, bool is_pos_in_splid, - bool clear_fixed_this_step); - - // Get the number of candiates, called after search(). - size_t get_candidate_num(); - - // Get the Pinyin string stored by the engine. - // *decoded_len returns the length of the successfully decoded string. - const char* get_pystr(size_t *decoded_len); - - // Get the spelling boundaries for the first sentence candidate. - // Number of spellings will be returned. The number of valid elements in - // spl_start is one more than the return value because the last one is used - // to indicate the beginning of the next un-input speling. - // For a Pinyin "women", the returned value is 2, spl_start is [0, 2, 5] . - size_t get_spl_start(const uint16 *&spl_start); - - // Get one candiate string. If full sentence candidate is available, it will - // be the first one. - char16* get_candidate(size_t cand_id, char16 *cand_str, size_t max_len); - - // Get the first candiate, which is a "full sentence". - // retstr_len is not NULL, it will be used to return the string length. 
- // If only_unfixed is true, only unfixed part will be fetched. - char16* get_candidate0(char16* cand_str, size_t max_len, - uint16 *retstr_len, bool only_unfixed); - - // Choose a candidate. The decoder will do a search after the fixed position. - size_t choose(size_t cand_id); - - // Cancel the last choosing operation, and return the new number of choices. - size_t cancel_last_choice(); - - // Get the length of fixed Hanzis. - size_t get_fixedlen(); - - size_t get_predicts(const char16 fixed_buf[], - char16 predict_buf[][kMaxPredictSize + 1], - size_t buf_len); -}; -} - -#endif // PINYINIME_ANDPY_INCLUDE_MATRIXSEARCH_H__ diff --git a/ngram.cpp b/ngram.cpp deleted file mode 100644 index cfbaf0e..0000000 --- a/ngram.cpp +++ /dev/null @@ -1,342 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include "./mystdlib.h" -#include "./ngram.h" - -namespace ime_pinyin { - -#define ADD_COUNT 0.3 - -int comp_double(const void *p1, const void *p2) { - if (*static_cast(p1) < *static_cast(p2)) - return -1; - if (*static_cast(p1) > *static_cast(p2)) - return 1; - return 0; -} - -inline double distance(double freq, double code) { - // return fabs(freq - code); - return freq * fabs(log(freq) - log(code)); -} - -// Find the index of the code value which is nearest to the given freq -int qsearch_nearest(double code_book[], double freq, int start, int end) { - if (start == end) - return start; - - if (start + 1 == end) { - if (distance(freq, code_book[end]) > distance(freq, code_book[start])) - return start; - return end; - } - - int mid = (start + end) / 2; - - if (code_book[mid] > freq) - return qsearch_nearest(code_book, freq, start, mid); - else - return qsearch_nearest(code_book, freq, mid, end); -} - -size_t update_code_idx(double freqs[], size_t num, double code_book[], - CODEBOOK_TYPE *code_idx) { - size_t changed = 0; - for (size_t pos = 0; pos < num; pos++) { - CODEBOOK_TYPE idx; - idx = qsearch_nearest(code_book, freqs[pos], 0, kCodeBookSize - 1); - if (idx != code_idx[pos]) - changed++; - code_idx[pos] = idx; - } - return changed; -} - -double recalculate_kernel(double freqs[], size_t num, double code_book[], - CODEBOOK_TYPE *code_idx) { - double ret = 0; - - size_t *item_num = new size_t[kCodeBookSize]; - assert(item_num); - memset(item_num, 0, sizeof(size_t) * kCodeBookSize); - - double *cb_new = new double[kCodeBookSize]; - assert(cb_new); - memset(cb_new, 0, sizeof(double) * kCodeBookSize); - - for (size_t pos = 0; pos < num; pos++) { - ret += distance(freqs[pos], code_book[code_idx[pos]]); - - cb_new[code_idx[pos]] += freqs[pos]; - item_num[code_idx[pos]] += 1; - } - - for (size_t code = 0; code < kCodeBookSize; code++) { - assert(item_num[code] > 0); - code_book[code] = cb_new[code] / 
item_num[code]; - } - - delete [] item_num; - delete [] cb_new; - - return ret; -} - -void iterate_codes(double freqs[], size_t num, double code_book[], - CODEBOOK_TYPE *code_idx) { - size_t iter_num = 0; - double delta_last = 0; - do { - size_t changed = update_code_idx(freqs, num, code_book, code_idx); - - double delta = recalculate_kernel(freqs, num, code_book, code_idx); - - if (kPrintDebug0) { - printf("---Unigram codebook iteration: %d : %d, %.9f\n", - iter_num, changed, delta); - } - iter_num++; - - if (iter_num > 1 && - (delta == 0 || fabs(delta_last - delta)/fabs(delta) < 0.000000001)) - break; - delta_last = delta; - } while (true); -} - - -NGram* NGram::instance_ = NULL; - -NGram::NGram() { - initialized_ = false; - idx_num_ = 0; - lma_freq_idx_ = NULL; - sys_score_compensation_ = 0; - -#ifdef ___BUILD_MODEL___ - freq_codes_df_ = NULL; -#endif - freq_codes_ = NULL; -} - -NGram::~NGram() { - if (NULL != lma_freq_idx_) - free(lma_freq_idx_); - -#ifdef ___BUILD_MODEL___ - if (NULL != freq_codes_df_) - free(freq_codes_df_); -#endif - - if (NULL != freq_codes_) - free(freq_codes_); -} - -NGram& NGram::get_instance() { - if (NULL == instance_) - instance_ = new NGram(); - return *instance_; -} - -bool NGram::save_ngram(FILE *fp) { - if (!initialized_ || NULL == fp) - return false; - - if (0 == idx_num_ || NULL == freq_codes_ || NULL == lma_freq_idx_) - return false; - - if (fwrite(&idx_num_, sizeof(uint32), 1, fp) != 1) - return false; - - if (fwrite(freq_codes_, sizeof(LmaScoreType), kCodeBookSize, fp) != - kCodeBookSize) - return false; - - if (fwrite(lma_freq_idx_, sizeof(CODEBOOK_TYPE), idx_num_, fp) != idx_num_) - return false; - - return true; -} - -bool NGram::load_ngram(FILE *fp) { - if (NULL == fp) - return false; - - initialized_ = false; - - if (fread(&idx_num_, sizeof(uint32), 1, fp) != 1 ) - return false; - - if (NULL != lma_freq_idx_) - free(lma_freq_idx_); - - if (NULL != freq_codes_) - free(freq_codes_); - - lma_freq_idx_ = static_cast - 
(malloc(idx_num_ * sizeof(CODEBOOK_TYPE))); - freq_codes_ = static_cast - (malloc(kCodeBookSize * sizeof(LmaScoreType))); - - if (NULL == lma_freq_idx_ || NULL == freq_codes_) - return false; - - if (fread(freq_codes_, sizeof(LmaScoreType), kCodeBookSize, fp) != - kCodeBookSize) - return false; - - if (fread(lma_freq_idx_, sizeof(CODEBOOK_TYPE), idx_num_, fp) != idx_num_) - return false; - - initialized_ = true; - - total_freq_none_sys_ = 0; - return true; -} - -void NGram::set_total_freq_none_sys(size_t freq_none_sys) { - total_freq_none_sys_ = freq_none_sys; - if (0 == total_freq_none_sys_) { - sys_score_compensation_ = 0; - } else { - double factor = static_cast(kSysDictTotalFreq) / ( - kSysDictTotalFreq + total_freq_none_sys_); - sys_score_compensation_ = static_cast( - log(factor) * kLogValueAmplifier); - } -} - -// The caller makes sure this oject is initialized. -float NGram::get_uni_psb(LemmaIdType lma_id) { - return static_cast(freq_codes_[lma_freq_idx_[lma_id]]) + - sys_score_compensation_; -} - -float NGram::convert_psb_to_score(double psb) { - float score = static_cast( - log(psb) * static_cast(kLogValueAmplifier)); - if (score > static_cast(kMaxScore)) { - score = static_cast(kMaxScore); - } - return score; -} - -#ifdef ___BUILD_MODEL___ -bool NGram::build_unigram(LemmaEntry *lemma_arr, size_t lemma_num, - LemmaIdType next_idx_unused) { - if (NULL == lemma_arr || 0 == lemma_num || next_idx_unused <= 1) - return false; - - double total_freq = 0; - double *freqs = new double[next_idx_unused]; - if (NULL == freqs) - return false; - - freqs[0] = ADD_COUNT; - total_freq += freqs[0]; - LemmaIdType idx_now = 0; - for (size_t pos = 0; pos < lemma_num; pos++) { - if (lemma_arr[pos].idx_by_hz == idx_now) - continue; - idx_now++; - - assert(lemma_arr[pos].idx_by_hz == idx_now); - - freqs[idx_now] = lemma_arr[pos].freq; - if (freqs[idx_now] <= 0) - freqs[idx_now] = 0.3; - - total_freq += freqs[idx_now]; - } - - double max_freq = 0; - idx_num_ = idx_now + 1; - 
assert(idx_now + 1 == next_idx_unused); - - for (size_t pos = 0; pos < idx_num_; pos++) { - freqs[pos] = freqs[pos] / total_freq; - assert(freqs[pos] > 0); - if (freqs[pos] > max_freq) - max_freq = freqs[pos]; - } - - // calculate the code book - if (NULL == freq_codes_df_) - freq_codes_df_ = new double[kCodeBookSize]; - assert(freq_codes_df_); - memset(freq_codes_df_, 0, sizeof(double) * kCodeBookSize); - - if (NULL == freq_codes_) - freq_codes_ = new LmaScoreType[kCodeBookSize]; - assert(freq_codes_); - memset(freq_codes_, 0, sizeof(LmaScoreType) * kCodeBookSize); - - size_t freq_pos = 0; - for (size_t code_pos = 0; code_pos < kCodeBookSize; code_pos++) { - bool found = true; - - while (found) { - found = false; - double cand = freqs[freq_pos]; - for (size_t i = 0; i < code_pos; i++) - if (freq_codes_df_[i] == cand) { - found = true; - break; - } - if (found) - freq_pos++; - } - - freq_codes_df_[code_pos] = freqs[freq_pos]; - freq_pos++; - } - - myqsort(freq_codes_df_, kCodeBookSize, sizeof(double), comp_double); - - if (NULL == lma_freq_idx_) - lma_freq_idx_ = new CODEBOOK_TYPE[idx_num_]; - assert(lma_freq_idx_); - - iterate_codes(freqs, idx_num_, freq_codes_df_, lma_freq_idx_); - - delete [] freqs; - - if (kPrintDebug0) { - printf("\n------Language Model Unigram Codebook------\n"); - } - - for (size_t code_pos = 0; code_pos < kCodeBookSize; code_pos++) { - double log_score = log(freq_codes_df_[code_pos]); - float final_score = convert_psb_to_score(freq_codes_df_[code_pos]); - if (kPrintDebug0) { - printf("code:%d, probability:%.9f, log score:%.3f, final score: %.3f\n", - code_pos, freq_codes_df_[code_pos], log_score, final_score); - } - freq_codes_[code_pos] = static_cast(final_score); - } - - initialized_ = true; - return true; -} -#endif - -} // namespace ime_pinyin diff --git a/ngram.h b/ngram.h deleted file mode 100644 index ad6c304..0000000 --- a/ngram.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * 
Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PINYINIME_INCLUDE_NGRAM_H__ -#define PINYINIME_INCLUDE_NGRAM_H__ - -#include -#include -#include "./dictdef.h" - -namespace ime_pinyin { - -typedef unsigned char CODEBOOK_TYPE; - -static const size_t kCodeBookSize = 256; - -class NGram { - public: - // The maximum score of a lemma item. - static const LmaScoreType kMaxScore = 0x3fff; - - // In order to reduce the storage size, the original log value is amplified by - // kScoreAmplifier, and we use LmaScoreType to store. - // After this process, an item with a lower score has a higher frequency. - static const int kLogValueAmplifier = -800; - - // System words' total frequency. It is not the real total frequency, instead, - // It is only used to adjust system lemmas' scores when the user dictionary's - // total frequency changes. - // In this version, frequencies of system lemmas are fixed. We are considering - // to make them changable in next version. - static const size_t kSysDictTotalFreq = 100000000; - - private: - - static NGram* instance_; - - bool initialized_; - size_t idx_num_; - - size_t total_freq_none_sys_; - - // Score compensation for system dictionary lemmas. - // Because after user adds some user lemmas, the total frequency changes, and - // we use this value to normalize the score. 
- float sys_score_compensation_; - -#ifdef ___BUILD_MODEL___ - double *freq_codes_df_; -#endif - LmaScoreType *freq_codes_; - CODEBOOK_TYPE *lma_freq_idx_; - - public: - NGram(); - ~NGram(); - - static NGram& get_instance(); - - bool save_ngram(FILE *fp); - bool load_ngram(FILE *fp); - - // Set the total frequency of all none system dictionaries. - void set_total_freq_none_sys(size_t freq_none_sys); - - float get_uni_psb(LemmaIdType lma_id); - - // Convert a probability to score. Actually, the score will be limited to - // kMaxScore, but at runtime, we also need float expression to get accurate - // value of the score. - // After the conversion, a lower score indicates a higher probability of the - // item. - static float convert_psb_to_score(double psb); - -#ifdef ___BUILD_MODEL___ - // For constructing the unigram mode model. - bool build_unigram(LemmaEntry *lemma_arr, size_t num, - LemmaIdType next_idx_unused); -#endif -}; -} - -#endif // PINYINIME_INCLUDE_NGRAM_H__ diff --git a/pinyinime.cpp b/pinyinime.cpp deleted file mode 100644 index afc91aa..0000000 --- a/pinyinime.cpp +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include "./pinyinime.h" -#include "./dicttrie.h" -#include "./matrixsearch.h" -#include "./spellingtrie.h" - -#ifdef __cplusplus -extern "C" { -#endif - - using namespace ime_pinyin; - - // The maximum number of the prediction items. - static const size_t kMaxPredictNum = 500; - - // Used to search Pinyin string and give the best candidate. - MatrixSearch* matrix_search = NULL; - - char16 predict_buf[kMaxPredictNum][kMaxPredictSize + 1]; - - bool im_open_decoder(const char *fn_sys_dict, const char *fn_usr_dict) { - if (NULL != matrix_search) - delete matrix_search; - - matrix_search = new MatrixSearch(); - if (NULL == matrix_search) { - return false; - } - - return matrix_search->init(fn_sys_dict, fn_usr_dict); - } - - bool im_open_decoder_fd(int sys_fd, long start_offset, long length, - const char *fn_usr_dict) { - if (NULL != matrix_search) - delete matrix_search; - - matrix_search = new MatrixSearch(); - if (NULL == matrix_search) - return false; - - return matrix_search->init_fd(sys_fd, start_offset, length, fn_usr_dict); - } - - void im_close_decoder() { - if (NULL != matrix_search) { - matrix_search->close(); - delete matrix_search; - } - matrix_search = NULL; - } - - void im_set_max_lens(size_t max_sps_len, size_t max_hzs_len) { - if (NULL != matrix_search) { - matrix_search->set_max_lens(max_sps_len, max_hzs_len); - } - } - - void im_flush_cache() { - if (NULL != matrix_search) - matrix_search->flush_cache(); - } - - // To be updated. 
- size_t im_search(const char* pybuf, size_t pylen) { - if (NULL == matrix_search) - return 0; - - matrix_search->search(pybuf, pylen); - return matrix_search->get_candidate_num(); - } - - size_t im_delsearch(size_t pos, bool is_pos_in_splid, - bool clear_fixed_this_step) { - if (NULL == matrix_search) - return 0; - matrix_search->delsearch(pos, is_pos_in_splid, clear_fixed_this_step); - return matrix_search->get_candidate_num(); - } - - void im_reset_search() { - if (NULL == matrix_search) - return; - - matrix_search->reset_search(); - } - - // To be removed - size_t im_add_letter(char ch) { - return 0; - } - - const char* im_get_sps_str(size_t *decoded_len) { - if (NULL == matrix_search) - return NULL; - - return matrix_search->get_pystr(decoded_len); - } - - char16* im_get_candidate(size_t cand_id, char16* cand_str, - size_t max_len) { - if (NULL == matrix_search) - return NULL; - - return matrix_search->get_candidate(cand_id, cand_str, max_len); - } - - size_t im_get_spl_start_pos(const uint16 *&spl_start) { - if (NULL == matrix_search) - return 0; - - return matrix_search->get_spl_start(spl_start); - } - - size_t im_choose(size_t choice_id) { - if (NULL == matrix_search) - return 0; - - return matrix_search->choose(choice_id); - } - - size_t im_cancel_last_choice() { - if (NULL == matrix_search) - return 0; - - return matrix_search->cancel_last_choice(); - } - - size_t im_get_fixed_len() { - if (NULL == matrix_search) - return 0; - - return matrix_search->get_fixedlen(); - } - - // To be removed - bool im_cancel_input() { - return true; - } - - - size_t im_get_predicts(const char16 *his_buf, - char16 (*&pre_buf)[kMaxPredictSize + 1]) { - if (NULL == his_buf) - return 0; - - size_t fixed_len = utf16_strlen(his_buf); - const char16 *fixed_ptr = his_buf; - if (fixed_len > kMaxPredictSize) { - fixed_ptr += fixed_len - kMaxPredictSize; - fixed_len = kMaxPredictSize; - } - - pre_buf = predict_buf; - return matrix_search->get_predicts(his_buf, pre_buf, 
kMaxPredictNum); - } - - void im_enable_shm_as_szm(bool enable) { - SpellingTrie &spl_trie = SpellingTrie::get_instance(); - spl_trie.szm_enable_shm(enable); - } - - void im_enable_ym_as_szm(bool enable) { - SpellingTrie &spl_trie = SpellingTrie::get_instance(); - spl_trie.szm_enable_ym(enable); - } - -#ifdef __cplusplus -} -#endif diff --git a/pinyinime.h b/pinyinime.h deleted file mode 100644 index 0744ec7..0000000 --- a/pinyinime.h +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PINYINIME_INCLUDE_ANDPYIME_H__ -#define PINYINIME_INCLUDE_ANDPYIME_H__ - -#include -#include "./dictdef.h" - -#ifdef __cplusplus -extern "C" { -#endif - - namespace ime_pinyin { - - /** - * Open the decoder engine via the system and user dictionary file names. - * - * @param fn_sys_dict The file name of the system dictionary. - * @param fn_usr_dict The file name of the user dictionary. - * @return true if open the decoder engine successfully. - */ - bool im_open_decoder(const char *fn_sys_dict, const char *fn_usr_dict); - - /** - * Open the decoder engine via the system dictionary FD and user dictionary - * file name. Because on Android, the system dictionary is embedded in the - * whole application apk file. - * - * @param sys_fd The file in which the system dictionary is embedded. 
- * @param start_offset The starting position of the system dictionary in the - * file sys_fd. - * @param length The length of the system dictionary in the file sys_fd, - * counted in byte. - * @return true if succeed. - */ - bool im_open_decoder_fd(int sys_fd, long start_offset, long length, - const char *fn_usr_dict); - - /** - * Close the decoder engine. - */ - void im_close_decoder(); - - /** - * Set maximum limitations for decoding. If this function is not called, - * default values will be used. For example, due to screen size limitation, - * the UI engine of the IME can only show a certain number of letters(input) - * to decode, and a certain number of Chinese characters(output). If after - * user adds a new letter, the input or the output string is longer than the - * limitations, the engine will discard the recent letter. - * - * @param max_sps_len Maximum length of the spelling string(Pinyin string). - * @max_hzs_len Maximum length of the decoded Chinese character string. - */ - void im_set_max_lens(size_t max_sps_len, size_t max_hzs_len); - - /** - * Flush cached data to persistent memory. Because at runtime, in order to - * achieve best performance, some data is only store in memory. - */ - void im_flush_cache(); - - /** - * Use a spelling string(Pinyin string) to search. The engine will try to do - * an incremental search based on its previous search result, so if the new - * string has the same prefix with the previous one stored in the decoder, - * the decoder will only continue the search from the end of the prefix. - * If the caller needs to do a brand new search, please call im_reset_search() - * first. Calling im_search() is equivalent to calling im_add_letter() one by - * one. - * - * @param sps_buf The spelling string buffer to decode. - * @param sps_len The length of the spelling string buffer. - * @return The number of candidates. 
- */ - size_t im_search(const char* sps_buf, size_t sps_len); - - /** - * Make a delete operation in the current search result, and make research if - * necessary. - * - * @param pos The posistion of char in spelling string to delete, or the - * position of spelling id in result string to delete. - * @param is_pos_in_splid Indicate whether the pos parameter is the position - * in the spelling string, or the position in the result spelling id string. - * @return The number of candidates. - */ - size_t im_delsearch(size_t pos, bool is_pos_in_splid, - bool clear_fixed_this_step); - - /** - * Reset the previous search result. - */ - void im_reset_search(); - - /** - * Add a Pinyin letter to the current spelling string kept by decoder. If the - * decoder fails in adding the letter, it will do nothing. im_get_sps_str() - * can be used to get the spelling string kept by decoder currently. - * - * @param ch The letter to add. - * @return The number of candidates. - */ - size_t im_add_letter(char ch); - - /** - * Get the spelling string kept by the decoder. - * - * @param decoded_len Used to return how many characters in the spelling - * string is successfully parsed. - * @return The spelling string kept by the decoder. - */ - const char *im_get_sps_str(size_t *decoded_len); - - /** - * Get a candidate(or choice) string. - * - * @param cand_id The id to get a candidate. Started from 0. Usually, id 0 - * is a sentence-level candidate. - * @param cand_str The buffer to store the candidate. - * @param max_len The maximum length of the buffer. - * @return cand_str if succeeds, otherwise NULL. - */ - char16* im_get_candidate(size_t cand_id, char16* cand_str, - size_t max_len); - - /** - * Get the segmentation information(the starting positions) of the spelling - * string. - * - * @param spl_start Used to return the starting posistions. - * @return The number of spelling ids. 
If it is L, there will be L+1 valid - * elements in spl_start, and spl_start[L] is the posistion after the end of - * the last spelling id. - */ - size_t im_get_spl_start_pos(const uint16 *&spl_start); - - /** - * Choose a candidate and make it fixed. If the candidate does not match - * the end of all spelling ids, new candidates will be provided from the - * first unfixed position. If the candidate matches the end of the all - * spelling ids, there will be only one new candidates, or the whole fixed - * sentence. - * - * @param cand_id The id of candidate to select and make it fixed. - * @return The number of candidates. If after the selection, the whole result - * string has been fixed, there will be only one candidate. - */ - size_t im_choose(size_t cand_id); - - /** - * Cancel the last selection, or revert the last operation of im_choose(). - * - * @return The number of candidates. - */ - size_t im_cancel_last_choice(); - - /** - * Get the number of fixed spelling ids, or Chinese characters. - * - * @return The number of fixed spelling ids, of Chinese characters. - */ - size_t im_get_fixed_len(); - - /** - * Cancel the input state and reset the search workspace. - */ - bool im_cancel_input(); - - /** - * Get prediction candiates based on the given fixed Chinese string as the - * history. - * - * @param his_buf The history buffer to do the prediction. It should be ended - * with '\0'. - * @param pre_buf Used to return prediction result list. - * @return The number of predicted result string. - */ - size_t im_get_predicts(const char16 *his_buf, - char16 (*&pre_buf)[kMaxPredictSize + 1]); - - /** - * Enable Shengmus in ShouZiMu mode. - */ - void im_enable_shm_as_szm(bool enable); - - /** - * Enable Yunmus in ShouZiMu mode. 
- */ - void im_enable_ym_as_szm(bool enable); -} - -#ifdef __cplusplus -} -#endif - -#endif // PINYINIME_INCLUDE_ANDPYIME_H__ diff --git a/searchutility.cpp b/searchutility.cpp deleted file mode 100644 index d25c66c..0000000 --- a/searchutility.cpp +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include "./mystdlib.h" -#include "./searchutility.h" - -namespace ime_pinyin { - -bool is_system_lemma(LemmaIdType lma_id) { - return (0 < lma_id && lma_id <= kSysDictIdEnd); -} - -bool is_user_lemma(LemmaIdType lma_id) { - return (kUserDictIdStart <= lma_id && lma_id <= kUserDictIdEnd); -} - -bool is_composing_lemma(LemmaIdType lma_id) { - return (kLemmaIdComposing == lma_id); -} - -int cmp_lpi_with_psb(const void *p1, const void *p2) { - if ((static_cast(p1))->psb > - (static_cast(p2))->psb) - return 1; - if ((static_cast(p1))->psb < - (static_cast(p2))->psb) - return -1; - return 0; -} - -int cmp_lpi_with_unified_psb(const void *p1, const void *p2) { - const LmaPsbItem *item1 = static_cast(p1); - const LmaPsbItem *item2 = static_cast(p2); - - // The real unified psb is psb1 / lma_len1 and psb2 * lma_len2 - // But we use psb1 * lma_len2 and psb2 * lma_len1 to get better - // precision. 
- size_t up1 = item1->psb * (item2->lma_len); - size_t up2 = item2->psb * (item1->lma_len); - if (up1 < up2) { - return -1; - } - if (up1 > up2) { - return 1; - } - return 0; -} - -int cmp_lpi_with_id(const void *p1, const void *p2) { - if ((static_cast(p1))->id < - (static_cast(p2))->id) - return -1; - if ((static_cast(p1))->id > - (static_cast(p2))->id) - return 1; - return 0; -} - -int cmp_lpi_with_hanzi(const void *p1, const void *p2) { - if ((static_cast(p1))->hanzi < - (static_cast(p2))->hanzi) - return -1; - if ((static_cast(p1))->hanzi > - (static_cast(p2))->hanzi) - return 1; - - return 0; -} - -int cmp_lpsi_with_str(const void *p1, const void *p2) { - return utf16_strcmp((static_cast(p1))->str, - (static_cast(p2))->str); -} - - -int cmp_hanzis_1(const void *p1, const void *p2) { - if (*static_cast(p1) < - *static_cast(p2)) - return -1; - - if (*static_cast(p1) > - *static_cast(p2)) - return 1; - return 0; -} - -int cmp_hanzis_2(const void *p1, const void *p2) { - return utf16_strncmp(static_cast(p1), - static_cast(p2), 2); -} - -int cmp_hanzis_3(const void *p1, const void *p2) { - return utf16_strncmp(static_cast(p1), - static_cast(p2), 3); -} - -int cmp_hanzis_4(const void *p1, const void *p2) { - return utf16_strncmp(static_cast(p1), - static_cast(p2), 4); -} - -int cmp_hanzis_5(const void *p1, const void *p2) { - return utf16_strncmp(static_cast(p1), - static_cast(p2), 5); -} - -int cmp_hanzis_6(const void *p1, const void *p2) { - return utf16_strncmp(static_cast(p1), - static_cast(p2), 6); -} - -int cmp_hanzis_7(const void *p1, const void *p2) { - return utf16_strncmp(static_cast(p1), - static_cast(p2), 7); -} - -int cmp_hanzis_8(const void *p1, const void *p2) { - return utf16_strncmp(static_cast(p1), - static_cast(p2), 8); -} - -int cmp_npre_by_score(const void *p1, const void *p2) { - if ((static_cast(p1))->psb > - (static_cast(p2))->psb) - return 1; - - if ((static_cast(p1))->psb < - (static_cast(p2))->psb) - return -1; - - return 0; -} - -int 
cmp_npre_by_hislen_score(const void *p1, const void *p2) { - if ((static_cast(p1))->his_len < - (static_cast(p2))->his_len) - return 1; - - if ((static_cast(p1))->his_len > - (static_cast(p2))->his_len) - return -1; - - if ((static_cast(p1))->psb > - (static_cast(p2))->psb) - return 1; - - if ((static_cast(p1))->psb < - (static_cast(p2))->psb) - return -1; - - return 0; -} - -int cmp_npre_by_hanzi_score(const void *p1, const void *p2) { - int ret_v = (utf16_strncmp((static_cast(p1))->pre_hzs, - (static_cast(p2))->pre_hzs, kMaxPredictSize)); - if (0 != ret_v) - return ret_v; - - if ((static_cast(p1))->psb > - (static_cast(p2))->psb) - return 1; - - if ((static_cast(p1))->psb < - (static_cast(p2))->psb) - return -1; - - return 0; -} - -size_t remove_duplicate_npre(NPredictItem *npre_items, size_t npre_num) { - if (NULL == npre_items || 0 == npre_num) - return 0; - - myqsort(npre_items, npre_num, sizeof(NPredictItem), cmp_npre_by_hanzi_score); - - size_t remain_num = 1; // The first one is reserved. - for (size_t pos = 1; pos < npre_num; pos++) { - if (utf16_strncmp(npre_items[pos].pre_hzs, - npre_items[remain_num - 1].pre_hzs, - kMaxPredictSize) != 0) { - if (remain_num != pos) { - npre_items[remain_num] = npre_items[pos]; - } - remain_num++; - } - } - return remain_num; -} - -size_t align_to_size_t(size_t size) { - size_t s = sizeof(size_t); - return (size + s -1) / s * s; -} - -} // namespace ime_pinyin diff --git a/spellingtable.cpp b/spellingtable.cpp deleted file mode 100644 index 8a6fb4b..0000000 --- a/spellingtable.cpp +++ /dev/null @@ -1,313 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include "./spellingtable.h" - -namespace ime_pinyin { - -#ifdef ___BUILD_MODEL___ - -const char SpellingTable:: - kNotSupportList[kNotSupportNum][kMaxSpellingSize + 1] = {"HM", "HNG", "NG"}; - -// "" is the biggest, so that all empty strings will be moved to the end -// _eb mean empty is biggest -int compare_raw_spl_eb(const void* p1, const void* p2) { - if ('\0' == (static_cast(p1))->str[0]) - return 1; - - if ('\0' == (static_cast(p2))->str[0]) - return -1; - - return strcmp((static_cast(p1))->str, - (static_cast(p2))->str); -} - -size_t get_odd_next(size_t value) { - size_t v_next = value; - while (true) { - size_t v_next_sqrt = (size_t)sqrt(v_next); - - bool is_odd = true; - for (size_t v_dv = 2; v_dv < v_next_sqrt + 1; v_dv++) { - if (v_next % v_dv == 0) { - is_odd = false; - break; - } - } - - if (is_odd) - return v_next; - - v_next++; - } - - // never reach here - return 0; -} - -SpellingTable::SpellingTable() { - need_score_ = false; - raw_spellings_ = NULL; - spelling_buf_ = NULL; - spelling_num_ = 0; - total_freq_ = 0; - frozen_ = true; -} - -SpellingTable::~SpellingTable() { - free_resource(); -} - -size_t SpellingTable::get_hash_pos(const char* spelling_str) { - size_t hash_pos = 0; - for (size_t pos = 0; pos < spelling_size_; pos++) { - if ('\0' == spelling_str[pos]) - break; - hash_pos += (size_t)spelling_str[pos]; - } - - hash_pos = hash_pos % spelling_max_num_; - return hash_pos; -} - -size_t SpellingTable::hash_pos_next(size_t hash_pos) { - hash_pos += 123; - hash_pos = 
hash_pos % spelling_max_num_; - return hash_pos; -} - -void SpellingTable::free_resource() { - if (NULL != raw_spellings_) - delete [] raw_spellings_; - raw_spellings_ = NULL; - - if (NULL != spelling_buf_) - delete [] spelling_buf_; - spelling_buf_ = NULL; -} - -bool SpellingTable::init_table(size_t pure_spl_size, size_t spl_max_num, - bool need_score) { - if (pure_spl_size == 0 || spl_max_num ==0) - return false; - - need_score_ = need_score; - - free_resource(); - - spelling_size_ = pure_spl_size + 1; - if (need_score) - spelling_size_ += 1; - spelling_max_num_ = get_odd_next(spl_max_num); - spelling_num_ = 0; - - raw_spellings_ = new RawSpelling[spelling_max_num_]; - spelling_buf_ = new char[spelling_max_num_ * (spelling_size_)]; - if (NULL == raw_spellings_ || NULL == spelling_buf_) { - free_resource(); - return false; - } - - memset(raw_spellings_, 0, spelling_max_num_ * sizeof(RawSpelling)); - memset(spelling_buf_, 0, spelling_max_num_ * (spelling_size_)); - frozen_ = false; - total_freq_ = 0; - return true; -} - -bool SpellingTable::put_spelling(const char* spelling_str, double freq) { - if (frozen_ || NULL == spelling_str) - return false; - - for (size_t pos = 0; pos < kNotSupportNum; pos++) { - if (strcmp(spelling_str, kNotSupportList[pos]) == 0) { - return false; - } - } - - total_freq_ += freq; - - size_t hash_pos = get_hash_pos(spelling_str); - - raw_spellings_[hash_pos].str[spelling_size_ - 1] = '\0'; - - if (strncmp(raw_spellings_[hash_pos].str, spelling_str, - spelling_size_ - 1) == 0) { - raw_spellings_[hash_pos].freq += freq; - return true; - } - - size_t hash_pos_ori = hash_pos; - - while (true) { - if (strncmp(raw_spellings_[hash_pos].str, - spelling_str, spelling_size_ - 1) == 0) { - raw_spellings_[hash_pos].freq += freq; - return true; - } - - if ('\0' == raw_spellings_[hash_pos].str[0]) { - raw_spellings_[hash_pos].freq += freq; - strncpy(raw_spellings_[hash_pos].str, spelling_str, spelling_size_ - 1); - 
raw_spellings_[hash_pos].str[spelling_size_ - 1] = '\0'; - spelling_num_++; - return true; - } - - hash_pos = hash_pos_next(hash_pos); - if (hash_pos_ori == hash_pos) - return false; - } - - // never reach here - return false; -} - -bool SpellingTable::contain(const char* spelling_str) { - if (NULL == spelling_str || NULL == spelling_buf_ || frozen_) - return false; - - size_t hash_pos = get_hash_pos(spelling_str); - - if ('\0' == raw_spellings_[hash_pos].str[0]) - return false; - - if (strncmp(raw_spellings_[hash_pos].str, spelling_str, spelling_size_ - 1) - == 0) - return true; - - size_t hash_pos_ori = hash_pos; - - while (true) { - hash_pos = hash_pos_next(hash_pos); - if (hash_pos_ori == hash_pos) - return false; - - if ('\0' == raw_spellings_[hash_pos].str[0]) - return false; - - if (strncmp(raw_spellings_[hash_pos].str, spelling_str, spelling_size_ - 1) - == 0) - return true; - } - - // never reach here - return false; -} - -const char* SpellingTable::arrange(size_t *item_size, size_t *spl_num) { - if (NULL == raw_spellings_ || NULL == spelling_buf_ || - NULL == item_size || NULL == spl_num) - return NULL; - - qsort(raw_spellings_, spelling_max_num_, sizeof(RawSpelling), - compare_raw_spl_eb); - - // After sorting, only the first spelling_num_ items are valid. - // Copy them to the destination buffer. - for (size_t pos = 0; pos < spelling_num_; pos++) { - strncpy(spelling_buf_ + pos * spelling_size_, raw_spellings_[pos].str, - spelling_size_); - } - - if (need_score_) { - if (kPrintDebug0) - printf("------------Spelling Possiblities--------------\n"); - - double max_score = 0; - double min_score = 0; - - // After sorting, only the first spelling_num_ items are valid. 
- for (size_t pos = 0; pos < spelling_num_; pos++) { - raw_spellings_[pos].freq /= total_freq_; - if (need_score_) { - if (0 == pos) { - max_score = raw_spellings_[0].freq; - min_score = max_score; - } else { - if (raw_spellings_[pos].freq > max_score) - max_score = raw_spellings_[pos].freq; - if (raw_spellings_[pos].freq < min_score) - min_score = raw_spellings_[pos].freq; - } - } - } - - if (kPrintDebug0) - printf("-----max psb: %f, min psb: %f\n", max_score, min_score); - - max_score = log(max_score); - min_score = log(min_score); - - if (kPrintDebug0) - printf("-----max log value: %f, min log value: %f\n", - max_score, min_score); - - // The absolute value of min_score is bigger than that of max_score because - // both of them are negative after log function. - score_amplifier_ = 1.0 * 255 / min_score; - - double average_score = 0; - for (size_t pos = 0; pos < spelling_num_; pos++) { - double score = log(raw_spellings_[pos].freq) * score_amplifier_; - assert(score >= 0); - - average_score += score; - - // Because of calculation precision issue, score might be a little bigger - // than 255 after being amplified. 
- if (score > 255) - score = 255; - char *this_spl_buf = spelling_buf_ + pos * spelling_size_; - this_spl_buf[spelling_size_ - 1] = - static_cast((unsigned char)score); - - if (kPrintDebug0) { - printf("---pos:%d, %s, psb:%d\n", pos, this_spl_buf, - (unsigned char)this_spl_buf[spelling_size_ -1]); - } - } - average_score /= spelling_num_; - assert(average_score <= 255); - average_score_ = static_cast(average_score); - - if (kPrintDebug0) - printf("\n----Score Amplifier: %f, Average Score: %d\n", score_amplifier_, - average_score_); - } - - *item_size = spelling_size_; - *spl_num = spelling_num_; - frozen_ = true; - return spelling_buf_; -} - -float SpellingTable::get_score_amplifier() { - return static_cast(score_amplifier_); -} - -unsigned char SpellingTable::get_average_score() { - return average_score_; -} - -#endif // ___BUILD_MODEL___ -} // namespace ime_pinyin diff --git a/spellingtable.h b/spellingtable.h deleted file mode 100644 index fd79c6e..0000000 --- a/spellingtable.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef PINYINIME_INCLUDE_SPELLINGTABLE_H__ -#define PINYINIME_INCLUDE_SPELLINGTABLE_H__ - -#include -#include "./dictdef.h" - -namespace ime_pinyin { - -#ifdef ___BUILD_MODEL___ - -const size_t kMaxSpellingSize = kMaxPinyinSize; - -typedef struct { - char str[kMaxSpellingSize + 1]; - double freq; -} RawSpelling, *PRawSpelling; - -// This class is used to store the spelling strings -// The length of the input spelling string should be less or equal to the -// spelling_size_ (set by init_table). If the input string is too long, -// we only keep its first spelling_size_ chars. -class SpellingTable { - private: - static const size_t kNotSupportNum = 3; - static const char kNotSupportList[kNotSupportNum][kMaxSpellingSize + 1]; - - bool need_score_; - - size_t spelling_max_num_; - - RawSpelling *raw_spellings_; - - // Used to store spelling strings. If the spelling table needs to calculate - // score, an extra char after each spelling string is the score. - // An item with a lower score has a higher probability. - char *spelling_buf_; - size_t spelling_size_; - - double total_freq_; - - size_t spelling_num_; - - double score_amplifier_; - - unsigned char average_score_; - - // If frozen is true, put_spelling() and contain() are not allowed to call. - bool frozen_; - - size_t get_hash_pos(const char* spelling_str); - size_t hash_pos_next(size_t hash_pos); - void free_resource(); - public: - SpellingTable(); - ~SpellingTable(); - - // pure_spl_size is the pure maximum spelling string size. For example, - // "zhuang" is the longgest item in Pinyin, so pure_spl_size should be 6. - // spl_max_num is the maximum number of spelling strings to store. - // need_score is used to indicate whether the caller needs to calculate a - // score for each spelling. - bool init_table(size_t pure_spl_size, size_t spl_max_num, bool need_score); - - // Put a spelling string to the table. - // It always returns false if called after arrange() withtout a new - // init_table() operation. 
- // freq is the spelling's occuring count. - // If the spelling has been in the table, occuring count will accumulated. - bool put_spelling(const char* spelling_str, double spl_count); - - // Test whether a spelling string is in the table. - // It always returns false, when being called after arrange() withtout a new - // init_table() operation. - bool contain(const char* spelling_str); - - // Sort the spelling strings and put them from the begin of the buffer. - // Return the pointer of the sorted spelling strings. - // item_size and spl_num return the item size and number of spelling. - // Because each spelling uses a '\0' as terminator, the returned item_size is - // at least one char longer than the spl_size parameter specified by - // init_table(). If the table is initialized to calculate score, item_size - // will be increased by 1, and current_spl_str[item_size - 1] stores an - // unsinged char score. - // An item with a lower score has a higher probability. - // Do not call put_spelling() and contains() after arrange(). - const char* arrange(size_t *item_size, size_t *spl_num); - - float get_score_amplifier(); - - unsigned char get_average_score(); -}; -#endif // ___BUILD_MODEL___ -} - -#endif // PINYINIME_INCLUDE_SPELLINGTABLE_H__ diff --git a/spellingtrie.cpp b/spellingtrie.cpp deleted file mode 100644 index 2ab8258..0000000 --- a/spellingtrie.cpp +++ /dev/null @@ -1,828 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include "./dictdef.h" - -#ifdef ___BUILD_MODEL___ -#include "./spellingtable.h" -#endif - -#include "./spellingtrie.h" - -namespace ime_pinyin { - -SpellingTrie* SpellingTrie::instance_ = NULL; - -// z/c/s is for Zh/Ch/Sh -const char SpellingTrie::kHalfId2Sc_[kFullSplIdStart + 1] = - "0ABCcDEFGHIJKLMNOPQRSsTUVWXYZz"; - -// Bit 0 : is it a Shengmu char? -// Bit 1 : is it a Yunmu char? (one char is a Yunmu) -// Bit 2 : is it enabled in ShouZiMu(first char) mode? -unsigned char SpellingTrie::char_flags_[] = { - // a b c d e f g - 0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, - // h i j k l m n - 0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, - // o p q r s t - 0x02, 0x01, 0x01, 0x01, 0x01, 0x01, - // u v w x y z - 0x00, 0x00, 0x01, 0x01, 0x01, 0x01 -}; - -int compare_spl(const void* p1, const void* p2) { - return strcmp((const char*)(p1), (const char*)(p2)); -} - -SpellingTrie::SpellingTrie() { - spelling_buf_ = NULL; - spelling_size_ = 0; - spelling_num_ = 0; - spl_ym_ids_ = NULL; - splstr_queried_ = NULL; - splstr16_queried_ = NULL; - root_ = NULL; - dumb_node_ = NULL; - splitter_node_ = NULL; - instance_ = NULL; - ym_buf_ = NULL; - f2h_ = NULL; - - szm_enable_shm(true); - szm_enable_ym(true); - -#ifdef ___BUILD_MODEL___ - node_num_ = 0; -#endif -} - -SpellingTrie::~SpellingTrie() { - if (NULL != spelling_buf_) - delete [] spelling_buf_; - - if (NULL != splstr_queried_) - delete [] splstr_queried_; - - if (NULL != splstr16_queried_) - delete [] splstr16_queried_; - - if (NULL != spl_ym_ids_) - delete [] spl_ym_ids_; - - if (NULL != root_) { - free_son_trie(root_); - delete root_; - } - - if (NULL != dumb_node_) { - delete [] dumb_node_; - } - - if (NULL != splitter_node_) { - delete [] splitter_node_; - } - - if (NULL != instance_) { - delete instance_; - instance_ = NULL; - } - - if (NULL != ym_buf_) - delete [] ym_buf_; - - if 
(NULL != f2h_) - delete [] f2h_; -} - -bool SpellingTrie::if_valid_id_update(uint16 *splid) const { - if (NULL == splid || 0 == *splid) - return false; - - if (*splid >= kFullSplIdStart) - return true; - if (*splid < kFullSplIdStart) { - char ch = kHalfId2Sc_[*splid]; - if (ch > 'Z') { - return true; - } else { - if (szm_is_enabled(ch)) { - return true; - } else if (is_yunmu_char(ch)) { - assert(h2f_num_[*splid] > 0); - *splid = h2f_start_[*splid]; - return true; - } - } - } - return false; -} - -bool SpellingTrie::is_half_id(uint16 splid) const { - if (0 == splid || splid >= kFullSplIdStart) - return false; - - return true; -} - -bool SpellingTrie::is_full_id(uint16 splid) const { - if (splid < kFullSplIdStart || splid >= kFullSplIdStart + spelling_num_) - return false; - return true; -} - -bool SpellingTrie::half_full_compatible(uint16 half_id, uint16 full_id) const { - uint16 half_fr_full = full_to_half(full_id); - - if (half_fr_full == half_id) - return true; - - // &~0x20 is used to conver the char to upper case. - // So that Zh/Ch/Sh(whose char is z/c/s) can be matched with Z/C/S. 
- char ch_f = (kHalfId2Sc_[half_fr_full] & (~0x20)); - char ch_h = kHalfId2Sc_[half_id]; - if (ch_f == ch_h) - return true; - - return false; -} - -bool SpellingTrie::is_half_id_yunmu(uint16 splid) const { - if (0 == splid || splid >= kFullSplIdStart) - return false; - - char ch = kHalfId2Sc_[splid]; - // If ch >= 'a', that means the half id is one of Zh/Ch/Sh - if (ch >= 'a') { - return false; - } - - return char_flags_[ch - 'A'] & kHalfIdYunmuMask; -} - -bool SpellingTrie::is_shengmu_char(char ch) const { - return char_flags_[ch - 'A'] & kHalfIdShengmuMask; -} - -bool SpellingTrie::is_yunmu_char(char ch) const { - return char_flags_[ch - 'A'] & kHalfIdYunmuMask; -} - -bool SpellingTrie::is_szm_char(char ch) const { - return is_shengmu_char(ch) || is_yunmu_char(ch); -} - -bool SpellingTrie::szm_is_enabled(char ch) const { - return char_flags_[ch - 'A'] & kHalfIdSzmMask; -} - -void SpellingTrie::szm_enable_shm(bool enable) { - if (enable) { - for (char ch = 'A'; ch <= 'Z'; ch++) { - if (is_shengmu_char(ch)) - char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask; - } - } else { - for (char ch = 'A'; ch <= 'Z'; ch++) { - if (is_shengmu_char(ch)) - char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff); - } - } -} - -void SpellingTrie::szm_enable_ym(bool enable) { - if (enable) { - for (char ch = 'A'; ch <= 'Z'; ch++) { - if (is_yunmu_char(ch)) - char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask; - } - } else { - for (char ch = 'A'; ch <= 'Z'; ch++) { - if (is_yunmu_char(ch)) - char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff); - } - } -} - -bool SpellingTrie::is_szm_enabled(char ch) const { - return char_flags_[ch - 'A'] & kHalfIdSzmMask; -} - -const SpellingTrie* SpellingTrie::get_cpinstance() { - return &get_instance(); -} - -SpellingTrie& SpellingTrie::get_instance() { - if (NULL == instance_) - instance_ = new SpellingTrie(); - - return *instance_; -} - -uint16 SpellingTrie::half2full_num(uint16 
half_id) const { - if (NULL == root_ || half_id >= kFullSplIdStart) - return 0; - return h2f_num_[half_id]; -} - -uint16 SpellingTrie::half_to_full(uint16 half_id, uint16 *spl_id_start) const { - if (NULL == spl_id_start || NULL == root_ || half_id >= kFullSplIdStart) - return 0; - - *spl_id_start = h2f_start_[half_id]; - return h2f_num_[half_id]; -} - -uint16 SpellingTrie::full_to_half(uint16 full_id) const { - if (NULL == root_ || full_id < kFullSplIdStart || - full_id > spelling_num_ + kFullSplIdStart) - return 0; - - return f2h_[full_id - kFullSplIdStart]; -} - -void SpellingTrie::free_son_trie(SpellingNode* node) { - if (NULL == node) - return; - - for (size_t pos = 0; pos < node->num_of_son; pos++) { - free_son_trie(node->first_son + pos); - } - - if (NULL != node->first_son) - delete [] node->first_son; -} - -bool SpellingTrie::construct(const char* spelling_arr, size_t item_size, - size_t item_num, float score_amplifier, - unsigned char average_score) { - if (spelling_arr == NULL) - return false; - - memset(h2f_start_, 0, sizeof(uint16) * kFullSplIdStart); - memset(h2f_num_, 0, sizeof(uint16) * kFullSplIdStart); - - // If the arr is the same as the buf, means this function is called by - // load_table(), the table data are ready; otherwise the array should be - // saved. 
- if (spelling_arr != spelling_buf_) { - if (NULL != spelling_buf_) - delete [] spelling_buf_; - spelling_buf_ = new char[item_size * item_num]; - if (NULL == spelling_buf_) - return false; - memcpy(spelling_buf_, spelling_arr, sizeof(char) * item_size * item_num); - } - - spelling_size_ = item_size; - spelling_num_ = item_num; - - score_amplifier_ = score_amplifier; - average_score_ = average_score; - - if (NULL != splstr_queried_) - delete [] splstr_queried_; - splstr_queried_ = new char[spelling_size_]; - if (NULL == splstr_queried_) - return false; - - if (NULL != splstr16_queried_) - delete [] splstr16_queried_; - splstr16_queried_ = new char16[spelling_size_]; - if (NULL == splstr16_queried_) - return false; - - // First, sort the buf to ensure they are in ascendant order - qsort(spelling_buf_, spelling_num_, spelling_size_, compare_spl); - -#ifdef ___BUILD_MODEL___ - node_num_ = 1; -#endif - - root_ = new SpellingNode(); - memset(root_, 0, sizeof(SpellingNode)); - - dumb_node_ = new SpellingNode(); - memset(dumb_node_, 0, sizeof(SpellingNode)); - dumb_node_->score = average_score_; - - splitter_node_ = new SpellingNode(); - memset(splitter_node_, 0, sizeof(SpellingNode)); - splitter_node_->score = average_score_; - - memset(level1_sons_, 0, sizeof(SpellingNode*) * kValidSplCharNum); - - root_->first_son = construct_spellings_subset(0, spelling_num_, 0, root_); - - // Root's score should be cleared. 
- root_->score = 0; - - if (NULL == root_->first_son) - return false; - - h2f_start_[0] = h2f_num_[0] = 0; - - if (!build_f2h()) - return false; - -#ifdef ___BUILD_MODEL___ - if (kPrintDebug0) { - printf("---SpellingTrie Nodes: %d\n", node_num_); - } - return build_ym_info(); -#else - return true; -#endif -} - -#ifdef ___BUILD_MODEL___ -const char* SpellingTrie::get_ym_str(const char *spl_str) { - bool start_ZCS = false; - if (is_shengmu_char(*spl_str)) { - if ('Z' == *spl_str || 'C' == *spl_str || 'S' == *spl_str) - start_ZCS = true; - spl_str += 1; - if (start_ZCS && 'h' == *spl_str) - spl_str += 1; - } - return spl_str; -} - -bool SpellingTrie::build_ym_info() { - bool sucess; - SpellingTable *spl_table = new SpellingTable(); - - sucess = spl_table->init_table(kMaxPinyinSize - 1, 2 * kMaxYmNum, false); - assert(sucess); - - for (uint16 pos = 0; pos < spelling_num_; pos++) { - const char *spl_str = spelling_buf_ + spelling_size_ * pos; - spl_str = get_ym_str(spl_str); - if ('\0' != spl_str[0]) { - sucess = spl_table->put_spelling(spl_str, 0); - assert(sucess); - } - } - - size_t ym_item_size; // '\0' is included - size_t ym_num; - const char* ym_buf; - ym_buf = spl_table->arrange(&ym_item_size, &ym_num); - - if (NULL != ym_buf_) - delete [] ym_buf_; - ym_buf_ = new char[ym_item_size * ym_num]; - if (NULL == ym_buf_) { - delete spl_table; - return false; - } - - memcpy(ym_buf_, ym_buf, sizeof(char) * ym_item_size * ym_num); - ym_size_ = ym_item_size; - ym_num_ = ym_num; - - delete spl_table; - - // Generate the maping from the spelling ids to the Yunmu ids. 
- if (spl_ym_ids_) - delete spl_ym_ids_; - spl_ym_ids_ = new uint8[spelling_num_ + kFullSplIdStart]; - if (NULL == spl_ym_ids_) - return false; - - memset(spl_ym_ids_, 0, sizeof(uint8) * (spelling_num_ + kFullSplIdStart)); - - for (uint16 id = 1; id < spelling_num_ + kFullSplIdStart; id++) { - const char *str = get_spelling_str(id); - - str = get_ym_str(str); - if ('\0' != str[0]) { - uint8 ym_id = get_ym_id(str); - spl_ym_ids_[id] = ym_id; - assert(ym_id > 0); - } else { - spl_ym_ids_[id] = 0; - } - } - return true; -} -#endif - -SpellingNode* SpellingTrie::construct_spellings_subset( - size_t item_start, size_t item_end, size_t level, SpellingNode* parent) { - if (level >= spelling_size_ || item_end <= item_start || NULL == parent) - return NULL; - - SpellingNode *first_son = NULL; - uint16 num_of_son = 0; - unsigned char min_son_score = 255; - - const char *spelling_last_start = spelling_buf_ + spelling_size_ * item_start; - char char_for_node = spelling_last_start[level]; - assert(char_for_node >= 'A' && char_for_node <= 'Z' || - 'h' == char_for_node); - - // Scan the array to find how many sons - for (size_t i = item_start + 1; i < item_end; i++) { - const char *spelling_current = spelling_buf_ + spelling_size_ * i; - char char_current = spelling_current[level]; - if (char_current != char_for_node) { - num_of_son++; - char_for_node = char_current; - } - } - num_of_son++; - - // Allocate memory -#ifdef ___BUILD_MODEL___ - node_num_ += num_of_son; -#endif - first_son = new SpellingNode[num_of_son]; - memset(first_son, 0, sizeof(SpellingNode)*num_of_son); - - // Now begin construct tree - size_t son_pos = 0; - - spelling_last_start = spelling_buf_ + spelling_size_ * item_start; - char_for_node = spelling_last_start[level]; - - bool spelling_endable = true; - if (spelling_last_start[level + 1] != '\0') - spelling_endable = false; - - size_t item_start_next = item_start; - - for (size_t i = item_start + 1; i < item_end; i++) { - const char *spelling_current = 
spelling_buf_ + spelling_size_ * i; - char char_current = spelling_current[level]; - assert(is_valid_spl_char(char_current)); - - if (char_current != char_for_node) { - // Construct a node - SpellingNode *node_current = first_son + son_pos; - node_current->char_this_node = char_for_node; - - // For quick search in the first level - if (0 == level) - level1_sons_[char_for_node - 'A'] = node_current; - - if (spelling_endable) { - node_current->spelling_idx = kFullSplIdStart + item_start_next; - } - - if (spelling_last_start[level + 1] != '\0' || i - item_start_next > 1) { - size_t real_start = item_start_next; - if (spelling_last_start[level + 1] == '\0') - real_start++; - - node_current->first_son = - construct_spellings_subset(real_start, i, level + 1, - node_current); - - if (real_start == item_start_next + 1) { - uint16 score_this = static_cast( - spelling_last_start[spelling_size_ - 1]); - if (score_this < node_current->score) - node_current->score = score_this; - } - } else { - node_current->first_son = NULL; - node_current->score = static_cast( - spelling_last_start[spelling_size_ - 1]); - } - - if (node_current->score < min_son_score) - min_son_score = node_current->score; - - bool is_half = false; - if (level == 0 && is_szm_char(char_for_node)) { - node_current->spelling_idx = - static_cast(char_for_node - 'A' + 1); - - if (char_for_node > 'C') - node_current->spelling_idx++; - if (char_for_node > 'S') - node_current->spelling_idx++; - - h2f_num_[node_current->spelling_idx] = i - item_start_next; - is_half = true; - } else if (level == 1 && char_for_node == 'h') { - char ch_level0 = spelling_last_start[0]; - uint16 part_id = 0; - if (ch_level0 == 'C') - part_id = 'C' - 'A' + 1 + 1; - else if (ch_level0 == 'S') - part_id = 'S' - 'A' + 1 + 2; - else if (ch_level0 == 'Z') - part_id = 'Z' - 'A' + 1 + 3; - if (0 != part_id) { - node_current->spelling_idx = part_id; - h2f_num_[node_current->spelling_idx] = i - item_start_next; - is_half = true; - } - } - - if 
(is_half) { - if (h2f_num_[node_current->spelling_idx] > 0) - h2f_start_[node_current->spelling_idx] = - item_start_next + kFullSplIdStart; - else - h2f_start_[node_current->spelling_idx] = 0; - } - - // for next sibling - spelling_last_start = spelling_current; - char_for_node = char_current; - item_start_next = i; - spelling_endable = true; - if (spelling_current[level + 1] != '\0') - spelling_endable = false; - - son_pos++; - } - } - - // the last one - SpellingNode *node_current = first_son + son_pos; - node_current->char_this_node = char_for_node; - - // For quick search in the first level - if (0 == level) - level1_sons_[char_for_node - 'A'] = node_current; - - if (spelling_endable) { - node_current->spelling_idx = kFullSplIdStart + item_start_next; - } - - if (spelling_last_start[level + 1] != '\0' || - item_end - item_start_next > 1) { - size_t real_start = item_start_next; - if (spelling_last_start[level + 1] == '\0') - real_start++; - - node_current->first_son = - construct_spellings_subset(real_start, item_end, level + 1, - node_current); - - if (real_start == item_start_next + 1) { - uint16 score_this = static_cast( - spelling_last_start[spelling_size_ - 1]); - if (score_this < node_current->score) - node_current->score = score_this; - } - } else { - node_current->first_son = NULL; - node_current->score = static_cast( - spelling_last_start[spelling_size_ - 1]); - } - - if (node_current->score < min_son_score) - min_son_score = node_current->score; - - assert(son_pos + 1 == num_of_son); - - bool is_half = false; - if (level == 0 && szm_is_enabled(char_for_node)) { - node_current->spelling_idx = static_cast(char_for_node - 'A' + 1); - - if (char_for_node > 'C') - node_current->spelling_idx++; - if (char_for_node > 'S') - node_current->spelling_idx++; - - h2f_num_[node_current->spelling_idx] = item_end - item_start_next; - is_half = true; - } else if (level == 1 && char_for_node == 'h') { - char ch_level0 = spelling_last_start[0]; - uint16 part_id = 0; - 
if (ch_level0 == 'C') - part_id = 'C' - 'A' + 1 + 1; - else if (ch_level0 == 'S') - part_id = 'S' - 'A' + 1 + 2; - else if (ch_level0 == 'Z') - part_id = 'Z' - 'A' + 1 + 3; - if (0 != part_id) { - node_current->spelling_idx = part_id; - h2f_num_[node_current->spelling_idx] = item_end - item_start_next; - is_half = true; - } - } - if (is_half) { - if (h2f_num_[node_current->spelling_idx] > 0) - h2f_start_[node_current->spelling_idx] = - item_start_next + kFullSplIdStart; - else - h2f_start_[node_current->spelling_idx] = 0; - } - - parent->num_of_son = num_of_son; - parent->score = min_son_score; - return first_son; -} - -bool SpellingTrie::save_spl_trie(FILE *fp) { - if (NULL == fp || NULL == spelling_buf_) - return false; - - if (fwrite(&spelling_size_, sizeof(uint32), 1, fp) != 1) - return false; - - if (fwrite(&spelling_num_, sizeof(uint32), 1, fp) != 1) - return false; - - if (fwrite(&score_amplifier_, sizeof(float), 1, fp) != 1) - return false; - - if (fwrite(&average_score_, sizeof(unsigned char), 1, fp) != 1) - return false; - - if (fwrite(spelling_buf_, sizeof(char) * spelling_size_, - spelling_num_, fp) != spelling_num_) - return false; - - return true; -} - -bool SpellingTrie::load_spl_trie(FILE *fp) { - if (NULL == fp) - return false; - - if (fread(&spelling_size_, sizeof(uint32), 1, fp) != 1) - return false; - - if (fread(&spelling_num_, sizeof(uint32), 1, fp) != 1) - return false; - - if (fread(&score_amplifier_, sizeof(float), 1, fp) != 1) - return false; - - if (fread(&average_score_, sizeof(unsigned char), 1, fp) != 1) - return false; - - if (NULL != spelling_buf_) - delete [] spelling_buf_; - - spelling_buf_ = new char[spelling_size_ * spelling_num_]; - if (NULL == spelling_buf_) - return false; - - if (fread(spelling_buf_, sizeof(char) * spelling_size_, - spelling_num_, fp) != spelling_num_) - return false; - - return construct(spelling_buf_, spelling_size_, spelling_num_, - score_amplifier_, average_score_); -} - -bool SpellingTrie::build_f2h() { 
- if (NULL != f2h_) - delete [] f2h_; - f2h_ = new uint16[spelling_num_]; - if (NULL == f2h_) - return false; - - for (uint16 hid = 0; hid < kFullSplIdStart; hid++) { - for (uint16 fid = h2f_start_[hid]; - fid < h2f_start_[hid] + h2f_num_[hid]; fid++) - f2h_[fid - kFullSplIdStart] = hid; - } - - return true; -} - -size_t SpellingTrie::get_spelling_num() { - return spelling_num_; -} - -uint8 SpellingTrie::get_ym_id(const char *ym_str) { - if (NULL == ym_str || NULL == ym_buf_) - return 0; - - for (uint8 pos = 0; pos < ym_num_; pos++) - if (strcmp(ym_buf_ + ym_size_ * pos, ym_str) == 0) - return pos + 1; - - return 0; -} - -const char* SpellingTrie::get_spelling_str(uint16 splid) { - splstr_queried_[0] = '\0'; - - if (splid >= kFullSplIdStart) { - splid -= kFullSplIdStart; - snprintf(splstr_queried_, spelling_size_, "%s", - spelling_buf_ + splid * spelling_size_); - } else { - if (splid == 'C' - 'A' + 1 + 1) { - snprintf(splstr_queried_, spelling_size_, "%s", "Ch"); - } else if (splid == 'S' - 'A' + 1 + 2) { - snprintf(splstr_queried_, spelling_size_, "%s", "Sh"); - } else if (splid == 'Z' - 'A' + 1 + 3) { - snprintf(splstr_queried_, spelling_size_, "%s", "Zh"); - } else { - if (splid > 'C' - 'A' + 1) - splid--; - if (splid > 'S' - 'A' + 1) - splid--; - splstr_queried_[0] = 'A' + splid - 1; - splstr_queried_[1] = '\0'; - } - } - return splstr_queried_; -} - -const char16* SpellingTrie::get_spelling_str16(uint16 splid) { - splstr16_queried_[0] = '\0'; - - if (splid >= kFullSplIdStart) { - splid -= kFullSplIdStart; - for (size_t pos = 0; pos < spelling_size_; pos++) { - splstr16_queried_[pos] = static_cast - (spelling_buf_[splid * spelling_size_ + pos]); - } - } else { - if (splid == 'C' - 'A' + 1 + 1) { - splstr16_queried_[0] = static_cast('C'); - splstr16_queried_[1] = static_cast('h'); - splstr16_queried_[2] = static_cast('\0'); - } else if (splid == 'S' - 'A' + 1 + 2) { - splstr16_queried_[0] = static_cast('S'); - splstr16_queried_[1] = static_cast('h'); - 
splstr16_queried_[2] = static_cast('\0'); - } else if (splid == 'Z' - 'A' + 1 + 3) { - splstr16_queried_[0] = static_cast('Z'); - splstr16_queried_[1] = static_cast('h'); - splstr16_queried_[2] = static_cast('\0'); - } else { - if (splid > 'C' - 'A' + 1) - splid--; - if (splid > 'S' - 'A' + 1) - splid--; - splstr16_queried_[0] = 'A' + splid - 1; - splstr16_queried_[1] = '\0'; - } - } - return splstr16_queried_; -} - -size_t SpellingTrie::get_spelling_str16(uint16 splid, char16 *splstr16, - size_t splstr16_len) { - if (NULL == splstr16 || splstr16_len < kMaxPinyinSize + 1) return 0; - - if (splid >= kFullSplIdStart) { - splid -= kFullSplIdStart; - for (size_t pos = 0; pos <= kMaxPinyinSize; pos++) { - splstr16[pos] = static_cast - (spelling_buf_[splid * spelling_size_ + pos]); - if (static_cast('\0') == splstr16[pos]) { - return pos; - } - } - } else { - if (splid == 'C' - 'A' + 1 + 1) { - splstr16[0] = static_cast('C'); - splstr16[1] = static_cast('h'); - splstr16[2] = static_cast('\0'); - return 2; - } else if (splid == 'S' - 'A' + 1 + 2) { - splstr16[0] = static_cast('S'); - splstr16[1] = static_cast('h'); - splstr16[2] = static_cast('\0'); - return 2; - } else if (splid == 'Z' - 'A' + 1 + 3) { - splstr16[0] = static_cast('Z'); - splstr16[1] = static_cast('h'); - splstr16[2] = static_cast('\0'); - return 2; - } else { - if (splid > 'C' - 'A' + 1) - splid--; - if (splid > 'S' - 'A' + 1) - splid--; - splstr16[0] = 'A' + splid - 1; - splstr16[1] = '\0'; - return 1; - } - } - - // Not reachable. - return 0; -} - -} // namespace ime_pinyin diff --git a/spellingtrie.h b/spellingtrie.h deleted file mode 100644 index 4438757..0000000 --- a/spellingtrie.h +++ /dev/null @@ -1,258 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PINYINIME_INCLUDE_SPELLINGTRIE_H__ -#define PINYINIME_INCLUDE_SPELLINGTRIE_H__ - -#include -#include -#include "./dictdef.h" - -namespace ime_pinyin { - -static const unsigned short kFullSplIdStart = kHalfSpellingIdNum + 1; - -// Node used for the trie of spellings -struct SpellingNode { - SpellingNode *first_son; - // The spelling id for each node. If you need more bits to store - // spelling id, please adjust this structure. - uint16 spelling_idx:11; - uint16 num_of_son:5; - char char_this_node; - unsigned char score; -}; - -class SpellingTrie { - private: - static const int kMaxYmNum = 64; - static const size_t kValidSplCharNum = 26; - - static const uint16 kHalfIdShengmuMask = 0x01; - static const uint16 kHalfIdYunmuMask = 0x02; - static const uint16 kHalfIdSzmMask = 0x04; - - // Map from half spelling id to single char. - // For half ids of Zh/Ch/Sh, map to z/c/s (low case) respectively. - // For example, 1 to 'A', 2 to 'B', 3 to 'C', 4 to 'c', 5 to 'D', ..., - // 28 to 'Z', 29 to 'z'. - // [0] is not used to achieve better efficiency. - static const char kHalfId2Sc_[kFullSplIdStart + 1]; - - static unsigned char char_flags_[]; - static SpellingTrie* instance_; - - // The spelling table - char *spelling_buf_; - - // The size of longest spelling string, includes '\0' and an extra char to - // store score. For example, "zhuang" is the longgest item in Pinyin list, - // so spelling_size_ is 8. - // Structure: The string ended with '\0' + score char. - // An item with a lower score has a higher probability. 
- size_t spelling_size_; - - // Number of full spelling ids. - size_t spelling_num_; - - float score_amplifier_; - unsigned char average_score_; - - // The Yunmu id list for the spelling ids (for half ids of Shengmu, - // the Yunmu id is 0). - // The length of the list is spelling_num_ + kFullSplIdStart, - // so that spl_ym_ids_[splid] is the Yunmu id of the splid. - uint8 *spl_ym_ids_; - - // The Yunmu table. - // Each Yunmu will be assigned with Yunmu id from 1. - char *ym_buf_; - size_t ym_size_; // The size of longest Yunmu string, '\0'included. - size_t ym_num_; - - // The spelling string just queried - char *splstr_queried_; - - // The spelling string just queried - char16 *splstr16_queried_; - - // The root node of the spelling tree - SpellingNode* root_; - - // If a none qwerty key such as a fnction key like ENTER is given, this node - // will be used to indicate that this is not a QWERTY node. - SpellingNode* dumb_node_; - - // If a splitter key is pressed, this node will be used to indicate that this - // is a splitter key. - SpellingNode* splitter_node_; - - // Used to get the first level sons. - SpellingNode* level1_sons_[kValidSplCharNum]; - - // The full spl_id range for specific half id. - // h2f means half to full. - // A half id can be a ShouZiMu id (id to represent the first char of a full - // spelling, including Shengmu and Yunmu), or id of zh/ch/sh. - // [1..kFullSplIdStart-1] is the arrange of half id. - uint16 h2f_start_[kFullSplIdStart]; - uint16 h2f_num_[kFullSplIdStart]; - - // Map from full id to half id. - uint16 *f2h_; - -#ifdef ___BUILD_MODEL___ - // How many node used to build the trie. - size_t node_num_; -#endif - - SpellingTrie(); - - void free_son_trie(SpellingNode* node); - - // Construct a subtree using a subset of the spelling array (from - // item_star to item_end). - // Member spelliing_buf_ and spelling_size_ should be valid. - // parent is used to update its num_of_son and score. 
- SpellingNode* construct_spellings_subset(size_t item_start, size_t item_end, - size_t level, SpellingNode *parent); - bool build_f2h(); - - // The caller should guarantee ch >= 'A' && ch <= 'Z' - bool is_shengmu_char(char ch) const; - - // The caller should guarantee ch >= 'A' && ch <= 'Z' - bool is_yunmu_char(char ch) const; - -#ifdef ___BUILD_MODEL___ - // Given a spelling string, return its Yunmu string. - // The caller guaratees spl_str is valid. - const char* get_ym_str(const char *spl_str); - - // Build the Yunmu list, and the mapping relation between the full ids and the - // Yunmu ids. This functin is called after the spelling trie is built. - bool build_ym_info(); -#endif - - friend class SpellingParser; - friend class SmartSplParser; - friend class SmartSplParser2; - - public: - ~SpellingTrie(); - - inline static bool is_valid_spl_char(char ch) { - return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'); - } - - // The caller guarantees that the two chars are valid spelling chars. - inline static bool is_same_spl_char(char ch1, char ch2) { - return ch1 == ch2 || ch1 - ch2 == 'a' - 'A' || ch2 - ch1 == 'a' - 'A'; - } - - // Construct the tree from the input pinyin array - // The given string list should have been sorted. - // score_amplifier is used to convert a possibility value into score. - // average_score is the average_score of all spellings. The dumb node is - // assigned with this score. - bool construct(const char* spelling_arr, size_t item_size, size_t item_num, - float score_amplifier, unsigned char average_score); - - // Test if the given id is a valid spelling id. - // If function returns true, the given splid may be updated like this: - // When 'A' is not enabled in ShouZiMu mode, the parsing result for 'A' is - // first given as a half id 1, but because 'A' is a one-char Yunmu and - // it is a valid id, it needs to updated to its corresponding full id. 
- bool if_valid_id_update(uint16 *splid) const; - - // Test if the given id is a half id. - bool is_half_id(uint16 splid) const; - - bool is_full_id(uint16 splid) const; - - // Test if the given id is a one-char Yunmu id (obviously, it is also a half - // id), such as 'A', 'E' and 'O'. - bool is_half_id_yunmu(uint16 splid) const; - - // Test if this char is a ShouZiMu char. This ShouZiMu char may be not enabled. - // For Pinyin, only i/u/v is not a ShouZiMu char. - // The caller should guarantee that ch >= 'A' && ch <= 'Z' - bool is_szm_char(char ch) const; - - // Test If this char is enabled in ShouZiMu mode. - // The caller should guarantee that ch >= 'A' && ch <= 'Z' - bool szm_is_enabled(char ch) const; - - // Enable/disable Shengmus in ShouZiMu mode(using the first char of a spelling - // to input). - void szm_enable_shm(bool enable); - - // Enable/disable Yunmus in ShouZiMu mode. - void szm_enable_ym(bool enable); - - // Test if this char is enabled in ShouZiMu mode. - // The caller should guarantee ch >= 'A' && ch <= 'Z' - bool is_szm_enabled(char ch) const; - - // Return the number of full ids for the given half id. - uint16 half2full_num(uint16 half_id) const; - - // Return the number of full ids for the given half id, and fill spl_id_start - // to return the first full id. - uint16 half_to_full(uint16 half_id, uint16 *spl_id_start) const; - - // Return the corresponding half id for the given full id. - // Not frequently used, low efficient. - // Return 0 if fails. - uint16 full_to_half(uint16 full_id) const; - - // To test whether a half id is compatible with a full id. - // Generally, when half_id == full_to_half(full_id), return true. - // But for "Zh, Ch, Sh", if fussy mode is on, half id for 'Z' is compatible - // with a full id like "Zhe". (Fussy mode is not ready). 
- bool half_full_compatible(uint16 half_id, uint16 full_id) const; - - static const SpellingTrie* get_cpinstance(); - - static SpellingTrie& get_instance(); - - // Save to the file stream - bool save_spl_trie(FILE *fp); - - // Load from the file stream - bool load_spl_trie(FILE *fp); - - // Get the number of spellings - size_t get_spelling_num(); - - // Return the Yunmu id for the given Yunmu string. - // If the string is not valid, return 0; - uint8 get_ym_id(const char* ym_str); - - // Get the readonly Pinyin string for a given spelling id - const char* get_spelling_str(uint16 splid); - - // Get the readonly Pinyin string for a given spelling id - const char16* get_spelling_str16(uint16 splid); - - // Get Pinyin string for a given spelling id. Return the length of the - // string, and fill-in '\0' at the end. - size_t get_spelling_str16(uint16 splid, char16 *splstr16, - size_t splstr16_len); -}; -} - -#endif // PINYINIME_INCLUDE_SPELLINGTRIE_H__ diff --git a/splparser.cpp b/splparser.cpp deleted file mode 100644 index 69efb18..0000000 --- a/splparser.cpp +++ /dev/null @@ -1,341 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include "./splparser.h" - -namespace ime_pinyin { - -SpellingParser::SpellingParser() { - spl_trie_ = SpellingTrie::get_cpinstance(); -} - -bool SpellingParser::is_valid_to_parse(char ch) { - return SpellingTrie::is_valid_spl_char(ch); -} - -uint16 SpellingParser::splstr_to_idxs(const char *splstr, uint16 str_len, - uint16 spl_idx[], uint16 start_pos[], - uint16 max_size, bool &last_is_pre) { - if (NULL == splstr || 0 == max_size || 0 == str_len) - return 0; - - if (!SpellingTrie::is_valid_spl_char(splstr[0])) - return 0; - - last_is_pre = false; - - const SpellingNode *node_this = spl_trie_->root_; - - uint16 str_pos = 0; - uint16 idx_num = 0; - if (NULL != start_pos) - start_pos[0] = 0; - bool last_is_splitter = false; - - while (str_pos < str_len) { - char char_this = splstr[str_pos]; - // all characters outside of [a, z] are considered as splitters - if (!SpellingTrie::is_valid_spl_char(char_this)) { - // test if the current node is endable - uint16 id_this = node_this->spelling_idx; - if (spl_trie_->if_valid_id_update(&id_this)) { - spl_idx[idx_num] = id_this; - - idx_num++; - str_pos++; - if (NULL != start_pos) - start_pos[idx_num] = str_pos; - if (idx_num >= max_size) - return idx_num; - - node_this = spl_trie_->root_; - last_is_splitter = true; - continue; - } else { - if (last_is_splitter) { - str_pos++; - if (NULL != start_pos) - start_pos[idx_num] = str_pos; - continue; - } else { - return idx_num; - } - } - } - - last_is_splitter = false; - - SpellingNode *found_son = NULL; - - if (0 == str_pos) { - if (char_this >= 'a') - found_son = spl_trie_->level1_sons_[char_this - 'a']; - else - found_son = spl_trie_->level1_sons_[char_this - 'A']; - } else { - SpellingNode *first_son = node_this->first_son; - // Because for Zh/Ch/Sh nodes, they are the last in the buffer and - // frequently used, so we scan from the end. 
- for (int i = 0; i < node_this->num_of_son; i++) { - SpellingNode *this_son = first_son + i; - if (SpellingTrie::is_same_spl_char( - this_son->char_this_node, char_this)) { - found_son = this_son; - break; - } - } - } - - // found, just move the current node pointer to the the son - if (NULL != found_son) { - node_this = found_son; - } else { - // not found, test if it is endable - uint16 id_this = node_this->spelling_idx; - if (spl_trie_->if_valid_id_update(&id_this)) { - // endable, remember the index - spl_idx[idx_num] = id_this; - - idx_num++; - if (NULL != start_pos) - start_pos[idx_num] = str_pos; - if (idx_num >= max_size) - return idx_num; - node_this = spl_trie_->root_; - continue; - } else { - return idx_num; - } - } - - str_pos++; - } - - uint16 id_this = node_this->spelling_idx; - if (spl_trie_->if_valid_id_update(&id_this)) { - // endable, remember the index - spl_idx[idx_num] = id_this; - - idx_num++; - if (NULL != start_pos) - start_pos[idx_num] = str_pos; - } - - last_is_pre = !last_is_splitter; - - return idx_num; -} - -uint16 SpellingParser::splstr_to_idxs_f(const char *splstr, uint16 str_len, - uint16 spl_idx[], uint16 start_pos[], - uint16 max_size, bool &last_is_pre) { - uint16 idx_num = splstr_to_idxs(splstr, str_len, spl_idx, start_pos, - max_size, last_is_pre); - for (uint16 pos = 0; pos < idx_num; pos++) { - if (spl_trie_->is_half_id_yunmu(spl_idx[pos])) { - spl_trie_->half_to_full(spl_idx[pos], spl_idx + pos); - if (pos == idx_num - 1) { - last_is_pre = false; - } - } - } - return idx_num; -} - -uint16 SpellingParser::splstr16_to_idxs(const char16 *splstr, uint16 str_len, - uint16 spl_idx[], uint16 start_pos[], - uint16 max_size, bool &last_is_pre) { - if (NULL == splstr || 0 == max_size || 0 == str_len) - return 0; - - if (!SpellingTrie::is_valid_spl_char(splstr[0])) - return 0; - - last_is_pre = false; - - const SpellingNode *node_this = spl_trie_->root_; - - uint16 str_pos = 0; - uint16 idx_num = 0; - if (NULL != start_pos) - 
start_pos[0] = 0; - bool last_is_splitter = false; - - while (str_pos < str_len) { - char16 char_this = splstr[str_pos]; - // all characters outside of [a, z] are considered as splitters - if (!SpellingTrie::is_valid_spl_char(char_this)) { - // test if the current node is endable - uint16 id_this = node_this->spelling_idx; - if (spl_trie_->if_valid_id_update(&id_this)) { - spl_idx[idx_num] = id_this; - - idx_num++; - str_pos++; - if (NULL != start_pos) - start_pos[idx_num] = str_pos; - if (idx_num >= max_size) - return idx_num; - - node_this = spl_trie_->root_; - last_is_splitter = true; - continue; - } else { - if (last_is_splitter) { - str_pos++; - if (NULL != start_pos) - start_pos[idx_num] = str_pos; - continue; - } else { - return idx_num; - } - } - } - - last_is_splitter = false; - - SpellingNode *found_son = NULL; - - if (0 == str_pos) { - if (char_this >= 'a') - found_son = spl_trie_->level1_sons_[char_this - 'a']; - else - found_son = spl_trie_->level1_sons_[char_this - 'A']; - } else { - SpellingNode *first_son = node_this->first_son; - // Because for Zh/Ch/Sh nodes, they are the last in the buffer and - // frequently used, so we scan from the end. 
- for (int i = 0; i < node_this->num_of_son; i++) { - SpellingNode *this_son = first_son + i; - if (SpellingTrie::is_same_spl_char( - this_son->char_this_node, char_this)) { - found_son = this_son; - break; - } - } - } - - // found, just move the current node pointer to the the son - if (NULL != found_son) { - node_this = found_son; - } else { - // not found, test if it is endable - uint16 id_this = node_this->spelling_idx; - if (spl_trie_->if_valid_id_update(&id_this)) { - // endable, remember the index - spl_idx[idx_num] = id_this; - - idx_num++; - if (NULL != start_pos) - start_pos[idx_num] = str_pos; - if (idx_num >= max_size) - return idx_num; - node_this = spl_trie_->root_; - continue; - } else { - return idx_num; - } - } - - str_pos++; - } - - uint16 id_this = node_this->spelling_idx; - if (spl_trie_->if_valid_id_update(&id_this)) { - // endable, remember the index - spl_idx[idx_num] = id_this; - - idx_num++; - if (NULL != start_pos) - start_pos[idx_num] = str_pos; - } - - last_is_pre = !last_is_splitter; - - return idx_num; -} - -uint16 SpellingParser::splstr16_to_idxs_f(const char16 *splstr, uint16 str_len, - uint16 spl_idx[], uint16 start_pos[], - uint16 max_size, bool &last_is_pre) { - uint16 idx_num = splstr16_to_idxs(splstr, str_len, spl_idx, start_pos, - max_size, last_is_pre); - for (uint16 pos = 0; pos < idx_num; pos++) { - if (spl_trie_->is_half_id_yunmu(spl_idx[pos])) { - spl_trie_->half_to_full(spl_idx[pos], spl_idx + pos); - if (pos == idx_num - 1) { - last_is_pre = false; - } - } - } - return idx_num; -} - -uint16 SpellingParser::get_splid_by_str(const char *splstr, uint16 str_len, - bool *is_pre) { - if (NULL == is_pre) - return 0; - - uint16 spl_idx[2]; - uint16 start_pos[3]; - - if (splstr_to_idxs(splstr, str_len, spl_idx, start_pos, 2, *is_pre) != 1) - return 0; - - if (start_pos[1] != str_len) - return 0; - return spl_idx[0]; -} - -uint16 SpellingParser::get_splid_by_str_f(const char *splstr, uint16 str_len, - bool *is_pre) { - if (NULL == 
is_pre) - return 0; - - uint16 spl_idx[2]; - uint16 start_pos[3]; - - if (splstr_to_idxs(splstr, str_len, spl_idx, start_pos, 2, *is_pre) != 1) - return 0; - - if (start_pos[1] != str_len) - return 0; - if (spl_trie_->is_half_id_yunmu(spl_idx[0])) { - spl_trie_->half_to_full(spl_idx[0], spl_idx); - *is_pre = false; - } - - return spl_idx[0]; -} - -uint16 SpellingParser::get_splids_parallel(const char *splstr, uint16 str_len, - uint16 splidx[], uint16 max_size, - uint16 &full_id_num, bool &is_pre) { - if (max_size <= 0 || !is_valid_to_parse(splstr[0])) - return 0; - - splidx[0] = get_splid_by_str(splstr, str_len, &is_pre); - full_id_num = 0; - if (0 != splidx[0]) { - if (splidx[0] >= kFullSplIdStart) - full_id_num = 1; - return 1; - } - return 0; -} - -} // namespace ime_pinyin diff --git a/splparser.h b/splparser.h deleted file mode 100644 index d783bd7..0000000 --- a/splparser.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PINYINIME_INCLUDE_SPLPARSER_H__ -#define PINYINIME_INCLUDE_SPLPARSER_H__ - -#include "./dictdef.h" -#include "./spellingtrie.h" - -namespace ime_pinyin { - -class SpellingParser { - protected: - const SpellingTrie *spl_trie_; - - public: - SpellingParser(); - - // Given a string, parse it into a spelling id stream. 
- // If the whole string are sucessfully parsed, last_is_pre will be true; - // if the whole string is not fullly parsed, last_is_pre will return whether - // the last part of the string is a prefix of a full spelling string. For - // example, given string "zhengzhon", "zhon" is not a valid speling, but it is - // the prefix of "zhong". - // - // If splstr starts with a character not in ['a'-z'] (it is a split char), - // return 0. - // Split char can only appear in the middle of the string or at the end. - uint16 splstr_to_idxs(const char *splstr, uint16 str_len, uint16 splidx[], - uint16 start_pos[], uint16 max_size, bool &last_is_pre); - - // Similar to splstr_to_idxs(), the only difference is that splstr_to_idxs() - // convert single-character Yunmus into half ids, while this function converts - // them into full ids. - uint16 splstr_to_idxs_f(const char *splstr, uint16 str_len, uint16 splidx[], - uint16 start_pos[], uint16 max_size, bool &last_is_pre); - - // Similar to splstr_to_idxs(), the only difference is that this function - // uses char16 instead of char8. - uint16 splstr16_to_idxs(const char16 *splstr, uint16 str_len, uint16 splidx[], - uint16 start_pos[], uint16 max_size, bool &last_is_pre); - - // Similar to splstr_to_idxs_f(), the only difference is that this function - // uses char16 instead of char8. - uint16 splstr16_to_idxs_f(const char16 *splstr16, uint16 str_len, - uint16 splidx[], uint16 start_pos[], - uint16 max_size, bool &last_is_pre); - - // If the given string is a spelling, return the id, others, return 0. - // If the give string is a single char Yunmus like "A", and the char is - // enabled in ShouZiMu mode, the returned spelling id will be a half id. - // When the returned spelling id is a half id, *is_pre returns whether it - // is a prefix of a full spelling string. - uint16 get_splid_by_str(const char *splstr, uint16 str_len, bool *is_pre); - - // If the given string is a spelling, return the id, others, return 0. 
- // If the give string is a single char Yunmus like "a", no matter the char - // is enabled in ShouZiMu mode or not, the returned spelling id will be - // a full id. - // When the returned spelling id is a half id, *p_is_pre returns whether it - // is a prefix of a full spelling string. - uint16 get_splid_by_str_f(const char *splstr, uint16 str_len, bool *is_pre); - - // Splitter chars are not included. - bool is_valid_to_parse(char ch); - - // When auto-correction is not enabled, get_splid_by_str() will be called to - // return the single result. When auto-correction is enabled, this function - // will be called to get the results. Auto-correction is not ready. - // full_id_num returns number of full spelling ids. - // is_pre returns whether the given string is the prefix of a full spelling - // string. - // If splstr starts with a character not in [a-zA-Z] (it is a split char), - // return 0. - // Split char can only appear in the middle of the string or at the end. - // The caller should guarantee NULL != splstr && str_len > 0 && NULL != splidx - uint16 get_splids_parallel(const char *splstr, uint16 str_len, - uint16 splidx[], uint16 max_size, - uint16 &full_id_num, bool &is_pre); -}; -} - -#endif // PINYINIME_INCLUDE_SPLPARSER_H__ diff --git a/src/include/atomdictbase.h b/src/include/atomdictbase.h new file mode 100644 index 0000000..330bb0b --- /dev/null +++ b/src/include/atomdictbase.h @@ -0,0 +1,255 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This class defines AtomDictBase class which is the base class for all atom + * dictionaries. Atom dictionaries are managed by the decoder class + * MatrixSearch. + * + * When the user appends a new character to the Pinyin string, all enabled atom + * dictionaries' extend_dict() will be called at least once to get candidates + * ended in this step (the information of starting step is also given in the + * parameter). Usually, when extend_dict() is called, a MileStoneHandle object + * returned by a previous calling for a earlier step is given to speed up the + * look-up process, and a new MileStoneHandle object will be returned if + * the extension is successful. + * + * A returned MileStoneHandle object should keep alive until Function + * reset_milestones() is called and this object is noticed to be reset. + * + * Usually, the atom dictionary can use step information to manage its + * MileStoneHandle objects, or it can make the objects in ascendant order to + * make the reset easier. + * + * When the decoder loads the dictionary, it will give a starting lemma id for + * this atom dictionary to map a inner id to a global id. Global ids should be + * used when an atom dictionary talks to any component outside. + */ +#ifndef PINYINIME_INCLUDE_ATOMDICTBASE_H__ +#define PINYINIME_INCLUDE_ATOMDICTBASE_H__ + +#include +#include "./dictdef.h" +#include "./searchutility.h" + +namespace ime_pinyin { +class AtomDictBase { + public: + virtual ~AtomDictBase() {} + + /** + * Load an atom dictionary from a file. + * + * @param file_name The file name to load dictionary. + * @param start_id The starting id used for this atom dictionary. + * @param end_id The end id (included) which can be used for this atom + * dictionary. User dictionary will always use the last id space, so it can + * ignore this paramter. 
All other atom dictionaries should check this + * parameter. + * @return True if succeed. + */ + virtual bool load_dict(const char *file_name, LemmaIdType start_id, LemmaIdType end_id) = 0; + + /** + * Close this atom dictionary. + * + * @return True if succeed. + */ + virtual bool close_dict() = 0; + + /** + * Get the total number of lemmas in this atom dictionary. + * + * @return The total number of lemmas. + */ + virtual size_t number_of_lemmas() = 0; + + /** + * This function is called by the decoder when user deletes a character from + * the input string, or begins a new input string. + * + * Different atom dictionaries may implement this function in different way. + * an atom dictionary can use one of these two parameters (or both) to reset + * its corresponding MileStoneHandle objects according its detailed + * implementation. + * + * For example, if an atom dictionary uses step information to manage its + * MileStoneHandle objects, parameter from_step can be used to identify which + * objects should be reset; otherwise, if another atom dictionary does not + * use the detailed step information, it only uses ascendant handles + * (according to step. For the same step, earlier call, smaller handle), it + * can easily reset those MileStoneHandle which are larger than from_handle. + * + * The decoder always reset the decoding state by step. So when it begins + * resetting, it will call reset_milestones() of its atom dictionaries with + * the step information, and the MileStoneHandle objects returned by the + * earliest calling of extend_dict() for that step. + * + * If an atom dictionary does not implement incremental search, this function + * can be totally ignored. + * + * @param from_step From which step(included) the MileStoneHandle + * objects should be reset. 
+ * @param from_handle The earliest MileStoneHandle object for step from_step + */ + virtual void reset_milestones(uint16 from_step, MileStoneHandle from_handle) = 0; + + /** + * Used to extend in this dictionary. The handle returned should keep valid + * until reset_milestones() is called. + * + * @param from_handle Its previous returned extended handle without the new + * spelling id, it can be used to speed up the extending. + * @param dep The parameter used for extending. + * @param lpi_items Used to fill in the lemmas matched. + * @param lpi_max The length of the buffer + * @param lpi_num Used to return the newly added items. + * @return The new mile stone for this extending. 0 if fail. + */ + virtual MileStoneHandle extend_dict(MileStoneHandle from_handle, const DictExtPara *dep, LmaPsbItem *lpi_items, size_t lpi_max, size_t *lpi_num) = 0; + + /** + * Get lemma items with scores according to a spelling id stream. + * This atom dictionary does not need to sort the returned items. + * + * @param splid_str The spelling id stream buffer. + * @param splid_str_len The length of the spelling id stream buffer. + * @param lpi_items Used to return matched lemma items with scores. + * @param lpi_max The maximum size of the buffer to return result. + * @return The number of matched items which have been filled in to lpi_items. + */ + virtual size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len, LmaPsbItem *lpi_items, size_t lpi_max) = 0; + + /** + * Get a lemma string (The Chinese string) by the given lemma id. + * + * @param id_lemma The lemma id to get the string. + * @param str_buf The buffer to return the Chinese string. + * @param str_max The maximum size of the buffer. + * @return The length of the string, 0 if fail. + */ + virtual uint16 get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, uint16 str_max) = 0; + + /** + * Get the full spelling ids for the given lemma id. + * If the given buffer is too short, return 0.
+ * + * @param splids Used to return the spelling ids. + * @param splids_max The maximum buffer length of splids. + * @param arg_valid Used to indicate if the incoming parameters have been + * initialized are valid. If it is true, the splids and splids_max are valid + * and there may be half ids in splids to be updated to full ids. In this + * case, splids_max is the number of valid ids in splids. + * @return The number of ids in the buffer. + */ + virtual uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids, uint16 splids_max, bool arg_valid) = 0; + + /** + * Function used for prediction. + * No need to sort the newly added items. + * + * @param last_hzs The last n Chinese characters (called Hanzi), its length + * should be less than or equal to kMaxPredictSize. + * @param hzs_len specifies the length(<= kMaxPredictSize) of the history. + * @param npre_items Used to return the result. + * @param npre_max The length of the buffer to return result + * @param b4_used Number of prediction result (from npre_items[-b4_used]) + * from other atom dictionaries. An atom dictionary can just ignore it. + * @return The number of prediction result from this atom dictionary. + */ + virtual size_t predict(const char16 last_hzs[], uint16 hzs_len, NPredictItem *npre_items, size_t npre_max, size_t b4_used) = 0; + + /** + * Add a lemma to the dictionary. If the dictionary allows to add new + * items and this item does not exist, add it. + * + * @param lemma_str The Chinese string of the lemma. + * @param splids The spelling ids of the lemma. + * @param lemma_len The length of the Chinese lemma. + * @param count The frequency count for this lemma. + */ + virtual LemmaIdType put_lemma(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count) = 0; + + /** + * Update a lemma's occurring count. + * + * @param lemma_id The lemma id to update. + * @param delta_count The frequency count to adjust.
+ * @param selected Indicate whether this lemma is selected by user and + * submitted to target edit box. + * @return The id if succeed, 0 if fail. + */ + virtual LemmaIdType update_lemma(LemmaIdType lemma_id, int16 delta_count, bool selected) = 0; + + /** + * Get the lemma id for the given lemma. + * + * @param lemma_str The Chinese string of the lemma. + * @param splids The spelling ids of the lemma. + * @param lemma_len The length of the lemma. + * @return The matched lemma id, or 0 if fail. + */ + virtual LemmaIdType get_lemma_id(char16 lemma_str[], uint16 splids[], uint16 lemma_len) = 0; + + /** + * Get the lemma score. + * + * @param lemma_id The lemma id to get score. + * @return The score of the lemma, or 0 if fail. + */ + virtual LmaScoreType get_lemma_score(LemmaIdType lemma_id) = 0; + + /** + * Get the lemma score. + * + * @param lemma_str The Chinese string of the lemma. + * @param splids The spelling ids of the lemma. + * @param lemma_len The length of the lemma. + * @return The score of the lemma, or 0 if fail. + */ + virtual LmaScoreType get_lemma_score(char16 lemma_str[], uint16 splids[], uint16 lemma_len) = 0; + + /** + * If the dictionary allowed, remove a lemma from it. + * + * @param lemma_id The id of the lemma to remove. + * @return True if succeed. + */ + virtual bool remove_lemma(LemmaIdType lemma_id) = 0; + + /** + * Get the total occurring count of this atom dictionary. + * + * @return The total occurring count of this atom dictionary. + */ + virtual size_t get_total_lemma_count() = 0; + + /** + * Set the total occurring count of other atom dictionaries. + * + * @param count The total occurring count of other atom dictionaries. + */ + virtual void set_total_lemma_count_of_others(size_t count) = 0; + + /** + * Notify this atom dictionary to flush the cached data to persistent storage + * if necessary.
+ */ + virtual void flush_cache() = 0; +}; +} // namespace ime_pinyin + +#endif // PINYINIME_INCLUDE_ATOMDICTBASE_H__ diff --git a/src/include/dictbuilder.h b/src/include/dictbuilder.h new file mode 100644 index 0000000..e36bab0 --- /dev/null +++ b/src/include/dictbuilder.h @@ -0,0 +1,164 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PINYINIME_INCLUDE_DICTBUILDER_H__ +#define PINYINIME_INCLUDE_DICTBUILDER_H__ + +#include +#include "./utf16char.h" +#include "./dictdef.h" +#include "./dictlist.h" +#include "./spellingtable.h" +#include "./spellingtrie.h" +#include "./splparser.h" + +namespace ime_pinyin { + +#ifdef ___BUILD_MODEL___ + +#define ___DO_STATISTICS___ + +class DictTrie; + +class DictBuilder { + private: + // The raw lemma array buffer. + LemmaEntry *lemma_arr_; + size_t lemma_num_; + + // Used to store all possible single char items. + // Two items may have the same Hanzi while their spelling ids are different. + SingleCharItem *scis_; + size_t scis_num_; + + // In the tree, root's level is -1. + // Lemma nodes for root, and level 0 + LmaNodeLE0 *lma_nodes_le0_; + + // Lemma nodes for layers whose levels are deeper than 0 + LmaNodeGE1 *lma_nodes_ge1_; + + // Number of used lemma nodes + size_t lma_nds_used_num_le0_; + size_t lma_nds_used_num_ge1_; + + // Used to store homophonies' ids. 
+ LemmaIdType *homo_idx_buf_; + // Number of homophonies each of which only contains one Chinese character. + size_t homo_idx_num_eq1_; + // Number of homophonies each of which contains more than one character. + size_t homo_idx_num_gt1_; + + // The items with highest scores. + LemmaEntry *top_lmas_; + size_t top_lmas_num_; + + SpellingTable *spl_table_; + SpellingParser *spl_parser_; + +#ifdef ___DO_STATISTICS___ + size_t max_sonbuf_len_[kMaxLemmaSize]; + size_t max_homobuf_len_[kMaxLemmaSize]; + + size_t total_son_num_[kMaxLemmaSize]; + size_t total_node_hasson_[kMaxLemmaSize]; + size_t total_sonbuf_num_[kMaxLemmaSize]; + size_t total_sonbuf_allnoson_[kMaxLemmaSize]; + size_t total_node_in_sonbuf_allnoson_[kMaxLemmaSize]; + size_t total_homo_num_[kMaxLemmaSize]; + + size_t sonbufs_num1_; // Number of son buffer with only 1 son + size_t sonbufs_numgt1_; // Number of son buffer with more 1 son; + + size_t total_lma_node_num_; + + void stat_init(); + void stat_print(); +#endif + + public: + DictBuilder(); + ~DictBuilder(); + + // Build dictionary trie from the file fn_raw. File fn_validhzs provides + // valid chars. If fn_validhzs is NULL, only chars in GB2312 will be + // included. + bool build_dict(const char *fn_raw, const char *fn_validhzs, DictTrie *dict_trie); + + private: + // Fill in the buffer with id. The caller guarantees that the paramters are + // vaild. + void id_to_charbuf(unsigned char *buf, LemmaIdType id); + + // Update the offset of sons for a node. + void set_son_offset(LmaNodeGE1 *node, size_t offset); + + // Update the offset of homophonies' ids for a node. + void set_homo_id_buf_offset(LmaNodeGE1 *node, size_t offset); + + // Format a speling string. + void format_spelling_str(char *spl_str); + + // Sort the lemma_arr by the hanzi string, and give each of unique items + // a id. Why we need to sort the lemma list according to their Hanzi string + // is to find items started by a given prefix string to do prediction. 
+ // Actually, the single char items are be in other order, for example, + // in spelling id order, etc. + // Return value is next un-allocated idx available. + LemmaIdType sort_lemmas_by_hz(); + + // Build the SingleCharItem list, and fill the hanzi_scis_ids in the + // lemma buffer lemma_arr_. + // This function should be called after the lemma array is ready. + // Return the number of unique SingleCharItem elements. + size_t build_scis(); + + // Construct a subtree using a subset of the spelling array (from + // item_star to item_end) + // parent is the parent node to update the necessary information + // parent can be a member of LmaNodeLE0 or LmaNodeGE1 + bool construct_subset(void *parent, LemmaEntry *lemma_arr, size_t item_start, size_t item_end, size_t level); + + // Read valid Chinese Hanzis from the given file. + // num is used to return number of chars. + // The return buffer is sorted and caller needs to free the returned buffer. + char16 *read_valid_hanzis(const char *fn_validhzs, size_t *num); + + // Read a raw dictionary. max_item is the maximum number of items. If there + // are more items in the ditionary, only the first max_item will be read. + // Returned value is the number of items successfully read from the file. + size_t read_raw_dict(const char *fn_raw, const char *fn_validhzs, size_t max_item); + + // Try to find if a character is in hzs buffer. + bool hz_in_hanzis_list(const char16 *hzs, size_t hzs_len, char16 hz); + + // Try to find if all characters in str are in hzs buffer. + bool str_in_hanzis_list(const char16 *hzs, size_t hzs_len, const char16 *str, size_t str_len); + + // Get these lemmas with toppest scores. + void get_top_lemmas(); + + // Allocate resource to build dictionary. + // lma_num is the number of items to be loaded + bool alloc_resource(size_t lma_num); + + // Free resource. 
+ void free_resource(); +}; +#endif // ___BUILD_MODEL___ +} // namespace ime_pinyin + +#endif // PINYINIME_INCLUDE_DICTBUILDER_H__ diff --git a/dictdef.h b/src/include/dictdef.h similarity index 76% rename from dictdef.h rename to src/include/dictdef.h index 591515f..e765afa 100644 --- a/dictdef.h +++ b/src/include/dictdef.h @@ -23,16 +23,16 @@ namespace ime_pinyin { // Enable the following line when building the binary dictionary model. - #define ___BUILD_MODEL___ +#define ___BUILD_MODEL___ -typedef unsigned char uint8; -typedef unsigned short uint16; -typedef unsigned int uint32; +typedef unsigned char uint8; +typedef unsigned short uint16; +typedef unsigned int uint32; -typedef signed char int8; -typedef short int16; -typedef int int32; -typedef long long int64; +typedef signed char int8; +typedef short int16; +typedef int int32; +typedef long long int64; typedef unsigned long long uint64; const bool kPrintDebug0 = false; @@ -87,11 +87,10 @@ const LemmaIdType kUserDictIdStart = 500001; const LemmaIdType kUserDictIdEnd = 600000; typedef struct { - uint16 half_splid:5; - uint16 full_splid:11; + uint16 half_splid : 5; + uint16 full_splid : 11; } SpellingId, *PSpellingId; - /** * We use different node types for different layers * Statistical data of the building result for a testing dictionary: @@ -109,11 +108,11 @@ typedef struct { * A node occupies 16 bytes. so, totallly less than 16 * 500 = 8K */ struct LmaNodeLE0 { - uint32 son_1st_off; - uint32 homo_idx_buf_off; - uint16 spl_idx; - uint16 num_of_son; - uint16 num_of_homo; + uint32 son_1st_off; + uint32 homo_idx_buf_off; + uint16 spl_idx; + uint16 num_of_son; + uint16 num_of_homo; }; /** @@ -121,34 +120,34 @@ struct LmaNodeLE0 { * A node occupies 8 bytes. 
*/ struct LmaNodeGE1 { - uint16 son_1st_off_l; // Low bits of the son_1st_off - uint16 homo_idx_buf_off_l; // Low bits of the homo_idx_buf_off_1 - uint16 spl_idx; - unsigned char num_of_son; // number of son nodes - unsigned char num_of_homo; // number of homo words - unsigned char son_1st_off_h; // high bits of the son_1st_off - unsigned char homo_idx_buf_off_h; // high bits of the homo_idx_buf_off + uint16 son_1st_off_l; // Low bits of the son_1st_off + uint16 homo_idx_buf_off_l; // Low bits of the homo_idx_buf_off_1 + uint16 spl_idx; + unsigned char num_of_son; // number of son nodes + unsigned char num_of_homo; // number of homo words + unsigned char son_1st_off_h; // high bits of the son_1st_off + unsigned char homo_idx_buf_off_h; // high bits of the homo_idx_buf_off }; #ifdef ___BUILD_MODEL___ struct SingleCharItem { - float freq; - char16 hz; - SpellingId splid; + float freq; + char16 hz; + SpellingId splid; }; struct LemmaEntry { - LemmaIdType idx_by_py; - LemmaIdType idx_by_hz; - char16 hanzi_str[kMaxLemmaSize + 1]; + LemmaIdType idx_by_py; + LemmaIdType idx_by_hz; + char16 hanzi_str[kMaxLemmaSize + 1]; - // The SingleCharItem id for each Hanzi. - uint16 hanzi_scis_ids[kMaxLemmaSize]; + // The SingleCharItem id for each Hanzi. + uint16 hanzi_scis_ids[kMaxLemmaSize]; - uint16 spl_idx_arr[kMaxLemmaSize + 1]; - char pinyin_str[kMaxLemmaSize][kMaxPinyinSize + 1]; - unsigned char hz_str_len; - float freq; + uint16 spl_idx_arr[kMaxLemmaSize + 1]; + char pinyin_str[kMaxLemmaSize][kMaxPinyinSize + 1]; + unsigned char hz_str_len; + float freq; }; #endif // ___BUILD_MODEL___ diff --git a/src/include/dictlist.h b/src/include/dictlist.h new file mode 100644 index 0000000..5413e65 --- /dev/null +++ b/src/include/dictlist.h @@ -0,0 +1,113 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PINYINIME_INCLUDE_DICTLIST_H__ +#define PINYINIME_INCLUDE_DICTLIST_H__ + +#include +#include +#include "./dictdef.h" +#include "./searchutility.h" +#include "./spellingtrie.h" +#include "./utf16char.h" + +namespace ime_pinyin { + +class DictList { + private: + bool initialized_; + + const SpellingTrie *spl_trie_; + + // Number of SingCharItem. The first is blank, because id 0 is invalid. + size_t scis_num_; + char16 *scis_hz_; + SpellingId *scis_splid_; + + // The large memory block to store the word list. + char16 *buf_; + + // Starting position of those words whose lengths are i+1, counted in + // char16 + uint32 start_pos_[kMaxLemmaSize + 1]; + + uint32 start_id_[kMaxLemmaSize + 1]; + + int (*cmp_func_[kMaxLemmaSize])(const void *, const void *); + + bool alloc_resource(size_t buf_size, size_t scim_num); + + void free_resource(); + +#ifdef ___BUILD_MODEL___ + // Calculate the requsted memory, including the start_pos[] buffer. + size_t calculate_size(const LemmaEntry *lemma_arr, size_t lemma_num); + + void fill_scis(const SingleCharItem *scis, size_t scis_num); + + // Copy the related content to the inner buffer + // It should be called after calculate_size() + void fill_list(const LemmaEntry *lemma_arr, size_t lemma_num); + + // Find the starting position for the buffer of those 2-character Chinese word + // whose first character is the given Chinese character. + char16 *find_pos2_startedbyhz(char16 hz_char); +#endif + + // Find the starting position for the buffer of those words whose lengths are + // word_len. 
The given parameter cmp_func decides how many characters from + // beginning will be used to compare. + char16 *find_pos_startedbyhzs(const char16 last_hzs[], size_t word_Len, int (*cmp_func)(const void *, const void *)); + + public: + DictList(); + ~DictList(); + + bool save_list(FILE *fp); + bool load_list(FILE *fp); + +#ifdef ___BUILD_MODEL___ + // Init the list from the LemmaEntry array. + // lemma_arr should have been sorted by the hanzi_str, and have been given + // ids from 1 + bool init_list(const SingleCharItem *scis, size_t scis_num, const LemmaEntry *lemma_arr, size_t lemma_num); +#endif + + // Get the hanzi string for the given id + uint16 get_lemma_str(LemmaIdType id_hz, char16 *str_buf, uint16 str_max); + + void convert_to_hanzis(char16 *str, uint16 str_len); + + void convert_to_scis_ids(char16 *str, uint16 str_len); + + // last_hzs stores the last n Chinese characters history, its length should be + // less or equal than kMaxPredictSize. + // hzs_len specifies the length(<= kMaxPredictSize). + // predict_buf is used to store the result. + // buf_len specifies the buffer length. + // b4_used specifies how many items before predict_buf have been used. + // Returned value is the number of newly added items. + size_t predict(const char16 last_hzs[], uint16 hzs_len, NPredictItem *npre_items, size_t npre_max, size_t b4_used); + + // If half_splid is a valid half spelling id, return those full spelling + // ids which share this half id. 
+ uint16 get_splids_for_hanzi(char16 hanzi, uint16 half_splid, uint16 *splids, uint16 max_splids); + + LemmaIdType get_lemma_id(const char16 *str, uint16 str_len); +}; +} // namespace ime_pinyin + +#endif // PINYINIME_INCLUDE_DICTLIST_H__ diff --git a/src/include/dicttrie.h b/src/include/dicttrie.h new file mode 100644 index 0000000..37395bd --- /dev/null +++ b/src/include/dicttrie.h @@ -0,0 +1,209 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PINYINIME_INCLUDE_DICTTRIE_H__ +#define PINYINIME_INCLUDE_DICTTRIE_H__ + +#include +#include "./atomdictbase.h" +#include "./dictdef.h" +#include "./dictlist.h" +#include "./searchutility.h" + +namespace ime_pinyin { + +class DictTrie : AtomDictBase { + private: + struct ParsingMark { + size_t node_offset : 24; + size_t node_num : 8; // Number of nodes with this spelling id given + // by spl_id. If spl_id is a Shengmu, for nodes + // in the first layer of DictTrie, it equals to + // SpellingTrie::shm2full_num(); but for those + // nodes which are not in the first layer, + // node_num < SpellingTrie::shm2full_num(). + // For a full spelling id, node_num = 1; + }; + + // Used to indicate an extended mile stone. + // An extended mile stone is used to mark a partial match in the dictionary + // trie to speed up further potential extending. 
+ // For example, when the user inputs "w", a mile stone is created to mark the + // partial match status, so that when user inputs another char 'm', it will be + // faster to extend search space based on this mile stone. + // + // For partial match status of "wm", there can be more than one sub mile + // stone, for example, "wm" can be matched to "wanm", "wom", ..., etc, so + // there may be more one parsing mark used to mark these partial matchings. + // A mile stone records the starting position in the mark list and number of + // marks. + struct MileStone { + uint16 mark_start; + uint16 mark_num; + }; + + DictList *dict_list_; + + const SpellingTrie *spl_trie_; + + LmaNodeLE0 *root_; // Nodes for root and the first layer. + LmaNodeGE1 *nodes_ge1_; // Nodes for other layers. + + // An quick index from spelling id to the LmaNodeLE0 node buffer, or + // to the root_ buffer. + // Index length: + // SpellingTrie::get_instance().get_spelling_num() + 1. The last one is used + // to get the end. + // All Shengmu ids are not indexed because they will be converted into + // corresponding full ids. + // So, given an id splid, the son is: + // root_[splid_le0_index_[splid - kFullSplIdStart]] + uint16 *splid_le0_index_; + + size_t lma_node_num_le0_; + size_t lma_node_num_ge1_; + + // The first part is for homophnies, and the last top_lma_num_ items are + // lemmas with highest scores. + unsigned char *lma_idx_buf_; + size_t lma_idx_buf_len_; // The total size of lma_idx_buf_ in byte. + size_t total_lma_num_; // Total number of lemmas in this dictionary. + size_t top_lmas_num_; // Number of lemma with highest scores. + + // Parsing mark list used to mark the detailed extended statuses. + ParsingMark *parsing_marks_; + // The position for next available mark. + uint16 parsing_marks_pos_; + + // Mile stone list used to mark the extended status. + MileStone *mile_stones_; + // The position for the next available mile stone. We use positions (except 0) + // as handles. 
+ MileStoneHandle mile_stones_pos_; + + // Get the offset of sons for a node. + inline size_t get_son_offset(const LmaNodeGE1 *node); + + // Get the offset of homonious ids for a node. + inline size_t get_homo_idx_buf_offset(const LmaNodeGE1 *node); + + // Get the lemma id by the offset. + inline LemmaIdType get_lemma_id(size_t id_offset); + + void free_resource(bool free_dict_list); + + bool load_dict(FILE *fp); + + // Given a LmaNodeLE0 node, extract the lemmas specified by it, and fill + // them into the lpi_items buffer. + // This function is called by the search engine. + size_t fill_lpi_buffer(LmaPsbItem lpi_items[], size_t max_size, LmaNodeLE0 *node); + + // Given a LmaNodeGE1 node, extract the lemmas specified by it, and fill + // them into the lpi_items buffer. + // This function is called by inner functions extend_dict0(), extend_dict1() + // and extend_dict2(). + size_t fill_lpi_buffer(LmaPsbItem lpi_items[], size_t max_size, size_t homo_buf_off, LmaNodeGE1 *node, uint16 lma_len); + + // Extend in the trie from level 0. + MileStoneHandle extend_dict0(MileStoneHandle from_handle, const DictExtPara *dep, LmaPsbItem *lpi_items, size_t lpi_max, size_t *lpi_num); + + // Extend in the trie from level 1. + MileStoneHandle extend_dict1(MileStoneHandle from_handle, const DictExtPara *dep, LmaPsbItem *lpi_items, size_t lpi_max, size_t *lpi_num); + + // Extend in the trie from level 2. + MileStoneHandle extend_dict2(MileStoneHandle from_handle, const DictExtPara *dep, LmaPsbItem *lpi_items, size_t lpi_max, size_t *lpi_num); + + // Try to extend the given spelling id buffer, and if the given id_lemma can + // be successfully gotten, return true; + // The given spelling ids are all valid full ids. 
+ bool try_extend(const uint16 *splids, uint16 splid_num, LemmaIdType id_lemma); + +#ifdef ___BUILD_MODEL___ + bool save_dict(FILE *fp); +#endif // ___BUILD_MODEL___ + + static const int kMaxMileStone = 100; + static const int kMaxParsingMark = 600; + static const MileStoneHandle kFirstValidMileStoneHandle = 1; + + friend class DictParser; + friend class DictBuilder; + + public: + DictTrie(); + ~DictTrie(); + +#ifdef ___BUILD_MODEL___ + // Construct the tree from the file fn_raw. + // fn_validhzs provide the valid hanzi list. If fn_validhzs is + // NULL, only chars in GB2312 will be included. + bool build_dict(const char *fn_raw, const char *fn_validhzs); + + // Save the binary dictionary + // Actually, the SpellingTrie/DictList instance will be also saved. + bool save_dict(const char *filename); +#endif // ___BUILD_MODEL___ + + void convert_to_hanzis(char16 *str, uint16 str_len); + + void convert_to_scis_ids(char16 *str, uint16 str_len); + + // Load a binary dictionary + // The SpellingTrie instance/DictList will be also loaded + bool load_dict(const char *filename, LemmaIdType start_id, LemmaIdType end_id); + bool load_dict_fd(int sys_fd, long start_offset, long length, LemmaIdType start_id, LemmaIdType end_id); + bool close_dict() { return true; } + size_t number_of_lemmas() { return 0; } + + void reset_milestones(uint16 from_step, MileStoneHandle from_handle); + + MileStoneHandle extend_dict(MileStoneHandle from_handle, const DictExtPara *dep, LmaPsbItem *lpi_items, size_t lpi_max, size_t *lpi_num); + + size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len, LmaPsbItem *lpi_items, size_t lpi_max); + + uint16 get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, uint16 str_max); + + uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids, uint16 splids_max, bool arg_valid); + + size_t predict(const char16 *last_hzs, uint16 hzs_len, NPredictItem *npre_items, size_t npre_max, size_t b4_used); + + LemmaIdType put_lemma(char16 /*lemma_str*/[], uint16 
/*splids*/[], uint16 /*lemma_len*/, uint16 /*count*/) { return 0; } + + LemmaIdType update_lemma(LemmaIdType /*lemma_id*/, int16 /*delta_count*/, bool /*selected*/) { return 0; } + + LemmaIdType get_lemma_id(char16 /*lemma_str*/[], uint16 /*splids*/[], uint16 /*lemma_len*/) { return 0; } + + LmaScoreType get_lemma_score(LemmaIdType /*lemma_id*/) { return 0; } + + LmaScoreType get_lemma_score(char16 /*lemma_str*/[], uint16 /*splids*/[], uint16 /*lemma_len*/) { return 0; } + + bool remove_lemma(LemmaIdType /*lemma_id*/) { return false; } + + size_t get_total_lemma_count() { return 0; } + void set_total_lemma_count_of_others(size_t count); + + void flush_cache() {} + + LemmaIdType get_lemma_id(const char16 lemma_str[], uint16 lemma_len); + + // Fill the lemmas with highest scores to the prediction buffer. + // his_len is the history length to fill in the prediction buffer. + size_t predict_top_lmas(size_t his_len, NPredictItem *npre_items, size_t npre_max, size_t b4_used); +}; +} // namespace ime_pinyin + +#endif // PINYINIME_INCLUDE_DICTTRIE_H__ diff --git a/src/include/lpicache.h b/src/include/lpicache.h new file mode 100644 index 0000000..bdebb81 --- /dev/null +++ b/src/include/lpicache.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef PINYINIME_ANDPY_INCLUDE_LPICACHE_H__ +#define PINYINIME_ANDPY_INCLUDE_LPICACHE_H__ + +#include <stdlib.h> +#include "./searchutility.h" +#include "./spellingtrie.h" + +namespace ime_pinyin { + +// Used to cache LmaPsbItem list for half spelling ids. +class LpiCache { + private: + static LpiCache *instance_; + static const int kMaxLpiCachePerId = 15; + + LmaPsbItem *lpi_cache_; + uint16 *lpi_cache_len_; + + public: + LpiCache(); + ~LpiCache(); + + static LpiCache &get_instance(); + + // Test if the LPI list of the given splid has been cached. + // If splid is a full spelling id, it returns false, because we only cache + // list for half ids. + bool is_cached(uint16 splid); + + // Put LPI list to cache. If the length of the list, lpi_num, is longer than + // the cache buffer, the list will be truncated, and function returns the + // maximum length of the cache buffer. + // Note: splid must be a half id, and lpi_items must be not NULL. The + // caller of this function should guarantee this. + size_t put_cache(uint16 splid, LmaPsbItem lpi_items[], size_t lpi_num); + + // Get the cached list for the given half id. + // Return the length of the cached buffer. + // Note: splid must be a half id, and lpi_items must be not NULL. The + // caller of this function should guarantee this. + size_t get_cache(uint16 splid, LmaPsbItem lpi_items[], size_t lpi_max); +}; + +} // namespace ime_pinyin + +#endif // PINYINIME_ANDPY_INCLUDE_LPICACHE_H__ diff --git a/src/include/matrixsearch.h b/src/include/matrixsearch.h new file mode 100644 index 0000000..697baf9 --- /dev/null +++ b/src/include/matrixsearch.h @@ -0,0 +1,438 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PINYINIME_ANDPY_INCLUDE_MATRIXSEARCH_H__ +#define PINYINIME_ANDPY_INCLUDE_MATRIXSEARCH_H__ + +#include +#include "./atomdictbase.h" +#include "./dicttrie.h" +#include "./searchutility.h" +#include "./spellingtrie.h" +#include "./splparser.h" + +namespace ime_pinyin { + +static const size_t kMaxRowNum = kMaxSearchSteps; + +typedef struct { + // MileStoneHandle objects for the system and user dictionaries. + MileStoneHandle dict_handles[2]; + // From which DMI node. -1 means it's from root. + PoolPosType dmi_fr; + // The spelling id for the Pinyin string from the previous DMI to this node. + // If it is a half id like Shengmu, the node pointed by dict_node is the first + // node with this Shengmu, + uint16 spl_id; + // What's the level of the dict node. Level of root is 0, but root is never + // recorded by dict_node. + unsigned char dict_level : 7; + // If this node is for composing phrase, this bit is 1. + unsigned char c_phrase : 1; + // Whether the spl_id is parsed with a split character at the end. + unsigned char splid_end_split : 1; + // What's the length of the spelling string for this match, for the whole + // word. + unsigned char splstr_len : 7; + // Used to indicate whether all spelling ids from the root are full spelling + // ids. This information is useful for keymapping mode(not finished). Because + // in this mode, there is no clear boundaries, we prefer those results which + // have full spelling ids. 
+ unsigned char all_full_id : 1; +} DictMatchInfo, *PDictMatchInfo; + +typedef struct MatrixNode { + LemmaIdType id; + float score; + MatrixNode *from; + // From which DMI node. Used to trace the spelling segmentation. + PoolPosType dmi_fr; + uint16 step; +} MatrixNode, *PMatrixNode; + +typedef struct { + // The MatrixNode position in the matrix pool + PoolPosType mtrx_nd_pos; + // The DictMatchInfo position in the DictMatchInfo pool. + PoolPosType dmi_pos; + uint16 mtrx_nd_num; + uint16 dmi_num : 15; + // Used to indicate whether there are dmi nodes in this step with full + // spelling id. This information is used to decide whether a substring of a + // valid Pinyin should be extended. + // + // Example1: shoudao + // When the last char 'o' is added, the parser will find "dao" is a valid + // Pinyin, and because all dmi nodes at location 'd' (including those for + // "shoud", and those for "d") have Shengmu id only, so it is not necessary + // to extend "ao", otherwise the result may be "shoud ao", that is not + // reasonable. + // + // Example2: hengao + // When the last 'o' is added, the parser finds "gao" is a valid Pinyin. + // Because some dmi nodes at 'g' has Shengmu ids (hen'g and g), but some dmi + // nodes at 'g' has full ids ('heng'), so it is necessary to extend "ao", thus + // "heng ao" can also be the result. + // + // Similarly, "ganga" is expanded to "gang a". + // + // For Pinyin string "xian", because "xian" is a valid Pinyin, because all dmi + // nodes at 'x' only have Shengmu ids, the parser will not try "x ian" (and it + // is not valid either). If the parser uses break in the loop, the result + // always be "xian"; but if the parser uses continue in the loop, "xi an" will + // also be tried. This behaviour can be set via the function + // set_xi_an_switch(). + uint16 dmi_has_full_id : 1; + // Points to a MatrixNode of the current step to indicate which choice the + // user selects. 
+ MatrixNode *mtrx_nd_fixed; +} MatrixRow, *PMatrixRow; + +// When user inputs and selects candidates, the fixed lemma ids are stored in +// lma_id_ of class MatrixSearch, and fixed_lmas_ is used to indicate how many +// lemmas from the beginning are fixed. If user deletes Pinyin characters one +// by one from the end, these fixed lemmas can be unlocked one by one when +// necessary. Whenever user deletes a Chinese character and its spelling string +// in these fixed lemmas, all fixed lemmas will be merged together into a unit +// named ComposingPhrase with a lemma id kLemmaIdComposing, and this composing +// phrase will be the first lemma in the sentence. Because it contains some +// modified lemmas (by deleting a character), these merged lemmas are called +// sub lemmas (sublma), and each of them are represented individually, so that +// when user deletes Pinyin characters from the end, these sub lemmas can also +// be unlocked one by one. +typedef struct { + uint16 spl_ids[kMaxRowNum]; + uint16 spl_start[kMaxRowNum]; + char16 chn_str[kMaxRowNum]; // Chinese string. + uint16 sublma_start[kMaxRowNum]; // Counted in Chinese characters. + size_t sublma_num; + uint16 length; // Counted in Chinese characters. +} ComposingPhrase, *TComposingPhrase; + +class MatrixSearch { + private: + // If it is true, prediction list by string whose length is greater than 1 + // will be limited to a reasonable number. + static const bool kPredictLimitGt1 = false; + + // If it is true, the engine will prefer long history based prediction, + // for example, when user inputs "BeiJing", we prefer "DaXue", etc., which are + // based on the two-character history. + static const bool kPreferLongHistoryPredict = true; + + // If it is true, prediction will only be based on user dictionary. this flag + // is for debug purpose. + static const bool kOnlyUserDictPredict = false; + + // The maximum buffer to store LmaPsbItems. 
+ static const size_t kMaxLmaPsbItems = 1450; + + // How many rows for each step. + static const size_t kMaxNodeARow = 5; + + // The maximum length of the sentence candidates counted in chinese + // characters + static const size_t kMaxSentenceLength = 16; + + // The size of the matrix node pool. + static const size_t kMtrxNdPoolSize = 200; + + // The size of the DMI node pool. + static const size_t kDmiPoolSize = 800; + + // Used to indicate whether this object has been initialized. + bool inited_; + + // Spelling trie. + const SpellingTrie *spl_trie_; + + // Used to indicate this switcher status: when "xian" is parseed, should + // "xi an" also be extended. Default is false. + // These cases include: xia, xian, xiang, zhuan, jiang..., etc. The string + // should be valid for a FULL spelling, or a combination of two spellings, + // first of which is a FULL id too. So even it is true, "da" will never be + // split into "d a", because "d" is not a full spelling id. + bool xi_an_enabled_; + + // System dictionary. + DictTrie *dict_trie_; + + // User dictionary. + AtomDictBase *user_dict_; + + // Spelling parser. + SpellingParser *spl_parser_; + + // The maximum allowed length of spelling string (such as a Pinyin string). + size_t max_sps_len_; + + // The maximum allowed length of a result Chinese string. + size_t max_hzs_len_; + + // Pinyin string. Max length: kMaxRowNum - 1 + char pys_[kMaxRowNum]; + + // The length of the string that has been decoded successfully. + size_t pys_decoded_len_; + + // Shared buffer for multiple purposes. + size_t *share_buf_; + + MatrixNode *mtrx_nd_pool_; + PoolPosType mtrx_nd_pool_used_; // How many nodes used in the pool + DictMatchInfo *dmi_pool_; + PoolPosType dmi_pool_used_; // How many items used in the pool + + MatrixRow *matrix_; // The first row is for starting + + DictExtPara *dep_; // Parameter used to extend DMI nodes. 
+ + NPredictItem *npre_items_; // Used to do prediction + size_t npre_items_len_; + + // The starting positions and lemma ids for the full sentence candidate. + size_t lma_id_num_; + uint16 lma_start_[kMaxRowNum]; // Counted in spelling ids. + LemmaIdType lma_id_[kMaxRowNum]; + size_t fixed_lmas_; + + // If fixed_lmas_ is bigger than i, Element i is used to indicate whether + // the i'th lemma id in lma_id_ is the first candidate for that step. + // If all candidates are the first one for that step, the whole string can be + // decoded by the engine automatically, so no need to add it to user + // dictionary. (We are considering to add it to user dictionary in the + // future). + uint8 fixed_lmas_no1_[kMaxRowNum]; + + // Composing phrase + ComposingPhrase c_phrase_; + + // If dmi_c_phrase_ is true, the decoder will try to match the + // composing phrase (And definitely it will match successfully). If it + // is false, the decoder will try to match lemmas items in dictionaries. + bool dmi_c_phrase_; + + // The starting positions and spelling ids for the first full sentence + // candidate. + size_t spl_id_num_; // Number of splling ids + uint16 spl_start_[kMaxRowNum]; // Starting positions + uint16 spl_id_[kMaxRowNum]; // Spelling ids + // Used to remember the last fixed position, counted in Hanzi. + size_t fixed_hzs_; + + // Lemma Items with possibility score, two purposes: + // 1. In Viterbi decoding, this buffer is used to get all possible candidates + // for current step; + // 2. When the search is done, this buffer is used to get candiates from the + // first un-fixed step and show them to the user. + LmaPsbItem lpi_items_[kMaxLmaPsbItems]; + size_t lpi_total_; + + // Assign the pointers with NULL. The caller makes sure that all pointers are + // not valid before calling it. This function only will be called in the + // construction function and free_resource(). 
+ void reset_pointers_to_null(); + + bool alloc_resource(); + + void free_resource(); + + // Reset the search space totally. + bool reset_search0(); + + // Reset the search space from ch_pos step. For example, if the original + // input Pinyin is "an", reset_search(1) will reset the search space to the + // result of "a". If the given position is out of range, return false. + // if clear_fixed_this_step is true, and the ch_pos step is a fixed step, + // clear its fixed status. if clear_dmi_his_step is true, clear the DMI nodes. + // If clear_mtrx_this_sTep is true, clear the mtrx nodes of this step. + // The DMI nodes will be kept. + // + // Note: this function should not destroy content of pys_. + bool reset_search(size_t ch_pos, bool clear_fixed_this_step, bool clear_dmi_this_step, bool clear_mtrx_this_step); + + // Delete a part of the content in pys_. + void del_in_pys(size_t start, size_t len); + + // Delete a spelling id and its corresponding Chinese character, and merge + // the fixed lemmas into the composing phrase. + // del_spl_pos indicates which spelling id needs to be delete. + // This function will update the lemma and spelling segmentation information. + // The caller guarantees that fixed_lmas_ > 0 and del_spl_pos is within + // the fixed lemmas. + void merge_fixed_lmas(size_t del_spl_pos); + + // Get spelling start posistions and ids. The result will be stored in + // spl_id_num_, spl_start_[], spl_id_[]. + // fixed_hzs_ will be also assigned. + void get_spl_start_id(); + + // Get all lemma ids with match the given spelling id stream(shorter than the + // maximum length of a word). + // If pfullsent is not NULL, means the full sentence candidate may be the + // same with the coming lemma string, if so, remove that lemma. + // The result is sorted in descendant order by the frequency score. 
+ size_t get_lpis(const uint16 *splid_str, size_t splid_str_len, LmaPsbItem *lma_buf, size_t max_lma_buf, const char16 *pfullsent, bool sort_by_psb); + + uint16 get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, uint16 str_max); + + uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids, uint16 splids_max, bool arg_valid); + + // Extend a DMI node with a spelling id. ext_len is the length of the rows + // to extend, actually, it is the size of the spelling string of splid. + // return value can be 1 or 0. + // 1 means a new DMI is filled in (dmi_pool_used_ is the next blank DMI in + // the pool). + // 0 means either the dmi node can not be extended with splid, or the splid + // is a Shengmu id, which is only used to get lpi_items, or the result node + // in DictTrie has no son, it is not nccessary to keep the new DMI. + // + // This function modifies the content of lpi_items_ and lpi_total_. + // lpi_items_ is used to get the LmaPsbItem list, lpi_total_ returns the size. + // The function's returned value has no relation with the value of lpi_num. + // + // If dmi == NULL, this function will extend the root node of DictTrie + // + // This function will not change dmi_nd_pool_used_. Please change it after + // calling this function if necessary. + // + // The caller should guarantees that NULL != dep. + size_t extend_dmi(DictExtPara *dep, DictMatchInfo *dmi_s); + + // Extend dmi for the composing phrase. + size_t extend_dmi_c(DictExtPara *dep, DictMatchInfo *dmi_s); + + // Extend a MatrixNode with the give LmaPsbItem list. + // res_row is the destination row number. + // This function does not change mtrx_nd_pool_used_. Please change it after + // calling this function if necessary. + // return 0 always. + size_t extend_mtrx_nd(MatrixNode *mtrx_nd, LmaPsbItem lpi_items[], size_t lpi_num, PoolPosType dmi_fr, size_t res_row); + + // Try to find a dmi node at step_to position, and the found dmi node should + // match the given spelling id strings. 
+ PoolPosType match_dmi(size_t step_to, uint16 spl_ids[], uint16 spl_id_num); + + bool add_char(char ch); + bool prepare_add_char(char ch); + + // Called after prepare_add_char, so the input char has been saved. + bool add_char_qwerty(); + + // Prepare candidates from the last fixed hanzi position. + void prepare_candidates(); + + // Is the character in step pos a splitter character? + // The caller guarantees that the position is valid. + bool is_split_at(uint16 pos); + + void fill_dmi(DictMatchInfo *dmi, MileStoneHandle *handles, PoolPosType dmi_fr, uint16 spl_id, uint16 node_num, unsigned char dict_level, bool splid_end_split, unsigned char splstr_len, unsigned char all_full_id); + + size_t inner_predict(const char16 fixed_scis_ids[], uint16 scis_num, char16 predict_buf[][kMaxPredictSize + 1], size_t buf_len); + + // Add the first candidate to the user dictionary. + bool try_add_cand0_to_userdict(); + + // Add a user lemma to the user dictionary. This lemma is a subset of + // candidate 0. lma_from is from which lemma in lma_ids_, lma_num is the + // number of lemmas to be combined together as a new lemma. The caller + // gurantees that the combined new lemma's length is less or equal to + // kMaxLemmaSize. + bool add_lma_to_userdict(uint16 lma_from, uint16 lma_num, float score); + + // Update dictionary frequencies. + void update_dict_freq(); + + void debug_print_dmi(PoolPosType dmi_pos, uint16 nest_level); + + public: + MatrixSearch(); + ~MatrixSearch(); + + bool init(const char *fn_sys_dict, const char *fn_usr_dict); + + bool init_fd(int sys_fd, long start_offset, long length, const char *fn_usr_dict); + + void set_max_lens(size_t max_sps_len, size_t max_hzs_len); + + void close(); + + void flush_cache(); + + void set_xi_an_switch(bool xi_an_enabled); + + bool get_xi_an_switch(); + + // Reset the search space. Equivalent to reset_search(0). + // If inited, always return true; + bool reset_search(); + + // Search a Pinyin string. 
+ // Return value is the position successfully parsed. + size_t search(const char *py, size_t py_len); + + // Used to delete something in the Pinyin string kept by the engine, and do + // a re-search. + // Return value is the new length of Pinyin string kept by the engine which + // is parsed successfully. + // If is_pos_in_splid is false, pos is used to indicate that pos-th Pinyin + // character needs to be deleted. If is_pos_in_splid is true, all Pinyin + // characters for pos-th spelling id needs to be deleted. + // If the deleted character(s) is just after a fixed lemma or sub lemma in + // composing phrase, clear_fixed_this_step indicates whether we needs to + // unlock the last fixed lemma or sub lemma. + // If is_pos_in_splid is false, and pos-th character is in the range for the + // fixed lemmas or composing string, this function will do nothing and just + // return the result of the previous search. + size_t delsearch(size_t pos, bool is_pos_in_splid, bool clear_fixed_this_step); + + // Get the number of candiates, called after search(). + size_t get_candidate_num(); + + // Get the Pinyin string stored by the engine. + // *decoded_len returns the length of the successfully decoded string. + const char *get_pystr(size_t *decoded_len); + + // Get the spelling boundaries for the first sentence candidate. + // Number of spellings will be returned. The number of valid elements in + // spl_start is one more than the return value because the last one is used + // to indicate the beginning of the next un-input speling. + // For a Pinyin "women", the returned value is 2, spl_start is [0, 2, 5] . + size_t get_spl_start(const uint16 *&spl_start); + + // Get one candiate string. If full sentence candidate is available, it will + // be the first one. + char16 *get_candidate(size_t cand_id, char16 *cand_str, size_t max_len); + + // Get the first candiate, which is a "full sentence". + // retstr_len is not NULL, it will be used to return the string length. 
+ // If only_unfixed is true, only unfixed part will be fetched. + char16 *get_candidate0(char16 *cand_str, size_t max_len, uint16 *retstr_len, bool only_unfixed); + + // Choose a candidate. The decoder will do a search after the fixed position. + size_t choose(size_t cand_id); + + // Cancel the last choosing operation, and return the new number of choices. + size_t cancel_last_choice(); + + // Get the length of fixed Hanzis. + size_t get_fixedlen(); + + size_t get_predicts(const char16 fixed_buf[], char16 predict_buf[][kMaxPredictSize + 1], size_t buf_len); +}; +} // namespace ime_pinyin + +#endif // PINYINIME_ANDPY_INCLUDE_MATRIXSEARCH_H__ diff --git a/mystdlib.h b/src/include/mystdlib.h similarity index 75% rename from mystdlib.h rename to src/include/mystdlib.h index dfcf980..a3addb3 100644 --- a/mystdlib.h +++ b/src/include/mystdlib.h @@ -21,12 +21,9 @@ namespace ime_pinyin { -void myqsort(void *p, size_t n, size_t es, - int (*cmp)(const void *, const void *)); +void myqsort(void *p, size_t n, size_t es, int (*cmp)(const void *, const void *)); -void *mybsearch(const void *key, const void *base, - size_t nmemb, size_t size, - int (*compar)(const void *, const void *)); -} +void *mybsearch(const void *key, const void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *)); +} // namespace ime_pinyin #endif // PINYINIME_INCLUDE_MYSTDLIB_H__ diff --git a/src/include/ngram.h b/src/include/ngram.h new file mode 100644 index 0000000..52855bd --- /dev/null +++ b/src/include/ngram.h @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PINYINIME_INCLUDE_NGRAM_H__ +#define PINYINIME_INCLUDE_NGRAM_H__ + +#include +#include +#include "./dictdef.h" + +namespace ime_pinyin { + +typedef unsigned char CODEBOOK_TYPE; + +static const size_t kCodeBookSize = 256; + +class NGram { + public: + // The maximum score of a lemma item. + static const LmaScoreType kMaxScore = 0x3fff; + + // In order to reduce the storage size, the original log value is amplified by + // kScoreAmplifier, and we use LmaScoreType to store. + // After this process, an item with a lower score has a higher frequency. + static const int kLogValueAmplifier = -800; + + // System words' total frequency. It is not the real total frequency, instead, + // It is only used to adjust system lemmas' scores when the user dictionary's + // total frequency changes. + // In this version, frequencies of system lemmas are fixed. We are considering + // to make them changable in next version. + static const size_t kSysDictTotalFreq = 100000000; + + private: + static NGram *instance_; + + bool initialized_; + size_t idx_num_; + + size_t total_freq_none_sys_; + + // Score compensation for system dictionary lemmas. + // Because after user adds some user lemmas, the total frequency changes, and + // we use this value to normalize the score. 
+ float sys_score_compensation_; + +#ifdef ___BUILD_MODEL___ + double *freq_codes_df_; +#endif + LmaScoreType *freq_codes_; + CODEBOOK_TYPE *lma_freq_idx_; + + public: + NGram(); + ~NGram(); + + static NGram &get_instance(); + + bool save_ngram(FILE *fp); + bool load_ngram(FILE *fp); + + // Set the total frequency of all none system dictionaries. + void set_total_freq_none_sys(size_t freq_none_sys); + + float get_uni_psb(LemmaIdType lma_id); + + // Convert a probability to score. Actually, the score will be limited to + // kMaxScore, but at runtime, we also need float expression to get accurate + // value of the score. + // After the conversion, a lower score indicates a higher probability of the + // item. + static float convert_psb_to_score(double psb); + +#ifdef ___BUILD_MODEL___ + // For constructing the unigram mode model. + bool build_unigram(LemmaEntry *lemma_arr, size_t num, LemmaIdType next_idx_unused); +#endif +}; +} // namespace ime_pinyin + +#endif // PINYINIME_INCLUDE_NGRAM_H__ diff --git a/src/include/pinyinime.h b/src/include/pinyinime.h new file mode 100644 index 0000000..2b2657f --- /dev/null +++ b/src/include/pinyinime.h @@ -0,0 +1,207 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef PINYINIME_INCLUDE_ANDPYIME_H__ +#define PINYINIME_INCLUDE_ANDPYIME_H__ + +#include +#include "./dictdef.h" + +#ifdef __cplusplus +extern "C" { +#endif + +namespace ime_pinyin { + +/** + * Open the decoder engine via the system and user dictionary file names. + * + * @param fn_sys_dict The file name of the system dictionary. + * @param fn_usr_dict The file name of the user dictionary. + * @return true if open the decoder engine successfully. + */ +bool im_open_decoder(const char *fn_sys_dict, const char *fn_usr_dict); + +/** + * Open the decoder engine via the system dictionary FD and user dictionary + * file name. Because on Android, the system dictionary is embedded in the + * whole application apk file. + * + * @param sys_fd The file in which the system dictionary is embedded. + * @param start_offset The starting position of the system dictionary in the + * file sys_fd. + * @param length The length of the system dictionary in the file sys_fd, + * counted in byte. + * @return true if succeed. + */ +bool im_open_decoder_fd(int sys_fd, long start_offset, long length, const char *fn_usr_dict); + +/** + * Close the decoder engine. + */ +void im_close_decoder(); + +/** + * Set maximum limitations for decoding. If this function is not called, + * default values will be used. For example, due to screen size limitation, + * the UI engine of the IME can only show a certain number of letters(input) + * to decode, and a certain number of Chinese characters(output). If after + * user adds a new letter, the input or the output string is longer than the + * limitations, the engine will discard the recent letter. + * + * @param max_sps_len Maximum length of the spelling string(Pinyin string). + * @max_hzs_len Maximum length of the decoded Chinese character string. + */ +void im_set_max_lens(size_t max_sps_len, size_t max_hzs_len); + +/** + * Flush cached data to persistent memory. 
Because at runtime, in order to + * achieve best performance, some data is only store in memory. + */ +void im_flush_cache(); + +/** + * Use a spelling string(Pinyin string) to search. The engine will try to do + * an incremental search based on its previous search result, so if the new + * string has the same prefix with the previous one stored in the decoder, + * the decoder will only continue the search from the end of the prefix. + * If the caller needs to do a brand new search, please call im_reset_search() + * first. Calling im_search() is equivalent to calling im_add_letter() one by + * one. + * + * @param sps_buf The spelling string buffer to decode. + * @param sps_len The length of the spelling string buffer. + * @return The number of candidates. + */ +size_t im_search(const char *sps_buf, size_t sps_len); + +/** + * Make a delete operation in the current search result, and make research if + * necessary. + * + * @param pos The posistion of char in spelling string to delete, or the + * position of spelling id in result string to delete. + * @param is_pos_in_splid Indicate whether the pos parameter is the position + * in the spelling string, or the position in the result spelling id string. + * @return The number of candidates. + */ +size_t im_delsearch(size_t pos, bool is_pos_in_splid, bool clear_fixed_this_step); + +/** + * Reset the previous search result. + */ +void im_reset_search(); + +/** + * Add a Pinyin letter to the current spelling string kept by decoder. If the + * decoder fails in adding the letter, it will do nothing. im_get_sps_str() + * can be used to get the spelling string kept by decoder currently. + * + * @param ch The letter to add. + * @return The number of candidates. + */ +size_t im_add_letter(char ch); + +/** + * Get the spelling string kept by the decoder. + * + * @param decoded_len Used to return how many characters in the spelling + * string is successfully parsed. + * @return The spelling string kept by the decoder. 
+ */ +const char *im_get_sps_str(size_t *decoded_len); + +/** + * Get a candidate(or choice) string. + * + * @param cand_id The id to get a candidate. Started from 0. Usually, id 0 + * is a sentence-level candidate. + * @param cand_str The buffer to store the candidate. + * @param max_len The maximum length of the buffer. + * @return cand_str if succeeds, otherwise NULL. + */ +char16 *im_get_candidate(size_t cand_id, char16 *cand_str, size_t max_len); + +/** + * Get the segmentation information(the starting positions) of the spelling + * string. + * + * @param spl_start Used to return the starting posistions. + * @return The number of spelling ids. If it is L, there will be L+1 valid + * elements in spl_start, and spl_start[L] is the posistion after the end of + * the last spelling id. + */ +size_t im_get_spl_start_pos(const uint16 *&spl_start); + +/** + * Choose a candidate and make it fixed. If the candidate does not match + * the end of all spelling ids, new candidates will be provided from the + * first unfixed position. If the candidate matches the end of the all + * spelling ids, there will be only one new candidates, or the whole fixed + * sentence. + * + * @param cand_id The id of candidate to select and make it fixed. + * @return The number of candidates. If after the selection, the whole result + * string has been fixed, there will be only one candidate. + */ +size_t im_choose(size_t cand_id); + +/** + * Cancel the last selection, or revert the last operation of im_choose(). + * + * @return The number of candidates. + */ +size_t im_cancel_last_choice(); + +/** + * Get the number of fixed spelling ids, or Chinese characters. + * + * @return The number of fixed spelling ids, of Chinese characters. + */ +size_t im_get_fixed_len(); + +/** + * Cancel the input state and reset the search workspace. + */ +bool im_cancel_input(); + +/** + * Get prediction candiates based on the given fixed Chinese string as the + * history. 
+ * + * @param his_buf The history buffer to do the prediction. It should be ended + * with '\0'. + * @param pre_buf Used to return prediction result list. + * @return The number of predicted result string. + */ +size_t im_get_predicts(const char16 *his_buf, char16 (*&pre_buf)[kMaxPredictSize + 1]); + +/** + * Enable Shengmus in ShouZiMu mode. + */ +void im_enable_shm_as_szm(bool enable); + +/** + * Enable Yunmus in ShouZiMu mode. + */ +void im_enable_ym_as_szm(bool enable); +} // namespace ime_pinyin + +#ifdef __cplusplus +} +#endif + +#endif // PINYINIME_INCLUDE_ANDPYIME_H__ diff --git a/searchutility.h b/src/include/searchutility.h similarity index 62% rename from searchutility.h rename to src/include/searchutility.h index f135710..fa8b97c 100644 --- a/searchutility.h +++ b/src/include/searchutility.h @@ -30,25 +30,24 @@ typedef uint16 MileStoneHandle; // Type used to express a lemma and its probability score. typedef struct { - size_t id:(kLemmaIdSize * 8); - size_t lma_len:4; - uint16 psb; // The score, the lower psb, the higher possibility. - // For single character items, we may also need Hanzi. - // For multiple characer items, ignore it. - char16 hanzi; + size_t id : (kLemmaIdSize * 8); + size_t lma_len : 4; + uint16 psb; // The score, the lower psb, the higher possibility. + // For single character items, we may also need Hanzi. + // For multiple characer items, ignore it. + char16 hanzi; } LmaPsbItem, *PLmaPsbItem; // LmaPsbItem extended with string. typedef struct { - LmaPsbItem lpi; - char16 str[kMaxLemmaSize + 1]; + LmaPsbItem lpi; + char16 str[kMaxLemmaSize + 1]; } LmaPsbStrItem, *PLmaPsbStrItem; - typedef struct { - float psb; - char16 pre_hzs[kMaxPredictSize]; - uint16 his_len; // The length of the history used to do the prediction. + float psb; + char16 pre_hzs[kMaxPredictSize]; + uint16 his_len; // The length of the history used to do the prediction. } NPredictItem, *PNPredictItem; // Parameter structure used to extend in a dictionary. 
All dictionaries @@ -72,41 +71,41 @@ typedef struct { // step_no = 4; splid_end_split = false; id_start = men, id_num = 1. // typedef struct { - // Spelling ids for extending, there are splids_extended + 1 ids in the - // buffer. - // For a normal lemma, there can only be kMaxLemmaSize spelling ids in max, - // but for a composing phrase, there can kMaxSearchSteps spelling ids. - uint16 splids[kMaxSearchSteps]; + // Spelling ids for extending, there are splids_extended + 1 ids in the + // buffer. + // For a normal lemma, there can only be kMaxLemmaSize spelling ids in max, + // but for a composing phrase, there can kMaxSearchSteps spelling ids. + uint16 splids[kMaxSearchSteps]; - // Number of ids that have been used before. splids[splids_extended] is the - // newly added id for the current extension. - uint16 splids_extended; + // Number of ids that have been used before. splids[splids_extended] is the + // newly added id for the current extension. + uint16 splids_extended; - // The step span of the extension. It is also the size of the string for - // the newly added spelling id. - uint16 ext_len; + // The step span of the extension. It is also the size of the string for + // the newly added spelling id. + uint16 ext_len; - // The step number for the current extension. It is also the ending position - // in the input Pinyin string for the substring of spelling ids in splids[]. - // For example, when the user inputs "women", step_no = 4. - // This parameter may useful to manage the MileStoneHandle list for each - // step. When the user deletes a character from the string, MileStoneHandle - // objects for the the steps after that character should be reset; when the - // user begins a new string, all MileStoneHandle objects should be reset. - uint16 step_no; + // The step number for the current extension. It is also the ending position + // in the input Pinyin string for the substring of spelling ids in splids[]. 
+ // For example, when the user inputs "women", step_no = 4. + // This parameter may useful to manage the MileStoneHandle list for each + // step. When the user deletes a character from the string, MileStoneHandle + // objects for the the steps after that character should be reset; when the + // user begins a new string, all MileStoneHandle objects should be reset. + uint16 step_no; - // Indicate whether the newly added spelling ends with a splitting character - bool splid_end_split; + // Indicate whether the newly added spelling ends with a splitting character + bool splid_end_split; - // If the newly added id is a half id, id_start is the first id of the - // corresponding full ids; if the newly added id is a full id, id_start is - // that id. - uint16 id_start; + // If the newly added id is a half id, id_start is the first id of the + // corresponding full ids; if the newly added id is a full id, id_start is + // that id. + uint16 id_start; - // If the newly added id is a half id, id_num is the number of corresponding - // ids; if it is a full id, id_num == 1. - uint16 id_num; -}DictExtPara, *PDictExtPara; + // If the newly added id is a half id, id_num is the number of corresponding + // ids; if it is a full id, id_num == 1. 
+    uint16 id_num; +} DictExtPara, *PDictExtPara; bool is_system_lemma(LemmaIdType lma_id); bool is_user_lemma(LemmaIdType lma_id); @@ -132,11 +131,10 @@ int cmp_npre_by_score(const void *p1, const void *p2); int cmp_npre_by_hislen_score(const void *p1, const void *p2); int cmp_npre_by_hanzi_score(const void *p1, const void *p2); - size_t remove_duplicate_npre(NPredictItem *npre_items, size_t npre_num); size_t align_to_size_t(size_t size); -}  // namespace +}  // namespace ime_pinyin #endif  // PINYINIME_ANDPY_INCLUDE_SEARCHCOMMON_H__ diff --git a/src/include/spellingtable.h b/src/include/spellingtable.h new file mode 100644 index 0000000..2be1dd2 --- /dev/null +++ b/src/include/spellingtable.h @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + *      http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PINYINIME_INCLUDE_SPELLINGTABLE_H__ +#define PINYINIME_INCLUDE_SPELLINGTABLE_H__ + +#include <stdlib.h> +#include "./dictdef.h" + +namespace ime_pinyin { + +#ifdef ___BUILD_MODEL___ + +const size_t kMaxSpellingSize = kMaxPinyinSize; + +typedef struct { +    char str[kMaxSpellingSize + 1]; +    double freq; +} RawSpelling, *PRawSpelling; + +// This class is used to store the spelling strings +// The length of the input spelling string should be less or equal to the +// spelling_size_ (set by init_table). If the input string is too long, +// we only keep its first spelling_size_ chars.
+class SpellingTable { +   private: +    static const size_t kNotSupportNum = 3; +    static const char kNotSupportList[kNotSupportNum][kMaxSpellingSize + 1]; + +    bool need_score_; + +    size_t spelling_max_num_; + +    RawSpelling* raw_spellings_; + +    // Used to store spelling strings. If the spelling table needs to calculate +    // score, an extra char after each spelling string is the score. +    // An item with a lower score has a higher probability. +    char* spelling_buf_; +    size_t spelling_size_; + +    double total_freq_; + +    size_t spelling_num_; + +    double score_amplifier_; + +    unsigned char average_score_; + +    // If frozen is true, put_spelling() and contain() are not allowed to call. +    bool frozen_; + +    size_t get_hash_pos(const char* spelling_str); +    size_t hash_pos_next(size_t hash_pos); +    void free_resource(); + +   public: +    SpellingTable(); +    ~SpellingTable(); + +    // pure_spl_size is the pure maximum spelling string size. For example, +    // "zhuang" is the longest item in Pinyin, so pure_spl_size should be 6. +    // spl_max_num is the maximum number of spelling strings to store. +    // need_score is used to indicate whether the caller needs to calculate a +    // score for each spelling. +    bool init_table(size_t pure_spl_size, size_t spl_max_num, bool need_score); + +    // Put a spelling string to the table. +    // It always returns false if called after arrange() without a new +    // init_table() operation. +    // freq is the spelling's occurring count. +    // If the spelling has been in the table, the occurring count will be accumulated. +    bool put_spelling(const char* spelling_str, double spl_count); + +    // Test whether a spelling string is in the table. +    // It always returns false, when being called after arrange() without a new +    // init_table() operation. +    bool contain(const char* spelling_str); + +    // Sort the spelling strings and put them from the beginning of the buffer. +    // Return the pointer of the sorted spelling strings.
+    // item_size and spl_num return the item size and number of spelling. +    // Because each spelling uses a '\0' as terminator, the returned item_size is +    // at least one char longer than the spl_size parameter specified by +    // init_table(). If the table is initialized to calculate score, item_size +    // will be increased by 1, and current_spl_str[item_size - 1] stores an +    // unsigned char score. +    // An item with a lower score has a higher probability. +    // Do not call put_spelling() and contain() after arrange(). +    const char* arrange(size_t* item_size, size_t* spl_num); + +    float get_score_amplifier(); + +    unsigned char get_average_score(); +}; +#endif  // ___BUILD_MODEL___ +}  // namespace ime_pinyin + +#endif  // PINYINIME_INCLUDE_SPELLINGTABLE_H__ diff --git a/src/include/spellingtrie.h b/src/include/spellingtrie.h new file mode 100644 index 0000000..8efed50 --- /dev/null +++ b/src/include/spellingtrie.h @@ -0,0 +1,251 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + *      http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PINYINIME_INCLUDE_SPELLINGTRIE_H__ +#define PINYINIME_INCLUDE_SPELLINGTRIE_H__ + +#include <stdio.h> +#include <stdlib.h> +#include "./dictdef.h" + +namespace ime_pinyin { + +static const unsigned short kFullSplIdStart = kHalfSpellingIdNum + 1; + +// Node used for the trie of spellings +struct SpellingNode { +    SpellingNode* first_son; +    // The spelling id for each node.
If you need more bits to store + // spelling id, please adjust this structure. + uint16 spelling_idx : 11; + uint16 num_of_son : 5; + char char_this_node; + unsigned char score; +}; + +class SpellingTrie { + private: + static const int kMaxYmNum = 64; + static const size_t kValidSplCharNum = 26; + + static const uint16 kHalfIdShengmuMask = 0x01; + static const uint16 kHalfIdYunmuMask = 0x02; + static const uint16 kHalfIdSzmMask = 0x04; + + // Map from half spelling id to single char. + // For half ids of Zh/Ch/Sh, map to z/c/s (low case) respectively. + // For example, 1 to 'A', 2 to 'B', 3 to 'C', 4 to 'c', 5 to 'D', ..., + // 28 to 'Z', 29 to 'z'. + // [0] is not used to achieve better efficiency. + static const char kHalfId2Sc_[kFullSplIdStart + 1]; + + static unsigned char char_flags_[]; + static SpellingTrie* instance_; + + // The spelling table + char* spelling_buf_; + + // The size of longest spelling string, includes '\0' and an extra char to + // store score. For example, "zhuang" is the longgest item in Pinyin list, + // so spelling_size_ is 8. + // Structure: The string ended with '\0' + score char. + // An item with a lower score has a higher probability. + size_t spelling_size_; + + // Number of full spelling ids. + size_t spelling_num_; + + float score_amplifier_; + unsigned char average_score_; + + // The Yunmu id list for the spelling ids (for half ids of Shengmu, + // the Yunmu id is 0). + // The length of the list is spelling_num_ + kFullSplIdStart, + // so that spl_ym_ids_[splid] is the Yunmu id of the splid. + uint8* spl_ym_ids_; + + // The Yunmu table. + // Each Yunmu will be assigned with Yunmu id from 1. + char* ym_buf_; + size_t ym_size_; // The size of longest Yunmu string, '\0'included. 
+ size_t ym_num_; + + // The spelling string just queried + char* splstr_queried_; + + // The spelling string just queried + char16* splstr16_queried_; + + // The root node of the spelling tree + SpellingNode* root_; + + // If a none qwerty key such as a fnction key like ENTER is given, this node + // will be used to indicate that this is not a QWERTY node. + SpellingNode* dumb_node_; + + // If a splitter key is pressed, this node will be used to indicate that this + // is a splitter key. + SpellingNode* splitter_node_; + + // Used to get the first level sons. + SpellingNode* level1_sons_[kValidSplCharNum]; + + // The full spl_id range for specific half id. + // h2f means half to full. + // A half id can be a ShouZiMu id (id to represent the first char of a full + // spelling, including Shengmu and Yunmu), or id of zh/ch/sh. + // [1..kFullSplIdStart-1] is the arrange of half id. + uint16 h2f_start_[kFullSplIdStart]; + uint16 h2f_num_[kFullSplIdStart]; + + // Map from full id to half id. + uint16* f2h_; + +#ifdef ___BUILD_MODEL___ + // How many node used to build the trie. + size_t node_num_; +#endif + + SpellingTrie(); + + void free_son_trie(SpellingNode* node); + + // Construct a subtree using a subset of the spelling array (from + // item_star to item_end). + // Member spelliing_buf_ and spelling_size_ should be valid. + // parent is used to update its num_of_son and score. + SpellingNode* construct_spellings_subset(size_t item_start, size_t item_end, size_t level, SpellingNode* parent); + bool build_f2h(); + + // The caller should guarantee ch >= 'A' && ch <= 'Z' + bool is_shengmu_char(char ch) const; + + // The caller should guarantee ch >= 'A' && ch <= 'Z' + bool is_yunmu_char(char ch) const; + +#ifdef ___BUILD_MODEL___ + // Given a spelling string, return its Yunmu string. + // The caller guaratees spl_str is valid. + const char* get_ym_str(const char* spl_str); + + // Build the Yunmu list, and the mapping relation between the full ids and the + // Yunmu ids. 
This functin is called after the spelling trie is built. + bool build_ym_info(); +#endif + + friend class SpellingParser; + friend class SmartSplParser; + friend class SmartSplParser2; + + public: + ~SpellingTrie(); + + inline static bool is_valid_spl_char(char ch) { return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'); } + + // The caller guarantees that the two chars are valid spelling chars. + inline static bool is_same_spl_char(char ch1, char ch2) { return ch1 == ch2 || ch1 - ch2 == 'a' - 'A' || ch2 - ch1 == 'a' - 'A'; } + + // Construct the tree from the input pinyin array + // The given string list should have been sorted. + // score_amplifier is used to convert a possibility value into score. + // average_score is the average_score of all spellings. The dumb node is + // assigned with this score. + bool construct(const char* spelling_arr, size_t item_size, size_t item_num, float score_amplifier, unsigned char average_score); + + // Test if the given id is a valid spelling id. + // If function returns true, the given splid may be updated like this: + // When 'A' is not enabled in ShouZiMu mode, the parsing result for 'A' is + // first given as a half id 1, but because 'A' is a one-char Yunmu and + // it is a valid id, it needs to updated to its corresponding full id. + bool if_valid_id_update(uint16* splid) const; + + // Test if the given id is a half id. + bool is_half_id(uint16 splid) const; + + bool is_full_id(uint16 splid) const; + + // Test if the given id is a one-char Yunmu id (obviously, it is also a half + // id), such as 'A', 'E' and 'O'. + bool is_half_id_yunmu(uint16 splid) const; + + // Test if this char is a ShouZiMu char. This ShouZiMu char may be not enabled. + // For Pinyin, only i/u/v is not a ShouZiMu char. + // The caller should guarantee that ch >= 'A' && ch <= 'Z' + bool is_szm_char(char ch) const; + + // Test If this char is enabled in ShouZiMu mode. 
+ // The caller should guarantee that ch >= 'A' && ch <= 'Z' + bool szm_is_enabled(char ch) const; + + // Enable/disable Shengmus in ShouZiMu mode(using the first char of a spelling + // to input). + void szm_enable_shm(bool enable); + + // Enable/disable Yunmus in ShouZiMu mode. + void szm_enable_ym(bool enable); + + // Test if this char is enabled in ShouZiMu mode. + // The caller should guarantee ch >= 'A' && ch <= 'Z' + bool is_szm_enabled(char ch) const; + + // Return the number of full ids for the given half id. + uint16 half2full_num(uint16 half_id) const; + + // Return the number of full ids for the given half id, and fill spl_id_start + // to return the first full id. + uint16 half_to_full(uint16 half_id, uint16* spl_id_start) const; + + // Return the corresponding half id for the given full id. + // Not frequently used, low efficient. + // Return 0 if fails. + uint16 full_to_half(uint16 full_id) const; + + // To test whether a half id is compatible with a full id. + // Generally, when half_id == full_to_half(full_id), return true. + // But for "Zh, Ch, Sh", if fussy mode is on, half id for 'Z' is compatible + // with a full id like "Zhe". (Fussy mode is not ready). + bool half_full_compatible(uint16 half_id, uint16 full_id) const; + + static const SpellingTrie* get_cpinstance(); + + static SpellingTrie& get_instance(); + + // Save to the file stream + bool save_spl_trie(FILE* fp); + + // Load from the file stream + bool load_spl_trie(FILE* fp); + + // Get the number of spellings + size_t get_spelling_num(); + + // Return the Yunmu id for the given Yunmu string. + // If the string is not valid, return 0; + uint8 get_ym_id(const char* ym_str); + + // Get the readonly Pinyin string for a given spelling id + const char* get_spelling_str(uint16 splid); + + // Get the readonly Pinyin string for a given spelling id + const char16* get_spelling_str16(uint16 splid); + + // Get Pinyin string for a given spelling id. 
Return the length of the +    // string, and fill-in '\0' at the end. +    size_t get_spelling_str16(uint16 splid, char16* splstr16, size_t splstr16_len); +}; +}  // namespace ime_pinyin + +#endif  // PINYINIME_INCLUDE_SPELLINGTRIE_H__ diff --git a/src/include/splparser.h b/src/include/splparser.h new file mode 100644 index 0000000..2a73901 --- /dev/null +++ b/src/include/splparser.h @@ -0,0 +1,89 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + *      http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PINYINIME_INCLUDE_SPLPARSER_H__ +#define PINYINIME_INCLUDE_SPLPARSER_H__ + +#include "./dictdef.h" +#include "./spellingtrie.h" + +namespace ime_pinyin { + +class SpellingParser { +   protected: +    const SpellingTrie *spl_trie_; + +   public: +    SpellingParser(); + +    // Given a string, parse it into a spelling id stream. +    // If the whole string is successfully parsed, last_is_pre will be true; +    // if the whole string is not fully parsed, last_is_pre will return whether +    // the last part of the string is a prefix of a full spelling string. For +    // example, given string "zhengzhon", "zhon" is not a valid spelling, but it is +    // the prefix of "zhong". +    // +    // If splstr starts with a character not in ['a'-'z'] (it is a split char), +    // return 0. +    // Split char can only appear in the middle of the string or at the end.
+ uint16 splstr_to_idxs(const char *splstr, uint16 str_len, uint16 splidx[], uint16 start_pos[], uint16 max_size, bool &last_is_pre); + + // Similar to splstr_to_idxs(), the only difference is that splstr_to_idxs() + // convert single-character Yunmus into half ids, while this function converts + // them into full ids. + uint16 splstr_to_idxs_f(const char *splstr, uint16 str_len, uint16 splidx[], uint16 start_pos[], uint16 max_size, bool &last_is_pre); + + // Similar to splstr_to_idxs(), the only difference is that this function + // uses char16 instead of char8. + uint16 splstr16_to_idxs(const char16 *splstr, uint16 str_len, uint16 splidx[], uint16 start_pos[], uint16 max_size, bool &last_is_pre); + + // Similar to splstr_to_idxs_f(), the only difference is that this function + // uses char16 instead of char8. + uint16 splstr16_to_idxs_f(const char16 *splstr16, uint16 str_len, uint16 splidx[], uint16 start_pos[], uint16 max_size, bool &last_is_pre); + + // If the given string is a spelling, return the id, others, return 0. + // If the give string is a single char Yunmus like "A", and the char is + // enabled in ShouZiMu mode, the returned spelling id will be a half id. + // When the returned spelling id is a half id, *is_pre returns whether it + // is a prefix of a full spelling string. + uint16 get_splid_by_str(const char *splstr, uint16 str_len, bool *is_pre); + + // If the given string is a spelling, return the id, others, return 0. + // If the give string is a single char Yunmus like "a", no matter the char + // is enabled in ShouZiMu mode or not, the returned spelling id will be + // a full id. + // When the returned spelling id is a half id, *p_is_pre returns whether it + // is a prefix of a full spelling string. + uint16 get_splid_by_str_f(const char *splstr, uint16 str_len, bool *is_pre); + + // Splitter chars are not included. 
+ bool is_valid_to_parse(char ch); + + // When auto-correction is not enabled, get_splid_by_str() will be called to + // return the single result. When auto-correction is enabled, this function + // will be called to get the results. Auto-correction is not ready. + // full_id_num returns number of full spelling ids. + // is_pre returns whether the given string is the prefix of a full spelling + // string. + // If splstr starts with a character not in [a-zA-Z] (it is a split char), + // return 0. + // Split char can only appear in the middle of the string or at the end. + // The caller should guarantee NULL != splstr && str_len > 0 && NULL != splidx + uint16 get_splids_parallel(const char *splstr, uint16 str_len, uint16 splidx[], uint16 max_size, uint16 &full_id_num, bool &is_pre); +}; +} // namespace ime_pinyin + +#endif // PINYINIME_INCLUDE_SPLPARSER_H__ diff --git a/sync.h b/src/include/sync.h similarity index 51% rename from sync.h rename to src/include/sync.h index bf42d1f..6fd11d8 100644 --- a/sync.h +++ b/src/include/sync.h @@ -37,48 +37,48 @@ namespace ime_pinyin { // } x N -> // finish() class Sync { - public: - Sync(); - ~Sync(); + public: + Sync(); + ~Sync(); - static const int kUserDictMaxLemmaCount = 5000; - static const int kUserDictMaxLemmaSize = 200000; - static const int kUserDictRatio = 20; + static const int kUserDictMaxLemmaCount = 5000; + static const int kUserDictMaxLemmaSize = 200000; + static const int kUserDictRatio = 20; - bool begin(const char * filename); + bool begin(const char* filename); - // Merge lemmas downloaded from sync server into local dictionary - // lemmas, lemmas string encoded in UTF16LE - // len, length of lemmas string - // Return how many lemmas merged successfully - int put_lemmas(char16 * lemmas, int len); + // Merge lemmas downloaded from sync server into local dictionary + // lemmas, lemmas string encoded in UTF16LE + // len, length of lemmas string + // Return how many lemmas merged successfully + int 
put_lemmas(char16* lemmas, int len); - // Get local new user lemmas into UTF16LE string - // str, buffer ptr to store new user lemmas - // size, size of buffer - // Return length of returned buffer in measure of UTF16LE - int get_lemmas(char16 * str, int size); + // Get local new user lemmas into UTF16LE string + // str, buffer ptr to store new user lemmas + // size, size of buffer + // Return length of returned buffer in measure of UTF16LE + int get_lemmas(char16* str, int size); - // Return lemmas count in last get_lemmas() - int get_last_got_count(); + // Return lemmas count in last get_lemmas() + int get_last_got_count(); - // Return total lemmas count need get_lemmas() - int get_total_count(); + // Return total lemmas count need get_lemmas() + int get_total_count(); - // Clear lemmas got by recent get_lemmas() - void clear_last_got(); + // Clear lemmas got by recent get_lemmas() + void clear_last_got(); - void finish(); + void finish(); - int get_capacity(); + int get_capacity(); - private: - UserDict * userdict_; - char * dictfile_; - int last_count_; + private: + UserDict* userdict_; + char* dictfile_; + int last_count_; }; -} +} // namespace ime_pinyin #endif diff --git a/src/include/userdict.h b/src/include/userdict.h new file mode 100644 index 0000000..32914fe --- /dev/null +++ b/src/include/userdict.h @@ -0,0 +1,390 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef PINYINIME_INCLUDE_USERDICT_H__ +#define PINYINIME_INCLUDE_USERDICT_H__ + +#define ___CACHE_ENABLED___ +#define ___SYNC_ENABLED___ +#define ___PREDICT_ENABLED___ + +// Debug performance for operations +// #define ___DEBUG_PERF___ + +#include <stdlib.h> +#include "atomdictbase.h" + +namespace ime_pinyin { + +class UserDict : public AtomDictBase { +   public: +    UserDict(); +    ~UserDict(); + +    bool load_dict(const char *file_name, LemmaIdType start_id, LemmaIdType end_id); + +    bool close_dict(); + +    size_t number_of_lemmas(); + +    void reset_milestones(uint16 from_step, MileStoneHandle from_handle); + +    MileStoneHandle extend_dict(MileStoneHandle from_handle, const DictExtPara *dep, LmaPsbItem *lpi_items, size_t lpi_max, size_t *lpi_num); + +    size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len, LmaPsbItem *lpi_items, size_t lpi_max); + +    uint16 get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, uint16 str_max); + +    uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids, uint16 splids_max, bool arg_valid); + +    size_t predict(const char16 last_hzs[], uint16 hzs_len, NPredictItem *npre_items, size_t npre_max, size_t b4_used); + +    // Full spelling ids are required +    LemmaIdType put_lemma(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count); + +    LemmaIdType update_lemma(LemmaIdType lemma_id, int16 delta_count, bool selected); + +    LemmaIdType get_lemma_id(char16 lemma_str[], uint16 splids[], uint16 lemma_len); + +    LmaScoreType get_lemma_score(LemmaIdType lemma_id); + +    LmaScoreType get_lemma_score(char16 lemma_str[], uint16 splids[], uint16 lemma_len); + +    bool remove_lemma(LemmaIdType lemma_id); + +    size_t get_total_lemma_count(); +    void set_total_lemma_count_of_others(size_t count); + +    void flush_cache(); + +    void set_limit(uint32 max_lemma_count, uint32 max_lemma_size, uint32 reclaim_ratio); + +    void reclaim(); + +    void defragment(); + +#ifdef ___SYNC_ENABLED___ +    void clear_sync_lemmas(unsigned int start, unsigned int end); + +    int
get_sync_count(); + + LemmaIdType put_lemma_no_sync(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count, uint64 lmt); + /** + * Add lemmas encoded in UTF-16LE into dictionary without adding sync flag. + * + * @param lemmas in format of 'wo men,WM,0.32;da jia,DJ,0.12' + * @param len length of lemmas string in UTF-16LE + * @return newly added lemma count + */ + int put_lemmas_no_sync_from_utf16le_string(char16 *lemmas, int len); + + /** + * Get lemmas need sync to a UTF-16LE string of above format. + * Note: input buffer (str) must not be too small. If str is too small to + * contain single one lemma, there might be a dead loop. + * + * @param str buffer to write lemmas + * @param size buffer size in UTF-16LE + * @param count output value of lemma returned + * @return UTF-16LE string length + */ + int get_sync_lemmas_in_utf16le_string_from_beginning(char16 *str, int size, int *count); + +#endif + + struct UserDictStat { + uint32 version; + const char *file_name; + struct timeval load_time; + struct timeval last_update; + uint32 disk_size; + uint32 lemma_count; + uint32 lemma_size; + uint32 delete_count; + uint32 delete_size; +#ifdef ___SYNC_ENABLED___ + uint32 sync_count; +#endif + uint32 reclaim_ratio; + uint32 limit_lemma_count; + uint32 limit_lemma_size; + }; + + bool state(UserDictStat *stat); + + private: + uint32 total_other_nfreq_; + struct timeval load_time_; + LemmaIdType start_id_; + uint32 version_; + uint8 *lemmas_; + + // In-Memory-Only flag for each lemma + static const uint8 kUserDictLemmaFlagRemove = 1; + // Inuse lemmas' offset + uint32 *offsets_; + // Highest bit in offset tells whether corresponding lemma is removed + static const uint32 kUserDictOffsetFlagRemove = (1 << 31); + // Maximum possible for the offset + static const uint32 kUserDictOffsetMask = ~(kUserDictOffsetFlagRemove); + // Bit width for last modified time, from 1 to 16 + static const uint32 kUserDictLMTBitWidth = 16; + // Granularity for last modified time in second 
+ static const uint32 kUserDictLMTGranularity = 60 * 60 * 24 * 7; + // Maximum frequency count + static const uint16 kUserDictMaxFrequency = 0xFFFF; + +#define COARSE_UTC(year, month, day, hour, minute, second) ((year - 1970) * 365 * 24 * 60 * 60 + (month - 1) * 30 * 24 * 60 * 60 + (day - 1) * 24 * 60 * 60 + (hour - 0) * 60 * 60 + (minute - 0) * 60 + (second - 0)) + static const uint64 kUserDictLMTSince = COARSE_UTC(2009, 1, 1, 0, 0, 0); + + // Correspond to offsets_ + uint32 *scores_; + // Following two fields are only valid in memory + uint32 *ids_; +#ifdef ___PREDICT_ENABLED___ + uint32 *predicts_; +#endif +#ifdef ___SYNC_ENABLED___ + uint32 *syncs_; + size_t sync_count_size_; +#endif + uint32 *offsets_by_id_; + + size_t lemma_count_left_; + size_t lemma_size_left_; + + const char *dict_file_; + + // Be sure size is 4xN + struct UserDictInfo { + // When limitation reached, how much percentage will be reclaimed (1 ~ 100) + uint32 reclaim_ratio; + // maximum lemma count, 0 means no limitation + uint32 limit_lemma_count; + // Maximum lemma size, it's different from + // whole disk file size or in-mem dict size + // 0 means no limitation + uint32 limit_lemma_size; + // Total lemma count including deleted and inuse + // Also indicate offsets_ size + uint32 lemma_count; + // Total size of lemmas including used and freed + uint32 lemma_size; + // Freed lemma count + uint32 free_count; + // Freed lemma size in byte + uint32 free_size; +#ifdef ___SYNC_ENABLED___ + uint32 sync_count; +#endif + int32 total_nfreq; + } dict_info_; + + static const uint32 kUserDictVersion = 0x0ABCDEF0; + + static const uint32 kUserDictPreAlloc = 32; + static const uint32 kUserDictAverageNchar = 8; + + enum UserDictState { + // Keep in order + USER_DICT_NONE = 0, + USER_DICT_SYNC, +#ifdef ___SYNC_ENABLED___ + USER_DICT_SYNC_DIRTY, +#endif + USER_DICT_SCORE_DIRTY, + USER_DICT_OFFSET_DIRTY, + USER_DICT_LEMMA_DIRTY, + + USER_DICT_DEFRAGMENTED, + } state_; + + struct UserDictSearchable { + uint16 
splids_len; + uint16 splid_start[kMaxLemmaSize]; + uint16 splid_count[kMaxLemmaSize]; + // Compact inital letters for both FuzzyCompareSpellId and cache system + uint32 signature[kMaxLemmaSize / 4]; + }; + +#ifdef ___CACHE_ENABLED___ + enum UserDictCacheType { + USER_DICT_CACHE, + USER_DICT_MISS_CACHE, + }; + + static const int kUserDictCacheSize = 4; + static const int kUserDictMissCacheSize = kMaxLemmaSize - 1; + + struct UserDictMissCache { + uint32 signatures[kUserDictMissCacheSize][kMaxLemmaSize / 4]; + uint16 head, tail; + } miss_caches_[kMaxLemmaSize]; + + struct UserDictCache { + uint32 signatures[kUserDictCacheSize][kMaxLemmaSize / 4]; + uint32 offsets[kUserDictCacheSize]; + uint32 lengths[kUserDictCacheSize]; + // Ring buffer + uint16 head, tail; + } caches_[kMaxLemmaSize]; + + void cache_init(); + + void cache_push(UserDictCacheType type, UserDictSearchable *searchable, uint32 offset, uint32 length); + + bool cache_hit(UserDictSearchable *searchable, uint32 *offset, uint32 *length); + + bool load_cache(UserDictSearchable *searchable, uint32 *offset, uint32 *length); + + void save_cache(UserDictSearchable *searchable, uint32 offset, uint32 length); + + void reset_cache(); + + bool load_miss_cache(UserDictSearchable *searchable); + + void save_miss_cache(UserDictSearchable *searchable); + + void reset_miss_cache(); +#endif + + LmaScoreType translate_score(int f); + + int extract_score_freq(int raw_score); + + uint64 extract_score_lmt(int raw_score); + + inline int build_score(uint64 lmt, int freq); + + inline int64 utf16le_atoll(uint16 *s, int len); + + inline int utf16le_lltoa(int64 v, uint16 *s, int size); + + LemmaIdType _put_lemma(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count, uint64 lmt); + + size_t _get_lpis(const uint16 *splid_str, uint16 splid_str_len, LmaPsbItem *lpi_items, size_t lpi_max, bool *need_extend); + + int _get_lemma_score(char16 lemma_str[], uint16 splids[], uint16 lemma_len); + + int _get_lemma_score(LemmaIdType 
lemma_id); + + int is_fuzzy_prefix_spell_id(const uint16 *id1, uint16 len1, const UserDictSearchable *searchable); + + bool is_prefix_spell_id(const uint16 *fullids, uint16 fulllen, const UserDictSearchable *searchable); + + uint32 get_dict_file_size(UserDictInfo *info); + + bool reset(const char *file); + + bool validate(const char *file); + + bool load(const char *file, LemmaIdType start_id); + + bool is_valid_state(); + + bool is_valid_lemma_id(LemmaIdType id); + + LemmaIdType get_max_lemma_id(); + + void set_lemma_flag(uint32 offset, uint8 flag); + + char get_lemma_flag(uint32 offset); + + char get_lemma_nchar(uint32 offset); + + uint16 *get_lemma_spell_ids(uint32 offset); + + uint16 *get_lemma_word(uint32 offset); + + // Prepare searchable to fasten locate process + void prepare_locate(UserDictSearchable *searchable, const uint16 *splids, uint16 len); + + // Compare initial letters only + int32 fuzzy_compare_spell_id(const uint16 *id1, uint16 len1, const UserDictSearchable *searchable); + + // Compare exactly two spell ids + // First argument must be a full id spell id + bool equal_spell_id(const uint16 *fullids, uint16 fulllen, const UserDictSearchable *searchable); + + // Find first item by initial letters + int32 locate_first_in_offsets(const UserDictSearchable *searchable); + + LemmaIdType append_a_lemma(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count, uint64 lmt); + + // Check if a lemma is in dictionary + int32 locate_in_offsets(char16 lemma_str[], uint16 splid_str[], uint16 lemma_len); + + bool remove_lemma_by_offset_index(int offset_index); +#ifdef ___PREDICT_ENABLED___ + uint32 locate_where_to_insert_in_predicts(const uint16 *words, int lemma_len); + + int32 locate_first_in_predicts(const uint16 *words, int lemma_len); + + void remove_lemma_from_predict_list(uint32 offset); +#endif +#ifdef ___SYNC_ENABLED___ + void queue_lemma_for_sync(LemmaIdType id); + + void remove_lemma_from_sync_list(uint32 offset); + + void 
write_back_sync(int fd); +#endif + void write_back_score(int fd); + void write_back_offset(int fd); + void write_back_lemma(int fd); + void write_back_all(int fd); + void write_back(); + + struct UserDictScoreOffsetPair { + int score; + uint32 offset_index; + }; + + inline void swap(UserDictScoreOffsetPair *sop, int i, int j); + + void shift_down(UserDictScoreOffsetPair *sop, int i, int n); + + // On-disk format for each lemma + // +-------------+ + // | Version (4) | + // +-------------+ + // +-----------+-----------+--------------------+-------------------+ + // | Spare (1) | Nchar (1) | Splids (2 x Nchar) | Lemma (2 x Nchar) | + // +-----------+-----------+--------------------+-------------------+ + // ... + // +-----------------------+ +-------------+ <---Offset of offset + // | Offset1 by_splids (4) | ... | OffsetN (4) | + // +-----------------------+ +-------------+ +#ifdef ___PREDICT_ENABLED___ + // +----------------------+ +-------------+ + // | Offset1 by_lemma (4) | ... | OffsetN (4) | + // +----------------------+ +-------------+ +#endif + // +------------+ +------------+ + // | Score1 (4) | ... | ScoreN (4) | + // +------------+ +------------+ +#ifdef ___SYNC_ENABLED___ + // +-------------+ +-------------+ + // | NewAdd1 (4) | ... 
| NewAddN (4) | + // +-------------+ +-------------+ +#endif + // +----------------+ + // | Dict Info (4x) | + // +----------------+ +}; +} // namespace ime_pinyin + +#endif diff --git a/utf16char.h b/src/include/utf16char.h similarity index 52% rename from utf16char.h rename to src/include/utf16char.h index 7e957db..913a0cf 100644 --- a/utf16char.h +++ b/src/include/utf16char.h @@ -25,32 +25,30 @@ namespace ime_pinyin { extern "C" { #endif - typedef unsigned short char16; +typedef unsigned short char16; - // Get a token from utf16_str, - // Returned pointer is a '\0'-terminated utf16 string, or NULL - // *utf16_str_next returns the next part of the string for further tokenizing - char16* utf16_strtok(char16 *utf16_str, size_t *token_size, - char16 **utf16_str_next); +// Get a token from utf16_str, +// Returned pointer is a '\0'-terminated utf16 string, or NULL +// *utf16_str_next returns the next part of the string for further tokenizing +char16 *utf16_strtok(char16 *utf16_str, size_t *token_size, char16 **utf16_str_next); - int utf16_atoi(const char16 *utf16_str); +int utf16_atoi(const char16 *utf16_str); - float utf16_atof(const char16 *utf16_str); +float utf16_atof(const char16 *utf16_str); - size_t utf16_strlen(const char16 *utf16_str); +size_t utf16_strlen(const char16 *utf16_str); - int utf16_strcmp(const char16 *str1, const char16 *str2); - int utf16_strncmp(const char16 *str1, const char16 *str2, size_t size); +int utf16_strcmp(const char16 *str1, const char16 *str2); +int utf16_strncmp(const char16 *str1, const char16 *str2, size_t size); - char16* utf16_strcpy(char16 *dst, const char16 *src); - char16* utf16_strncpy(char16 *dst, const char16 *src, size_t size); +char16 *utf16_strcpy(char16 *dst, const char16 *src); +char16 *utf16_strncpy(char16 *dst, const char16 *src, size_t size); - - char* utf16_strcpy_tochar(char *dst, const char16 *src); +char *utf16_strcpy_tochar(char *dst, const char16 *src); #ifdef __cplusplus } #endif -} +} // namespace 
ime_pinyin #endif // PINYINIME_INCLUDE_UTF16CHAR_H__ diff --git a/utf16reader.h b/src/include/utf16reader.h similarity index 60% rename from utf16reader.h rename to src/include/utf16reader.h index b6d6719..2a3e1d6 100644 --- a/utf16reader.h +++ b/src/include/utf16reader.h @@ -23,26 +23,26 @@ namespace ime_pinyin { class Utf16Reader { - private: - FILE *fp_; - char16 *buffer_; - size_t buffer_total_len_; - size_t buffer_next_pos_; + private: + FILE* fp_; + char16* buffer_; + size_t buffer_total_len_; + size_t buffer_next_pos_; - // Always less than buffer_total_len_ - buffer_next_pos_ - size_t buffer_valid_len_; + // Always less than buffer_total_len_ - buffer_next_pos_ + size_t buffer_valid_len_; - public: - Utf16Reader(); - ~Utf16Reader(); + public: + Utf16Reader(); + ~Utf16Reader(); - // filename is the name of the file to open. - // buffer_len specifies how long buffer should be allocated to speed up the - // future reading - bool open(const char* filename, size_t buffer_len); - char16* readline(char16* read_buf, size_t max_len); - bool close(); + // filename is the name of the file to open. + // buffer_len specifies how long buffer should be allocated to speed up the + // future reading + bool open(const char* filename, size_t buffer_len); + char16* readline(char16* read_buf, size_t max_len); + bool close(); }; -} +} // namespace ime_pinyin #endif // PINYINIME_INCLUDE_UTF16READER_H__ diff --git a/src/share/dictbuilder.cpp b/src/share/dictbuilder.cpp new file mode 100644 index 0000000..7bf7695 --- /dev/null +++ b/src/share/dictbuilder.cpp @@ -0,0 +1,951 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include "../include/dictbuilder.h" +#include "../include/dicttrie.h" +#include "../include/mystdlib.h" +#include "../include/ngram.h" +#include "../include/searchutility.h" +#include "../include/spellingtable.h" +#include "../include/spellingtrie.h" +#include "../include/splparser.h" +#include "../include/utf16reader.h" + +namespace ime_pinyin { + +#ifdef ___BUILD_MODEL___ + +static const size_t kReadBufLen = 512; +static const size_t kSplTableHashLen = 2000; + +// Compare a SingleCharItem, first by Hanzis, then by spelling ids, then by +// frequencies. 
+int cmp_scis_hz_splid_freq(const void *p1, const void *p2) { + const SingleCharItem *s1, *s2; + s1 = static_cast(p1); + s2 = static_cast(p2); + + if (s1->hz < s2->hz) return -1; + if (s1->hz > s2->hz) return 1; + + if (s1->splid.half_splid < s2->splid.half_splid) return -1; + if (s1->splid.half_splid > s2->splid.half_splid) return 1; + + if (s1->splid.full_splid < s2->splid.full_splid) return -1; + if (s1->splid.full_splid > s2->splid.full_splid) return 1; + + if (s1->freq > s2->freq) return -1; + if (s1->freq < s2->freq) return 1; + return 0; +} + +int cmp_scis_hz_splid(const void *p1, const void *p2) { + const SingleCharItem *s1, *s2; + s1 = static_cast(p1); + s2 = static_cast(p2); + + if (s1->hz < s2->hz) return -1; + if (s1->hz > s2->hz) return 1; + + if (s1->splid.half_splid < s2->splid.half_splid) return -1; + if (s1->splid.half_splid > s2->splid.half_splid) return 1; + + if (s1->splid.full_splid < s2->splid.full_splid) return -1; + if (s1->splid.full_splid > s2->splid.full_splid) return 1; + + return 0; +} + +int cmp_lemma_entry_hzs(const void *p1, const void *p2) { + size_t size1 = utf16_strlen(((const LemmaEntry *)p1)->hanzi_str); + size_t size2 = utf16_strlen(((const LemmaEntry *)p2)->hanzi_str); + if (size1 < size2) + return -1; + else if (size1 > size2) + return 1; + + return utf16_strcmp(((const LemmaEntry *)p1)->hanzi_str, ((const LemmaEntry *)p2)->hanzi_str); +} + +int compare_char16(const void *p1, const void *p2) { + if (*((const char16 *)p1) < *((const char16 *)p2)) return -1; + if (*((const char16 *)p1) > *((const char16 *)p2)) return 1; + return 0; +} + +int compare_py(const void *p1, const void *p2) { + int ret = utf16_strcmp(((const LemmaEntry *)p1)->spl_idx_arr, ((const LemmaEntry *)p2)->spl_idx_arr); + + if (0 != ret) return ret; + + return static_cast(((const LemmaEntry *)p2)->freq) - static_cast(((const LemmaEntry *)p1)->freq); +} + +// First hanzi, if the same, then Pinyin +int cmp_lemma_entry_hzspys(const void *p1, const void *p2) { + 
size_t size1 = utf16_strlen(((const LemmaEntry *)p1)->hanzi_str); + size_t size2 = utf16_strlen(((const LemmaEntry *)p2)->hanzi_str); + if (size1 < size2) + return -1; + else if (size1 > size2) + return 1; + int ret = utf16_strcmp(((const LemmaEntry *)p1)->hanzi_str, ((const LemmaEntry *)p2)->hanzi_str); + + if (0 != ret) return ret; + + ret = utf16_strcmp(((const LemmaEntry *)p1)->spl_idx_arr, ((const LemmaEntry *)p2)->spl_idx_arr); + return ret; +} + +int compare_splid2(const void *p1, const void *p2) { + int ret = utf16_strcmp(((const LemmaEntry *)p1)->spl_idx_arr, ((const LemmaEntry *)p2)->spl_idx_arr); + return ret; +} + +DictBuilder::DictBuilder() { + lemma_arr_ = NULL; + lemma_num_ = 0; + + scis_ = NULL; + scis_num_ = 0; + + lma_nodes_le0_ = NULL; + lma_nodes_ge1_ = NULL; + + lma_nds_used_num_le0_ = 0; + lma_nds_used_num_ge1_ = 0; + + homo_idx_buf_ = NULL; + homo_idx_num_eq1_ = 0; + homo_idx_num_gt1_ = 0; + + top_lmas_ = NULL; + top_lmas_num_ = 0; + + spl_table_ = NULL; + spl_parser_ = NULL; +} + +DictBuilder::~DictBuilder() { free_resource(); } + +bool DictBuilder::alloc_resource(size_t lma_num) { + if (0 == lma_num) return false; + + free_resource(); + + lemma_num_ = lma_num; + lemma_arr_ = new LemmaEntry[lemma_num_]; + + top_lmas_num_ = 0; + top_lmas_ = new LemmaEntry[kTopScoreLemmaNum]; + + // New the scis_ buffer to the possible maximum size. 
+ scis_num_ = lemma_num_ * kMaxLemmaSize; + scis_ = new SingleCharItem[scis_num_]; + + // The root and first level nodes is less than kMaxSpellingNum + 1 + lma_nds_used_num_le0_ = 0; + lma_nodes_le0_ = new LmaNodeLE0[kMaxSpellingNum + 1]; + + // Other nodes is less than lemma_num + lma_nds_used_num_ge1_ = 0; + lma_nodes_ge1_ = new LmaNodeGE1[lemma_num_]; + + homo_idx_buf_ = new LemmaIdType[lemma_num_]; + spl_table_ = new SpellingTable(); + spl_parser_ = new SpellingParser(); + + if (NULL == lemma_arr_ || NULL == top_lmas_ || NULL == scis_ || NULL == spl_table_ || NULL == spl_parser_ || NULL == lma_nodes_le0_ || NULL == lma_nodes_ge1_ || NULL == homo_idx_buf_) { + free_resource(); + return false; + } + + memset(lemma_arr_, 0, sizeof(LemmaEntry) * lemma_num_); + memset(scis_, 0, sizeof(SingleCharItem) * scis_num_); + memset(lma_nodes_le0_, 0, sizeof(LmaNodeLE0) * (kMaxSpellingNum + 1)); + memset(lma_nodes_ge1_, 0, sizeof(LmaNodeGE1) * lemma_num_); + memset(homo_idx_buf_, 0, sizeof(LemmaIdType) * lemma_num_); + spl_table_->init_table(kMaxPinyinSize, kSplTableHashLen, true); + + return true; +} + +char16 *DictBuilder::read_valid_hanzis(const char *fn_validhzs, size_t *num) { + if (NULL == fn_validhzs || NULL == num) return NULL; + + *num = 0; + FILE *fp = fopen(fn_validhzs, "rb"); + if (NULL == fp) return NULL; + + char16 utf16header; + if (fread(&utf16header, sizeof(char16), 1, fp) != 1 || 0xfeff != utf16header) { + fclose(fp); + return NULL; + } + + fseek(fp, 0, SEEK_END); + *num = ftell(fp) / sizeof(char16); + assert(*num >= 1); + *num -= 1; + + char16 *hzs = new char16[*num]; + if (NULL == hzs) { + fclose(fp); + return NULL; + } + + fseek(fp, 2, SEEK_SET); + + if (fread(hzs, sizeof(char16), *num, fp) != *num) { + fclose(fp); + delete[] hzs; + return NULL; + } + fclose(fp); + + myqsort(hzs, *num, sizeof(char16), compare_char16); + return hzs; +} + +bool DictBuilder::hz_in_hanzis_list(const char16 *hzs, size_t hzs_len, char16 hz) { + if (NULL == hzs) return false; + 
+ char16 *found; + found = static_cast(mybsearch(&hz, hzs, hzs_len, sizeof(char16), compare_char16)); + if (NULL == found) return false; + + assert(*found == hz); + return true; +} + +// The caller makes sure that the parameters are valid. +bool DictBuilder::str_in_hanzis_list(const char16 *hzs, size_t hzs_len, const char16 *str, size_t str_len) { + if (NULL == hzs || NULL == str) return false; + + for (size_t pos = 0; pos < str_len; pos++) { + if (!hz_in_hanzis_list(hzs, hzs_len, str[pos])) return false; + } + return true; +} + +void DictBuilder::get_top_lemmas() { + top_lmas_num_ = 0; + if (NULL == lemma_arr_) return; + + for (size_t pos = 0; pos < lemma_num_; pos++) { + if (0 == top_lmas_num_) { + top_lmas_[0] = lemma_arr_[pos]; + top_lmas_num_ = 1; + continue; + } + + if (lemma_arr_[pos].freq > top_lmas_[top_lmas_num_ - 1].freq) { + if (kTopScoreLemmaNum > top_lmas_num_) top_lmas_num_ += 1; + + size_t move_pos; + for (move_pos = top_lmas_num_ - 1; move_pos > 0; move_pos--) { + top_lmas_[move_pos] = top_lmas_[move_pos - 1]; + if (0 == move_pos - 1 || (move_pos - 1 > 0 && top_lmas_[move_pos - 2].freq > lemma_arr_[pos].freq)) { + break; + } + } + assert(move_pos > 0); + top_lmas_[move_pos - 1] = lemma_arr_[pos]; + } else if (kTopScoreLemmaNum > top_lmas_num_) { + top_lmas_[top_lmas_num_] = lemma_arr_[pos]; + top_lmas_num_ += 1; + } + } + + if (kPrintDebug0) { + printf("\n------Top Lemmas------------------\n"); + for (size_t pos = 0; pos < top_lmas_num_; pos++) { + printf("--%d, idx:%06d, score:%.5f\n", pos, top_lmas_[pos].idx_by_hz, top_lmas_[pos].freq); + } + } +} + +void DictBuilder::free_resource() { + if (NULL != lemma_arr_) delete[] lemma_arr_; + + if (NULL != scis_) delete[] scis_; + + if (NULL != lma_nodes_le0_) delete[] lma_nodes_le0_; + + if (NULL != lma_nodes_ge1_) delete[] lma_nodes_ge1_; + + if (NULL != homo_idx_buf_) delete[] homo_idx_buf_; + + if (NULL != spl_table_) delete spl_table_; + + if (NULL != spl_parser_) delete spl_parser_; + + lemma_arr_ = 
NULL; + scis_ = NULL; + lma_nodes_le0_ = NULL; + lma_nodes_ge1_ = NULL; + homo_idx_buf_ = NULL; + spl_table_ = NULL; + spl_parser_ = NULL; + + lemma_num_ = 0; + lma_nds_used_num_le0_ = 0; + lma_nds_used_num_ge1_ = 0; + homo_idx_num_eq1_ = 0; + homo_idx_num_gt1_ = 0; +} + +size_t DictBuilder::read_raw_dict(const char *fn_raw, const char *fn_validhzs, size_t max_item) { + if (NULL == fn_raw) return 0; + + Utf16Reader utf16_reader; + if (!utf16_reader.open(fn_raw, kReadBufLen * 10)) return false; + + char16 read_buf[kReadBufLen]; + + // Read the number of lemmas in the file + size_t lemma_num = 240000; + + // allocate resource required + if (!alloc_resource(lemma_num)) { + utf16_reader.close(); + } + + // Read the valid Hanzi list. + char16 *valid_hzs = NULL; + size_t valid_hzs_num = 0; + valid_hzs = read_valid_hanzis(fn_validhzs, &valid_hzs_num); + + // Begin reading the lemma entries + for (size_t i = 0; i < max_item; i++) { + // read next entry + if (!utf16_reader.readline(read_buf, kReadBufLen)) { + lemma_num = i; + break; + } + + size_t token_size; + char16 *token; + char16 *to_tokenize = read_buf; + + // Get the Hanzi string + token = utf16_strtok(to_tokenize, &token_size, &to_tokenize); + if (NULL == token) { + free_resource(); + utf16_reader.close(); + return false; + } + + size_t lemma_size = utf16_strlen(token); + + if (lemma_size > kMaxLemmaSize) { + i--; + continue; + } + + if (lemma_size > 4) { + i--; + continue; + } + + // Copy to the lemma entry + utf16_strcpy(lemma_arr_[i].hanzi_str, token); + + lemma_arr_[i].hz_str_len = token_size; + + // Get the freq string + token = utf16_strtok(to_tokenize, &token_size, &to_tokenize); + if (NULL == token) { + free_resource(); + utf16_reader.close(); + return false; + } + lemma_arr_[i].freq = utf16_atof(token); + + if (lemma_size > 1 && lemma_arr_[i].freq < 60) { + i--; + continue; + } + + // Get GBK mark, if no valid Hanzi list available, all items which contains + // GBK characters will be discarded. 
Otherwise, all items which contains + // characters outside of the valid Hanzi list will be discarded. + token = utf16_strtok(to_tokenize, &token_size, &to_tokenize); + assert(NULL != token); + int gbk_flag = utf16_atoi(token); + if (NULL == valid_hzs || 0 == valid_hzs_num) { + if (0 != gbk_flag) { + i--; + continue; + } + } else { + if (!str_in_hanzis_list(valid_hzs, valid_hzs_num, lemma_arr_[i].hanzi_str, lemma_arr_[i].hz_str_len)) { + i--; + continue; + } + } + + // Get spelling String + bool spelling_not_support = false; + for (size_t hz_pos = 0; hz_pos < (size_t)lemma_arr_[i].hz_str_len; hz_pos++) { + // Get a Pinyin + token = utf16_strtok(to_tokenize, &token_size, &to_tokenize); + if (NULL == token) { + free_resource(); + utf16_reader.close(); + return false; + } + + assert(utf16_strlen(token) <= kMaxPinyinSize); + + utf16_strcpy_tochar(lemma_arr_[i].pinyin_str[hz_pos], token); + + format_spelling_str(lemma_arr_[i].pinyin_str[hz_pos]); + + // Put the pinyin to the spelling table + if (!spl_table_->put_spelling(lemma_arr_[i].pinyin_str[hz_pos], lemma_arr_[i].freq)) { + spelling_not_support = true; + break; + } + } + + // The whole line must have been parsed fully, otherwise discard this one. + token = utf16_strtok(to_tokenize, &token_size, &to_tokenize); + if (spelling_not_support || NULL != token) { + i--; + continue; + } + } + + delete[] valid_hzs; + utf16_reader.close(); + + printf("read succesfully, lemma num: %d\n", lemma_num); + + return lemma_num; +} + +bool DictBuilder::build_dict(const char *fn_raw, const char *fn_validhzs, DictTrie *dict_trie) { + if (NULL == fn_raw || NULL == dict_trie) return false; + + lemma_num_ = read_raw_dict(fn_raw, fn_validhzs, 240000); + if (0 == lemma_num_) return false; + + // Arrange the spelling table, and build a spelling tree + // The size of an spelling. '\0' is included. 
If the spelling table is + // initialized to calculate the spelling scores, the last char in the + // spelling string will be score, and it is also included in spl_item_size. + size_t spl_item_size; + size_t spl_num; + const char *spl_buf; + spl_buf = spl_table_->arrange(&spl_item_size, &spl_num); + if (NULL == spl_buf) { + free_resource(); + return false; + } + + SpellingTrie &spl_trie = SpellingTrie::get_instance(); + + if (!spl_trie.construct(spl_buf, spl_item_size, spl_num, spl_table_->get_score_amplifier(), spl_table_->get_average_score())) { + free_resource(); + return false; + } + + printf("spelling tree construct successfully.\n"); + + // Convert the spelling string to idxs + for (size_t i = 0; i < lemma_num_; i++) { + for (size_t hz_pos = 0; hz_pos < (size_t)lemma_arr_[i].hz_str_len; hz_pos++) { + uint16 spl_idxs[2]; + uint16 spl_start_pos[3]; + bool is_pre = true; + int spl_idx_num = spl_parser_->splstr_to_idxs(lemma_arr_[i].pinyin_str[hz_pos], strlen(lemma_arr_[i].pinyin_str[hz_pos]), spl_idxs, spl_start_pos, 2, is_pre); + assert(1 == spl_idx_num); + + if (spl_trie.is_half_id(spl_idxs[0])) { + uint16 num = spl_trie.half_to_full(spl_idxs[0], spl_idxs); + assert(0 != num); + } + lemma_arr_[i].spl_idx_arr[hz_pos] = spl_idxs[0]; + } + } + + // Sort the lemma items according to the hanzi, and give each unique item a + // id + sort_lemmas_by_hz(); + + scis_num_ = build_scis(); + + // Construct the dict list + dict_trie->dict_list_ = new DictList(); + bool dl_success = dict_trie->dict_list_->init_list(scis_, scis_num_, lemma_arr_, lemma_num_); + assert(dl_success); + + // Construct the NGram information + NGram &ngram = NGram::get_instance(); + ngram.build_unigram(lemma_arr_, lemma_num_, lemma_arr_[lemma_num_ - 1].idx_by_hz + 1); + + // sort the lemma items according to the spelling idx string + myqsort(lemma_arr_, lemma_num_, sizeof(LemmaEntry), compare_py); + + get_top_lemmas(); + +#ifdef ___DO_STATISTICS___ + stat_init(); +#endif + + lma_nds_used_num_le0_ = 
1; // The root node + bool dt_success = construct_subset(static_cast(lma_nodes_le0_), lemma_arr_, 0, lemma_num_, 0); + if (!dt_success) { + free_resource(); + return false; + } + +#ifdef ___DO_STATISTICS___ + stat_print(); +#endif + + // Move the node data and homo data to the DictTrie + dict_trie->root_ = new LmaNodeLE0[lma_nds_used_num_le0_]; + dict_trie->nodes_ge1_ = new LmaNodeGE1[lma_nds_used_num_ge1_]; + size_t lma_idx_num = homo_idx_num_eq1_ + homo_idx_num_gt1_ + top_lmas_num_; + dict_trie->lma_idx_buf_ = new unsigned char[lma_idx_num * kLemmaIdSize]; + assert(NULL != dict_trie->root_); + assert(NULL != dict_trie->lma_idx_buf_); + dict_trie->lma_node_num_le0_ = lma_nds_used_num_le0_; + dict_trie->lma_node_num_ge1_ = lma_nds_used_num_ge1_; + dict_trie->lma_idx_buf_len_ = lma_idx_num * kLemmaIdSize; + dict_trie->top_lmas_num_ = top_lmas_num_; + + memcpy(dict_trie->root_, lma_nodes_le0_, sizeof(LmaNodeLE0) * lma_nds_used_num_le0_); + memcpy(dict_trie->nodes_ge1_, lma_nodes_ge1_, sizeof(LmaNodeGE1) * lma_nds_used_num_ge1_); + + for (size_t pos = 0; pos < homo_idx_num_eq1_ + homo_idx_num_gt1_; pos++) { + id_to_charbuf(dict_trie->lma_idx_buf_ + pos * kLemmaIdSize, homo_idx_buf_[pos]); + } + + for (size_t pos = homo_idx_num_eq1_ + homo_idx_num_gt1_; pos < lma_idx_num; pos++) { + LemmaIdType idx = top_lmas_[pos - homo_idx_num_eq1_ - homo_idx_num_gt1_].idx_by_hz; + id_to_charbuf(dict_trie->lma_idx_buf_ + pos * kLemmaIdSize, idx); + } + + if (kPrintDebug0) { + printf("homo_idx_num_eq1_: %d\n", homo_idx_num_eq1_); + printf("homo_idx_num_gt1_: %d\n", homo_idx_num_gt1_); + printf("top_lmas_num_: %d\n", top_lmas_num_); + } + + free_resource(); + + if (kPrintDebug0) { + printf("Building dict succeds\n"); + } + return dt_success; +} + +void DictBuilder::id_to_charbuf(unsigned char *buf, LemmaIdType id) { + if (NULL == buf) return; + for (size_t pos = 0; pos < kLemmaIdSize; pos++) { + (buf)[pos] = (unsigned char)(id >> (pos * 8)); + } +} + +void 
DictBuilder::set_son_offset(LmaNodeGE1 *node, size_t offset) { + node->son_1st_off_l = static_cast(offset); + node->son_1st_off_h = static_cast(offset >> 16); +} + +void DictBuilder::set_homo_id_buf_offset(LmaNodeGE1 *node, size_t offset) { + node->homo_idx_buf_off_l = static_cast(offset); + node->homo_idx_buf_off_h = static_cast(offset >> 16); +} + +// All spelling strings will be converted to upper case, except that +// spellings started with "ZH"/"CH"/"SH" will be converted to +// "Zh"/"Ch"/"Sh" +void DictBuilder::format_spelling_str(char *spl_str) { + if (NULL == spl_str) return; + + uint16 pos = 0; + while ('\0' != spl_str[pos]) { + if (spl_str[pos] >= 'a' && spl_str[pos] <= 'z') spl_str[pos] = spl_str[pos] - 'a' + 'A'; + + if (1 == pos && 'H' == spl_str[pos]) { + if ('C' == spl_str[0] || 'S' == spl_str[0] || 'Z' == spl_str[0]) { + spl_str[pos] = 'h'; + } + } + pos++; + } +} + +LemmaIdType DictBuilder::sort_lemmas_by_hz() { + if (NULL == lemma_arr_ || 0 == lemma_num_) return 0; + + myqsort(lemma_arr_, lemma_num_, sizeof(LemmaEntry), cmp_lemma_entry_hzs); + + lemma_arr_[0].idx_by_hz = 1; + LemmaIdType idx_max = 1; + for (size_t i = 1; i < lemma_num_; i++) { + if (utf16_strcmp(lemma_arr_[i].hanzi_str, lemma_arr_[i - 1].hanzi_str)) { + idx_max++; + lemma_arr_[i].idx_by_hz = idx_max; + } else { + idx_max++; + lemma_arr_[i].idx_by_hz = idx_max; + } + } + return idx_max + 1; +} + +size_t DictBuilder::build_scis() { + if (NULL == scis_ || lemma_num_ * kMaxLemmaSize > scis_num_) return 0; + + SpellingTrie &spl_trie = SpellingTrie::get_instance(); + + // This first one is blank, because id 0 is invalid. 
+ scis_[0].freq = 0; + scis_[0].hz = 0; + scis_[0].splid.full_splid = 0; + scis_[0].splid.half_splid = 0; + scis_num_ = 1; + + // Copy the hanzis to the buffer + for (size_t pos = 0; pos < lemma_num_; pos++) { + size_t hz_num = lemma_arr_[pos].hz_str_len; + for (size_t hzpos = 0; hzpos < hz_num; hzpos++) { + scis_[scis_num_].hz = lemma_arr_[pos].hanzi_str[hzpos]; + scis_[scis_num_].splid.full_splid = lemma_arr_[pos].spl_idx_arr[hzpos]; + scis_[scis_num_].splid.half_splid = spl_trie.full_to_half(scis_[scis_num_].splid.full_splid); + if (1 == hz_num) + scis_[scis_num_].freq = lemma_arr_[pos].freq; + else + scis_[scis_num_].freq = 0.000001; + scis_num_++; + } + } + + myqsort(scis_, scis_num_, sizeof(SingleCharItem), cmp_scis_hz_splid_freq); + + // Remove repeated items + size_t unique_scis_num = 1; + for (size_t pos = 1; pos < scis_num_; pos++) { + if (scis_[pos].hz == scis_[pos - 1].hz && scis_[pos].splid.full_splid == scis_[pos - 1].splid.full_splid) continue; + scis_[unique_scis_num] = scis_[pos]; + scis_[unique_scis_num].splid.half_splid = spl_trie.full_to_half(scis_[pos].splid.full_splid); + unique_scis_num++; + } + + scis_num_ = unique_scis_num; + + // Update the lemma list. 
+ for (size_t pos = 0; pos < lemma_num_; pos++) { + size_t hz_num = lemma_arr_[pos].hz_str_len; + for (size_t hzpos = 0; hzpos < hz_num; hzpos++) { + SingleCharItem key; + key.hz = lemma_arr_[pos].hanzi_str[hzpos]; + key.splid.full_splid = lemma_arr_[pos].spl_idx_arr[hzpos]; + key.splid.half_splid = spl_trie.full_to_half(key.splid.full_splid); + + SingleCharItem *found; + found = static_cast(mybsearch(&key, scis_, unique_scis_num, sizeof(SingleCharItem), cmp_scis_hz_splid)); + + assert(found); + + lemma_arr_[pos].hanzi_scis_ids[hzpos] = static_cast(found - scis_); + lemma_arr_[pos].spl_idx_arr[hzpos] = found->splid.full_splid; + } + } + + return scis_num_; +} + +bool DictBuilder::construct_subset(void *parent, LemmaEntry *lemma_arr, size_t item_start, size_t item_end, size_t level) { + if (level >= kMaxLemmaSize || item_end <= item_start) return false; + + // 1. Scan for how many sons + size_t parent_son_num = 0; + // LemmaNode *son_1st = NULL; + // parent.num_of_son = 0; + + LemmaEntry *lma_last_start = lemma_arr_ + item_start; + uint16 spl_idx_node = lma_last_start->spl_idx_arr[level]; + + // Scan for how many sons to be allocaed + for (size_t i = item_start + 1; i < item_end; i++) { + LemmaEntry *lma_current = lemma_arr + i; + uint16 spl_idx_current = lma_current->spl_idx_arr[level]; + if (spl_idx_current != spl_idx_node) { + parent_son_num++; + spl_idx_node = spl_idx_current; + } + } + parent_son_num++; + +#ifdef ___DO_STATISTICS___ + // Use to indicate whether all nodes of this layer have no son. + bool allson_noson = true; + + assert(level < kMaxLemmaSize); + if (parent_son_num > max_sonbuf_len_[level]) max_sonbuf_len_[level] = parent_son_num; + + total_son_num_[level] += parent_son_num; + total_sonbuf_num_[level] += 1; + + if (parent_son_num == 1) + sonbufs_num1_++; + else + sonbufs_numgt1_++; + total_lma_node_num_ += parent_son_num; +#endif + + // 2. 
Update the parent's information + // Update the parent's son list; + LmaNodeLE0 *son_1st_le0 = NULL; // only one of le0 or ge1 is used + LmaNodeGE1 *son_1st_ge1 = NULL; // only one of le0 or ge1 is used. + if (0 == level) { // the parent is root + (static_cast(parent))->son_1st_off = lma_nds_used_num_le0_; + son_1st_le0 = lma_nodes_le0_ + lma_nds_used_num_le0_; + lma_nds_used_num_le0_ += parent_son_num; + + assert(parent_son_num <= 65535); + (static_cast(parent))->num_of_son = static_cast(parent_son_num); + } else if (1 == level) { // the parent is a son of root + (static_cast(parent))->son_1st_off = lma_nds_used_num_ge1_; + son_1st_ge1 = lma_nodes_ge1_ + lma_nds_used_num_ge1_; + lma_nds_used_num_ge1_ += parent_son_num; + + assert(parent_son_num <= 65535); + (static_cast(parent))->num_of_son = static_cast(parent_son_num); + } else { + set_son_offset((static_cast(parent)), lma_nds_used_num_ge1_); + son_1st_ge1 = lma_nodes_ge1_ + lma_nds_used_num_ge1_; + lma_nds_used_num_ge1_ += parent_son_num; + + assert(parent_son_num <= 255); + (static_cast(parent))->num_of_son = (unsigned char)parent_son_num; + } + + // 3. 
Now begin to construct the son one by one + size_t son_pos = 0; + + lma_last_start = lemma_arr_ + item_start; + spl_idx_node = lma_last_start->spl_idx_arr[level]; + + size_t homo_num = 0; + if (lma_last_start->spl_idx_arr[level + 1] == 0) homo_num = 1; + + size_t item_start_next = item_start; + + for (size_t i = item_start + 1; i < item_end; i++) { + LemmaEntry *lma_current = lemma_arr_ + i; + uint16 spl_idx_current = lma_current->spl_idx_arr[level]; + + if (spl_idx_current == spl_idx_node) { + if (lma_current->spl_idx_arr[level + 1] == 0) homo_num++; + } else { + // Construct a node + LmaNodeLE0 *node_cur_le0 = NULL; // only one of them is valid + LmaNodeGE1 *node_cur_ge1 = NULL; + if (0 == level) { + node_cur_le0 = son_1st_le0 + son_pos; + node_cur_le0->spl_idx = spl_idx_node; + node_cur_le0->homo_idx_buf_off = homo_idx_num_eq1_ + homo_idx_num_gt1_; + node_cur_le0->son_1st_off = 0; + homo_idx_num_eq1_ += homo_num; + } else { + node_cur_ge1 = son_1st_ge1 + son_pos; + node_cur_ge1->spl_idx = spl_idx_node; + + set_homo_id_buf_offset(node_cur_ge1, (homo_idx_num_eq1_ + homo_idx_num_gt1_)); + set_son_offset(node_cur_ge1, 0); + homo_idx_num_gt1_ += homo_num; + } + + if (homo_num > 0) { + LemmaIdType *idx_buf = homo_idx_buf_ + homo_idx_num_eq1_ + homo_idx_num_gt1_ - homo_num; + if (0 == level) { + assert(homo_num <= 65535); + node_cur_le0->num_of_homo = static_cast(homo_num); + } else { + assert(homo_num <= 255); + node_cur_ge1->num_of_homo = (unsigned char)homo_num; + } + + for (size_t homo_pos = 0; homo_pos < homo_num; homo_pos++) { + idx_buf[homo_pos] = lemma_arr_[item_start_next + homo_pos].idx_by_hz; + } + +#ifdef ___DO_STATISTICS___ + if (homo_num > max_homobuf_len_[level]) max_homobuf_len_[level] = homo_num; + + total_homo_num_[level] += homo_num; +#endif + } + + if (i - item_start_next > homo_num) { + void *next_parent; + if (0 == level) + next_parent = static_cast(node_cur_le0); + else + next_parent = static_cast(node_cur_ge1); + construct_subset(next_parent, 
lemma_arr, item_start_next + homo_num, i, level + 1); +#ifdef ___DO_STATISTICS___ + + total_node_hasson_[level] += 1; + allson_noson = false; +#endif + } + + // for the next son + lma_last_start = lma_current; + spl_idx_node = spl_idx_current; + item_start_next = i; + homo_num = 0; + if (lma_current->spl_idx_arr[level + 1] == 0) homo_num = 1; + + son_pos++; + } + } + + // 4. The last one to construct + LmaNodeLE0 *node_cur_le0 = NULL; // only one of them is valid + LmaNodeGE1 *node_cur_ge1 = NULL; + if (0 == level) { + node_cur_le0 = son_1st_le0 + son_pos; + node_cur_le0->spl_idx = spl_idx_node; + node_cur_le0->homo_idx_buf_off = homo_idx_num_eq1_ + homo_idx_num_gt1_; + node_cur_le0->son_1st_off = 0; + homo_idx_num_eq1_ += homo_num; + } else { + node_cur_ge1 = son_1st_ge1 + son_pos; + node_cur_ge1->spl_idx = spl_idx_node; + + set_homo_id_buf_offset(node_cur_ge1, (homo_idx_num_eq1_ + homo_idx_num_gt1_)); + set_son_offset(node_cur_ge1, 0); + homo_idx_num_gt1_ += homo_num; + } + + if (homo_num > 0) { + LemmaIdType *idx_buf = homo_idx_buf_ + homo_idx_num_eq1_ + homo_idx_num_gt1_ - homo_num; + if (0 == level) { + assert(homo_num <= 65535); + node_cur_le0->num_of_homo = static_cast(homo_num); + } else { + assert(homo_num <= 255); + node_cur_ge1->num_of_homo = (unsigned char)homo_num; + } + + for (size_t homo_pos = 0; homo_pos < homo_num; homo_pos++) { + idx_buf[homo_pos] = lemma_arr[item_start_next + homo_pos].idx_by_hz; + } + +#ifdef ___DO_STATISTICS___ + if (homo_num > max_homobuf_len_[level]) max_homobuf_len_[level] = homo_num; + + total_homo_num_[level] += homo_num; +#endif + } + + if (item_end - item_start_next > homo_num) { + void *next_parent; + if (0 == level) + next_parent = static_cast(node_cur_le0); + else + next_parent = static_cast(node_cur_ge1); + construct_subset(next_parent, lemma_arr, item_start_next + homo_num, item_end, level + 1); +#ifdef ___DO_STATISTICS___ + + total_node_hasson_[level] += 1; + allson_noson = false; +#endif + } + +#ifdef 
___DO_STATISTICS___ + if (allson_noson) { + total_sonbuf_allnoson_[level] += 1; + total_node_in_sonbuf_allnoson_[level] += parent_son_num; + } +#endif + + assert(son_pos + 1 == parent_son_num); + return true; +} + +#ifdef ___DO_STATISTICS___ +void DictBuilder::stat_init() { + memset(max_sonbuf_len_, 0, sizeof(size_t) * kMaxLemmaSize); + memset(max_homobuf_len_, 0, sizeof(size_t) * kMaxLemmaSize); + memset(total_son_num_, 0, sizeof(size_t) * kMaxLemmaSize); + memset(total_node_hasson_, 0, sizeof(size_t) * kMaxLemmaSize); + memset(total_sonbuf_num_, 0, sizeof(size_t) * kMaxLemmaSize); + memset(total_sonbuf_allnoson_, 0, sizeof(size_t) * kMaxLemmaSize); + memset(total_node_in_sonbuf_allnoson_, 0, sizeof(size_t) * kMaxLemmaSize); + memset(total_homo_num_, 0, sizeof(size_t) * kMaxLemmaSize); + + sonbufs_num1_ = 0; + sonbufs_numgt1_ = 0; + total_lma_node_num_ = 0; +} + +void DictBuilder::stat_print() { + printf("\n------------STAT INFO-------------\n"); + printf("[root is layer -1]\n"); + printf(".. max_sonbuf_len per layer(from layer 0):\n "); + for (size_t i = 0; i < kMaxLemmaSize; i++) printf("%d, ", max_sonbuf_len_[i]); + printf("-, \n"); + + printf(".. max_homobuf_len per layer:\n -, "); + for (size_t i = 0; i < kMaxLemmaSize; i++) printf("%d, ", max_homobuf_len_[i]); + printf("\n"); + + printf(".. total_son_num per layer:\n "); + for (size_t i = 0; i < kMaxLemmaSize; i++) printf("%d, ", total_son_num_[i]); + printf("-, \n"); + + printf(".. total_node_hasson per layer:\n 1, "); + for (size_t i = 0; i < kMaxLemmaSize; i++) printf("%d, ", total_node_hasson_[i]); + printf("\n"); + + printf(".. total_sonbuf_num per layer:\n "); + for (size_t i = 0; i < kMaxLemmaSize; i++) printf("%d, ", total_sonbuf_num_[i]); + printf("-, \n"); + + printf(".. total_sonbuf_allnoson per layer:\n "); + for (size_t i = 0; i < kMaxLemmaSize; i++) printf("%d, ", total_sonbuf_allnoson_[i]); + printf("-, \n"); + + printf(".. 
total_node_in_sonbuf_allnoson per layer:\n "); + for (size_t i = 0; i < kMaxLemmaSize; i++) printf("%d, ", total_node_in_sonbuf_allnoson_[i]); + printf("-, \n"); + + printf(".. total_homo_num per layer:\n 0, "); + for (size_t i = 0; i < kMaxLemmaSize; i++) printf("%d, ", total_homo_num_[i]); + printf("\n"); + + printf(".. son buf allocation number with only 1 son: %d\n", sonbufs_num1_); + printf(".. son buf allocation number with more than 1 son: %d\n", sonbufs_numgt1_); + printf(".. total lemma node number: %d\n", total_lma_node_num_ + 1); +} +#endif // ___DO_STATISTICS___ + +#endif // ___BUILD_MODEL___ +} // namespace ime_pinyin diff --git a/src/share/dictlist.cpp b/src/share/dictlist.cpp new file mode 100644 index 0000000..3323f4a --- /dev/null +++ b/src/share/dictlist.cpp @@ -0,0 +1,369 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include "../include/dictlist.h" +#include "../include/mystdlib.h" +#include "../include/ngram.h" +#include "../include/searchutility.h" + +namespace ime_pinyin { + +DictList::DictList() { + initialized_ = false; + scis_num_ = 0; + scis_hz_ = NULL; + scis_splid_ = NULL; + buf_ = NULL; + spl_trie_ = SpellingTrie::get_cpinstance(); + + assert(kMaxLemmaSize == 8); + cmp_func_[0] = cmp_hanzis_1; + cmp_func_[1] = cmp_hanzis_2; + cmp_func_[2] = cmp_hanzis_3; + cmp_func_[3] = cmp_hanzis_4; + cmp_func_[4] = cmp_hanzis_5; + cmp_func_[5] = cmp_hanzis_6; + cmp_func_[6] = cmp_hanzis_7; + cmp_func_[7] = cmp_hanzis_8; +} + +DictList::~DictList() { free_resource(); } + +bool DictList::alloc_resource(size_t buf_size, size_t scis_num) { + // Allocate memory + buf_ = static_cast(malloc(buf_size * sizeof(char16))); + if (NULL == buf_) return false; + + scis_num_ = scis_num; + + scis_hz_ = static_cast(malloc(scis_num_ * sizeof(char16))); + if (NULL == scis_hz_) return false; + + scis_splid_ = static_cast(malloc(scis_num_ * sizeof(SpellingId))); + + if (NULL == scis_splid_) return false; + + return true; +} + +void DictList::free_resource() { + if (NULL != buf_) free(buf_); + buf_ = NULL; + + if (NULL != scis_hz_) free(scis_hz_); + scis_hz_ = NULL; + + if (NULL != scis_splid_) free(scis_splid_); + scis_splid_ = NULL; +} + +#ifdef ___BUILD_MODEL___ +bool DictList::init_list(const SingleCharItem *scis, size_t scis_num, const LemmaEntry *lemma_arr, size_t lemma_num) { + if (NULL == scis || 0 == scis_num || NULL == lemma_arr || 0 == lemma_num) return false; + + initialized_ = false; + + if (NULL != buf_) free(buf_); + + // calculate the size + size_t buf_size = calculate_size(lemma_arr, lemma_num); + if (0 == buf_size) return false; + + if (!alloc_resource(buf_size, scis_num)) return false; + + fill_scis(scis, scis_num); + + // Copy the related content from the array to inner buffer + fill_list(lemma_arr, lemma_num); + + initialized_ = true; + return 
true; +} + +size_t DictList::calculate_size(const LemmaEntry *lemma_arr, size_t lemma_num) { + size_t last_hz_len = 0; + size_t list_size = 0; + size_t id_num = 0; + + for (size_t i = 0; i < lemma_num; i++) { + if (0 == i) { + last_hz_len = lemma_arr[i].hz_str_len; + + assert(last_hz_len > 0); + assert(lemma_arr[0].idx_by_hz == 1); + + id_num++; + start_pos_[0] = 0; + start_id_[0] = id_num; + + last_hz_len = 1; + list_size += last_hz_len; + } else { + size_t current_hz_len = lemma_arr[i].hz_str_len; + + assert(current_hz_len >= last_hz_len); + + if (current_hz_len == last_hz_len) { + list_size += current_hz_len; + id_num++; + } else { + for (size_t len = last_hz_len; len < current_hz_len - 1; len++) { + start_pos_[len] = start_pos_[len - 1]; + start_id_[len] = start_id_[len - 1]; + } + + start_pos_[current_hz_len - 1] = list_size; + + id_num++; + start_id_[current_hz_len - 1] = id_num; + + last_hz_len = current_hz_len; + list_size += current_hz_len; + } + } + } + + for (size_t i = last_hz_len; i <= kMaxLemmaSize; i++) { + if (0 == i) { + start_pos_[0] = 0; + start_id_[0] = 1; + } else { + start_pos_[i] = list_size; + start_id_[i] = id_num; + } + } + + return start_pos_[kMaxLemmaSize]; +} + +void DictList::fill_scis(const SingleCharItem *scis, size_t scis_num) { + assert(scis_num_ == scis_num); + + for (size_t pos = 0; pos < scis_num_; pos++) { + scis_hz_[pos] = scis[pos].hz; + scis_splid_[pos] = scis[pos].splid; + } +} + +void DictList::fill_list(const LemmaEntry *lemma_arr, size_t lemma_num) { + size_t current_pos = 0; + + utf16_strncpy(buf_, lemma_arr[0].hanzi_str, lemma_arr[0].hz_str_len); + + current_pos = lemma_arr[0].hz_str_len; + + size_t id_num = 1; + + for (size_t i = 1; i < lemma_num; i++) { + utf16_strncpy(buf_ + current_pos, lemma_arr[i].hanzi_str, lemma_arr[i].hz_str_len); + + id_num++; + current_pos += lemma_arr[i].hz_str_len; + } + + assert(current_pos == start_pos_[kMaxLemmaSize]); + assert(id_num == start_id_[kMaxLemmaSize]); +} + +char16 
*DictList::find_pos2_startedbyhz(char16 hz_char) { + char16 *found_2w = static_cast(mybsearch(&hz_char, buf_ + start_pos_[1], (start_pos_[2] - start_pos_[1]) / 2, sizeof(char16) * 2, cmp_hanzis_1)); + if (NULL == found_2w) return NULL; + + while (found_2w > buf_ + start_pos_[1] && *found_2w == *(found_2w - 1)) found_2w -= 2; + + return found_2w; +} +#endif // ___BUILD_MODEL___ + +char16 *DictList::find_pos_startedbyhzs(const char16 last_hzs[], size_t word_len, int (*cmp_func)(const void *, const void *)) { + char16 *found_w = static_cast(mybsearch(last_hzs, buf_ + start_pos_[word_len - 1], (start_pos_[word_len] - start_pos_[word_len - 1]) / word_len, sizeof(char16) * word_len, cmp_func)); + + if (NULL == found_w) return NULL; + + while (found_w > buf_ + start_pos_[word_len - 1] && cmp_func(found_w, found_w - word_len) == 0) found_w -= word_len; + + return found_w; +} + +size_t DictList::predict(const char16 last_hzs[], uint16 hzs_len, NPredictItem *npre_items, size_t npre_max, size_t b4_used) { + assert(hzs_len <= kMaxPredictSize && hzs_len > 0); + + // 1. Prepare work + int (*cmp_func)(const void *, const void *) = cmp_func_[hzs_len - 1]; + + NGram &ngram = NGram::get_instance(); + + size_t item_num = 0; + + // 2. 
Do prediction + for (uint16 pre_len = 1; pre_len <= kMaxPredictSize + 1 - hzs_len; pre_len++) { + uint16 word_len = hzs_len + pre_len; + char16 *w_buf = find_pos_startedbyhzs(last_hzs, word_len, cmp_func); + if (NULL == w_buf) continue; + while (w_buf < buf_ + start_pos_[word_len] && cmp_func(w_buf, last_hzs) == 0 && item_num < npre_max) { + memset(npre_items + item_num, 0, sizeof(NPredictItem)); + utf16_strncpy(npre_items[item_num].pre_hzs, w_buf + hzs_len, pre_len); + npre_items[item_num].psb = ngram.get_uni_psb((size_t)(w_buf - buf_ - start_pos_[word_len - 1]) / word_len + start_id_[word_len - 1]); + npre_items[item_num].his_len = hzs_len; + item_num++; + w_buf += word_len; + } + } + + size_t new_num = 0; + for (size_t i = 0; i < item_num; i++) { + // Try to find it in the existing items + size_t e_pos; + for (e_pos = 1; e_pos <= b4_used; e_pos++) { + if (utf16_strncmp((*(npre_items - e_pos)).pre_hzs, npre_items[i].pre_hzs, kMaxPredictSize) == 0) break; + } + if (e_pos <= b4_used) continue; + + // If not found, append it to the buffer + npre_items[new_num] = npre_items[i]; + new_num++; + } + + return new_num; +} + +uint16 DictList::get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, uint16 str_max) { + if (!initialized_ || id_lemma >= start_id_[kMaxLemmaSize] || NULL == str_buf || str_max <= 1) return 0; + + // Find the range + for (uint16 i = 0; i < kMaxLemmaSize; i++) { + if (i + 1 > str_max - 1) return 0; + if (start_id_[i] <= id_lemma && start_id_[i + 1] > id_lemma) { + size_t id_span = id_lemma - start_id_[i]; + + uint16 *buf = buf_ + start_pos_[i] + id_span * (i + 1); + for (uint16 len = 0; len <= i; len++) { + str_buf[len] = buf[len]; + } + str_buf[i + 1] = (char16)'\0'; + return i + 1; + } + } + return 0; +} + +uint16 DictList::get_splids_for_hanzi(char16 hanzi, uint16 half_splid, uint16 *splids, uint16 max_splids) { + char16 *hz_found = static_cast(mybsearch(&hanzi, scis_hz_, scis_num_, sizeof(char16), cmp_hanzis_1)); + assert(NULL != hz_found && hanzi 
== *hz_found); + + // Move to the first one. + while (hz_found > scis_hz_ && hanzi == *(hz_found - 1)) hz_found--; + + // First try to found if strict comparison result is not zero. + char16 *hz_f = hz_found; + bool strict = false; + while (hz_f < scis_hz_ + scis_num_ && hanzi == *hz_f) { + uint16 pos = hz_f - scis_hz_; + if (0 == half_splid || scis_splid_[pos].half_splid == half_splid) { + strict = true; + } + hz_f++; + } + + uint16 found_num = 0; + while (hz_found < scis_hz_ + scis_num_ && hanzi == *hz_found) { + uint16 pos = hz_found - scis_hz_; + if (0 == half_splid || (strict && scis_splid_[pos].half_splid == half_splid) || (!strict && spl_trie_->half_full_compatible(half_splid, scis_splid_[pos].full_splid))) { + assert(found_num + 1 < max_splids); + splids[found_num] = scis_splid_[pos].full_splid; + found_num++; + } + hz_found++; + } + + return found_num; +} + +LemmaIdType DictList::get_lemma_id(const char16 *str, uint16 str_len) { + if (NULL == str || str_len > kMaxLemmaSize) return 0; + + char16 *found = find_pos_startedbyhzs(str, str_len, cmp_func_[str_len - 1]); + if (NULL == found) return 0; + + assert(found > buf_); + assert(static_cast(found - buf_) >= start_pos_[str_len - 1]); + return static_cast(start_id_[str_len - 1] + (found - buf_ - start_pos_[str_len - 1]) / str_len); +} + +void DictList::convert_to_hanzis(char16 *str, uint16 str_len) { + assert(NULL != str); + + for (uint16 str_pos = 0; str_pos < str_len; str_pos++) { + str[str_pos] = scis_hz_[str[str_pos]]; + } +} + +void DictList::convert_to_scis_ids(char16 *str, uint16 str_len) { + assert(NULL != str); + + for (uint16 str_pos = 0; str_pos < str_len; str_pos++) { + str[str_pos] = 0x100; + } +} + +bool DictList::save_list(FILE *fp) { + if (!initialized_ || NULL == fp) return false; + + if (NULL == buf_ || 0 == start_pos_[kMaxLemmaSize] || NULL == scis_hz_ || NULL == scis_splid_ || 0 == scis_num_) return false; + + if (fwrite(&scis_num_, sizeof(size_t), 1, fp) != 1) return false; + + if 
(fwrite(start_pos_, sizeof(size_t), kMaxLemmaSize + 1, fp) != kMaxLemmaSize + 1) return false; + + if (fwrite(start_id_, sizeof(size_t), kMaxLemmaSize + 1, fp) != kMaxLemmaSize + 1) return false; + + if (fwrite(scis_hz_, sizeof(char16), scis_num_, fp) != scis_num_) return false; + + if (fwrite(scis_splid_, sizeof(SpellingId), scis_num_, fp) != scis_num_) return false; + + if (fwrite(buf_, sizeof(char16), start_pos_[kMaxLemmaSize], fp) != start_pos_[kMaxLemmaSize]) return false; + + return true; +} + +bool DictList::load_list(FILE *fp) { + if (NULL == fp) return false; + + initialized_ = false; + + if (fread(&scis_num_, sizeof(uint32), 1, fp) != 1) return false; + + if (fread(start_pos_, sizeof(uint32), kMaxLemmaSize + 1, fp) != kMaxLemmaSize + 1) return false; + + if (fread(start_id_, sizeof(uint32), kMaxLemmaSize + 1, fp) != kMaxLemmaSize + 1) return false; + + free_resource(); + + if (!alloc_resource(start_pos_[kMaxLemmaSize], scis_num_)) return false; + + if (fread(scis_hz_, sizeof(char16), scis_num_, fp) != scis_num_) return false; + + if (fread(scis_splid_, sizeof(SpellingId), scis_num_, fp) != scis_num_) return false; + + if (fread(buf_, sizeof(char16), start_pos_[kMaxLemmaSize], fp) != start_pos_[kMaxLemmaSize]) return false; + + initialized_ = true; + return true; +} +} // namespace ime_pinyin diff --git a/src/share/dicttrie.cpp b/src/share/dicttrie.cpp new file mode 100644 index 0000000..4acc902 --- /dev/null +++ b/src/share/dicttrie.cpp @@ -0,0 +1,784 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include "../include/dicttrie.h" +#include "../include/dictbuilder.h" +#include "../include/lpicache.h" +#include "../include/mystdlib.h" +#include "../include/ngram.h" + +namespace ime_pinyin { + +DictTrie::DictTrie() { + spl_trie_ = SpellingTrie::get_cpinstance(); + + root_ = NULL; + splid_le0_index_ = NULL; + lma_node_num_le0_ = 0; + nodes_ge1_ = NULL; + lma_node_num_ge1_ = 0; + lma_idx_buf_ = NULL; + lma_idx_buf_len_ = 0; + total_lma_num_ = 0; + top_lmas_num_ = 0; + dict_list_ = NULL; + + parsing_marks_ = NULL; + mile_stones_ = NULL; + reset_milestones(0, kFirstValidMileStoneHandle); +} + +DictTrie::~DictTrie() { free_resource(true); } + +void DictTrie::free_resource(bool free_dict_list) { + if (NULL != root_) free(root_); + root_ = NULL; + + if (NULL != splid_le0_index_) free(splid_le0_index_); + splid_le0_index_ = NULL; + + if (NULL != nodes_ge1_) free(nodes_ge1_); + nodes_ge1_ = NULL; + + if (NULL != nodes_ge1_) free(nodes_ge1_); + nodes_ge1_ = NULL; + + if (free_dict_list) { + if (NULL != dict_list_) { + delete dict_list_; + } + dict_list_ = NULL; + } + + if (parsing_marks_) delete[] parsing_marks_; + parsing_marks_ = NULL; + + if (mile_stones_) delete[] mile_stones_; + mile_stones_ = NULL; + + reset_milestones(0, kFirstValidMileStoneHandle); +} + +inline size_t DictTrie::get_son_offset(const LmaNodeGE1 *node) { return ((size_t)node->son_1st_off_l + ((size_t)node->son_1st_off_h << 16)); } + +inline size_t DictTrie::get_homo_idx_buf_offset(const LmaNodeGE1 *node) { return 
((size_t)node->homo_idx_buf_off_l + ((size_t)node->homo_idx_buf_off_h << 16)); } + +inline LemmaIdType DictTrie::get_lemma_id(size_t id_offset) { + LemmaIdType id = 0; + for (uint16 pos = kLemmaIdSize - 1; pos > 0; pos--) id = (id << 8) + lma_idx_buf_[id_offset * kLemmaIdSize + pos]; + id = (id << 8) + lma_idx_buf_[id_offset * kLemmaIdSize]; + return id; +} + +#ifdef ___BUILD_MODEL___ +bool DictTrie::build_dict(const char *fn_raw, const char *fn_validhzs) { + DictBuilder *dict_builder = new DictBuilder(); + + free_resource(true); + + return dict_builder->build_dict(fn_raw, fn_validhzs, this); +} + +bool DictTrie::save_dict(FILE *fp) { + if (NULL == fp) return false; + + if (fwrite(&lma_node_num_le0_, sizeof(uint32), 1, fp) != 1) return false; + + if (fwrite(&lma_node_num_ge1_, sizeof(uint32), 1, fp) != 1) return false; + + if (fwrite(&lma_idx_buf_len_, sizeof(uint32), 1, fp) != 1) return false; + + if (fwrite(&top_lmas_num_, sizeof(uint32), 1, fp) != 1) return false; + + if (fwrite(root_, sizeof(LmaNodeLE0), lma_node_num_le0_, fp) != lma_node_num_le0_) return false; + + if (fwrite(nodes_ge1_, sizeof(LmaNodeGE1), lma_node_num_ge1_, fp) != lma_node_num_ge1_) return false; + + if (fwrite(lma_idx_buf_, sizeof(unsigned char), lma_idx_buf_len_, fp) != lma_idx_buf_len_) return false; + + return true; +} + +bool DictTrie::save_dict(const char *filename) { + if (NULL == filename) return false; + + if (NULL == root_ || NULL == dict_list_) return false; + + SpellingTrie &spl_trie = SpellingTrie::get_instance(); + NGram &ngram = NGram::get_instance(); + + FILE *fp = fopen(filename, "wb"); + if (NULL == fp) return false; + + if (!spl_trie.save_spl_trie(fp) || !dict_list_->save_list(fp) || !save_dict(fp) || !ngram.save_ngram(fp)) { + fclose(fp); + return false; + } + + fclose(fp); + return true; +} +#endif // ___BUILD_MODEL___ + +bool DictTrie::load_dict(FILE *fp) { + if (NULL == fp) return false; + if (fread(&lma_node_num_le0_, sizeof(uint32), 1, fp) != 1) return false; + + if 
(fread(&lma_node_num_ge1_, sizeof(uint32), 1, fp) != 1) return false; + + if (fread(&lma_idx_buf_len_, sizeof(uint32), 1, fp) != 1) return false; + + if (fread(&top_lmas_num_, sizeof(uint32), 1, fp) != 1 || top_lmas_num_ >= lma_idx_buf_len_) return false; + + free_resource(false); + + root_ = static_cast(malloc(lma_node_num_le0_ * sizeof(LmaNodeLE0))); + nodes_ge1_ = static_cast(malloc(lma_node_num_ge1_ * sizeof(LmaNodeGE1))); + lma_idx_buf_ = (unsigned char *)malloc(lma_idx_buf_len_); + total_lma_num_ = lma_idx_buf_len_ / kLemmaIdSize; + + size_t buf_size = SpellingTrie::get_instance().get_spelling_num() + 1; + assert(lma_node_num_le0_ <= buf_size); + splid_le0_index_ = static_cast(malloc(buf_size * sizeof(uint16))); + + // Init the space for parsing. + parsing_marks_ = new ParsingMark[kMaxParsingMark]; + mile_stones_ = new MileStone[kMaxMileStone]; + reset_milestones(0, kFirstValidMileStoneHandle); + + if (NULL == root_ || NULL == nodes_ge1_ || NULL == lma_idx_buf_ || NULL == splid_le0_index_ || NULL == parsing_marks_ || NULL == mile_stones_) { + free_resource(false); + return false; + } + + if (fread(root_, sizeof(LmaNodeLE0), lma_node_num_le0_, fp) != lma_node_num_le0_) return false; + + if (fread(nodes_ge1_, sizeof(LmaNodeGE1), lma_node_num_ge1_, fp) != lma_node_num_ge1_) return false; + + if (fread(lma_idx_buf_, sizeof(unsigned char), lma_idx_buf_len_, fp) != lma_idx_buf_len_) return false; + + // The quick index for the first level sons + uint16 last_splid = kFullSplIdStart; + size_t last_pos = 0; + for (size_t i = 1; i < lma_node_num_le0_; i++) { + for (uint16 splid = last_splid; splid < root_[i].spl_idx; splid++) splid_le0_index_[splid - kFullSplIdStart] = last_pos; + + splid_le0_index_[root_[i].spl_idx - kFullSplIdStart] = static_cast(i); + last_splid = root_[i].spl_idx; + last_pos = i; + } + + for (uint16 splid = last_splid + 1; splid < buf_size + kFullSplIdStart; splid++) { + assert(static_cast(splid - kFullSplIdStart) < buf_size); + 
splid_le0_index_[splid - kFullSplIdStart] = last_pos + 1; + } + + return true; +} + +bool DictTrie::load_dict(const char *filename, LemmaIdType start_id, LemmaIdType end_id) { + if (NULL == filename || end_id <= start_id) return false; + + FILE *fp = fopen(filename, "rb"); + if (NULL == fp) return false; + + free_resource(true); + + dict_list_ = new DictList(); + if (NULL == dict_list_) { + fclose(fp); + return false; + } + + SpellingTrie &spl_trie = SpellingTrie::get_instance(); + NGram &ngram = NGram::get_instance(); + + if (!spl_trie.load_spl_trie(fp) || !dict_list_->load_list(fp) || !load_dict(fp) || !ngram.load_ngram(fp) || total_lma_num_ > end_id - start_id + 1) { + free_resource(true); + fclose(fp); + return false; + } + + fclose(fp); + return true; +} + +bool DictTrie::load_dict_fd(int sys_fd, long start_offset, long length, LemmaIdType start_id, LemmaIdType end_id) { + if (start_offset < 0 || length <= 0 || end_id <= start_id) return false; + + FILE *fp = fdopen(sys_fd, "rb"); + if (NULL == fp) return false; + + if (-1 == fseek(fp, start_offset, SEEK_SET)) { + fclose(fp); + return false; + } + + free_resource(true); + + dict_list_ = new DictList(); + if (NULL == dict_list_) { + fclose(fp); + return false; + } + + SpellingTrie &spl_trie = SpellingTrie::get_instance(); + NGram &ngram = NGram::get_instance(); + + if (!spl_trie.load_spl_trie(fp) || !dict_list_->load_list(fp) || !load_dict(fp) || !ngram.load_ngram(fp) || ftell(fp) < start_offset + length || total_lma_num_ > end_id - start_id + 1) { + free_resource(true); + fclose(fp); + return false; + } + + fclose(fp); + return true; +} + +size_t DictTrie::fill_lpi_buffer(LmaPsbItem lpi_items[], size_t lpi_max, LmaNodeLE0 *node) { + size_t lpi_num = 0; + NGram &ngram = NGram::get_instance(); + for (size_t homo = 0; homo < (size_t)node->num_of_homo; homo++) { + lpi_items[lpi_num].id = get_lemma_id(node->homo_idx_buf_off + homo); + lpi_items[lpi_num].lma_len = 1; + lpi_items[lpi_num].psb = 
static_cast(ngram.get_uni_psb(lpi_items[lpi_num].id)); + lpi_num++; + if (lpi_num >= lpi_max) break; + } + + return lpi_num; +} + +size_t DictTrie::fill_lpi_buffer(LmaPsbItem lpi_items[], size_t lpi_max, size_t homo_buf_off, LmaNodeGE1 *node, uint16 lma_len) { + size_t lpi_num = 0; + NGram &ngram = NGram::get_instance(); + for (size_t homo = 0; homo < (size_t)node->num_of_homo; homo++) { + lpi_items[lpi_num].id = get_lemma_id(homo_buf_off + homo); + lpi_items[lpi_num].lma_len = lma_len; + lpi_items[lpi_num].psb = static_cast(ngram.get_uni_psb(lpi_items[lpi_num].id)); + lpi_num++; + if (lpi_num >= lpi_max) break; + } + + return lpi_num; +} + +void DictTrie::reset_milestones(uint16 from_step, MileStoneHandle from_handle) { + if (0 == from_step) { + parsing_marks_pos_ = 0; + mile_stones_pos_ = kFirstValidMileStoneHandle; + } else { + if (from_handle > 0 && from_handle < mile_stones_pos_) { + mile_stones_pos_ = from_handle; + + MileStone *mile_stone = mile_stones_ + from_handle; + parsing_marks_pos_ = mile_stone->mark_start; + } + } +} + +MileStoneHandle DictTrie::extend_dict(MileStoneHandle from_handle, const DictExtPara *dep, LmaPsbItem *lpi_items, size_t lpi_max, size_t *lpi_num) { + if (NULL == dep) return 0; + + // from LmaNodeLE0 (root) to LmaNodeLE0 + if (0 == from_handle) { + assert(0 == dep->splids_extended); + return extend_dict0(from_handle, dep, lpi_items, lpi_max, lpi_num); + } + + // from LmaNodeLE0 to LmaNodeGE1 + if (1 == dep->splids_extended) return extend_dict1(from_handle, dep, lpi_items, lpi_max, lpi_num); + + // From LmaNodeGE1 to LmaNodeGE1 + return extend_dict2(from_handle, dep, lpi_items, lpi_max, lpi_num); +} + +MileStoneHandle DictTrie::extend_dict0(MileStoneHandle from_handle, const DictExtPara *dep, LmaPsbItem *lpi_items, size_t lpi_max, size_t *lpi_num) { + assert(NULL != dep && 0 == from_handle); + *lpi_num = 0; + MileStoneHandle ret_handle = 0; + + uint16 splid = dep->splids[dep->splids_extended]; + uint16 id_start = dep->id_start; + 
uint16 id_num = dep->id_num; + + LpiCache &lpi_cache = LpiCache::get_instance(); + bool cached = lpi_cache.is_cached(splid); + + // 2. Begin exgtending + // 2.1 Get the LmaPsbItem list + LmaNodeLE0 *node = root_; + size_t son_start = splid_le0_index_[id_start - kFullSplIdStart]; + size_t son_end = splid_le0_index_[id_start + id_num - kFullSplIdStart]; + for (size_t son_pos = son_start; son_pos < son_end; son_pos++) { + assert(1 == node->son_1st_off); + LmaNodeLE0 *son = root_ + son_pos; + assert(son->spl_idx >= id_start && son->spl_idx < id_start + id_num); + + if (!cached && *lpi_num < lpi_max) { + bool need_lpi = true; + if (spl_trie_->is_half_id_yunmu(splid) && son_pos != son_start) need_lpi = false; + + if (need_lpi) *lpi_num += fill_lpi_buffer(lpi_items + (*lpi_num), lpi_max - *lpi_num, son); + } + + // If necessary, fill in a new mile stone. + if (son->spl_idx == id_start) { + if (mile_stones_pos_ < kMaxMileStone && parsing_marks_pos_ < kMaxParsingMark) { + parsing_marks_[parsing_marks_pos_].node_offset = son_pos; + parsing_marks_[parsing_marks_pos_].node_num = id_num; + mile_stones_[mile_stones_pos_].mark_start = parsing_marks_pos_; + mile_stones_[mile_stones_pos_].mark_num = 1; + ret_handle = mile_stones_pos_; + parsing_marks_pos_++; + mile_stones_pos_++; + } + } + + if (son->spl_idx >= id_start + id_num - 1) break; + } + + // printf("----- parsing marks: %d, mile stone: %d \n", parsing_marks_pos_, + // mile_stones_pos_); + return ret_handle; +} + +MileStoneHandle DictTrie::extend_dict1(MileStoneHandle from_handle, const DictExtPara *dep, LmaPsbItem *lpi_items, size_t lpi_max, size_t *lpi_num) { + assert(NULL != dep && from_handle > 0 && from_handle < mile_stones_pos_); + + MileStoneHandle ret_handle = 0; + + // 1. If this is a half Id, get its corresponding full starting Id and + // number of full Id. + size_t ret_val = 0; + + uint16 id_start = dep->id_start; + uint16 id_num = dep->id_num; + + // 2. Begin extending. 
+ MileStone *mile_stone = mile_stones_ + from_handle; + + for (uint16 h_pos = 0; h_pos < mile_stone->mark_num; h_pos++) { + ParsingMark p_mark = parsing_marks_[mile_stone->mark_start + h_pos]; + uint16 ext_num = p_mark.node_num; + for (uint16 ext_pos = 0; ext_pos < ext_num; ext_pos++) { + LmaNodeLE0 *node = root_ + p_mark.node_offset + ext_pos; + size_t found_start = 0; + size_t found_num = 0; + for (size_t son_pos = 0; son_pos < (size_t)node->num_of_son; son_pos++) { + assert(node->son_1st_off <= lma_node_num_ge1_); + LmaNodeGE1 *son = nodes_ge1_ + node->son_1st_off + son_pos; + if (son->spl_idx >= id_start && son->spl_idx < id_start + id_num) { + if (*lpi_num < lpi_max) { + size_t homo_buf_off = get_homo_idx_buf_offset(son); + *lpi_num += fill_lpi_buffer(lpi_items + (*lpi_num), lpi_max - *lpi_num, homo_buf_off, son, 2); + } + + // If necessary, fill in the new DTMI + if (0 == found_num) { + found_start = son_pos; + } + found_num++; + } + if (son->spl_idx >= id_start + id_num - 1 || son_pos == (size_t)node->num_of_son - 1) { + if (found_num > 0) { + if (mile_stones_pos_ < kMaxMileStone && parsing_marks_pos_ < kMaxParsingMark) { + parsing_marks_[parsing_marks_pos_].node_offset = node->son_1st_off + found_start; + parsing_marks_[parsing_marks_pos_].node_num = found_num; + if (0 == ret_val) mile_stones_[mile_stones_pos_].mark_start = parsing_marks_pos_; + parsing_marks_pos_++; + } + + ret_val++; + } + break; + } // for son_pos + } // for ext_pos + } // for h_pos + } + + if (ret_val > 0) { + mile_stones_[mile_stones_pos_].mark_num = ret_val; + ret_handle = mile_stones_pos_; + mile_stones_pos_++; + ret_val = 1; + } + + // printf("----- parsing marks: %d, mile stone: %d \n", parsing_marks_pos_, + // mile_stones_pos_); + return ret_handle; +} + +MileStoneHandle DictTrie::extend_dict2(MileStoneHandle from_handle, const DictExtPara *dep, LmaPsbItem *lpi_items, size_t lpi_max, size_t *lpi_num) { + assert(NULL != dep && from_handle > 0 && from_handle < mile_stones_pos_); + + 
MileStoneHandle ret_handle = 0; + + // 1. If this is a half Id, get its corresponding full starting Id and + // number of full Id. + size_t ret_val = 0; + + uint16 id_start = dep->id_start; + uint16 id_num = dep->id_num; + + // 2. Begin extending. + MileStone *mile_stone = mile_stones_ + from_handle; + + for (uint16 h_pos = 0; h_pos < mile_stone->mark_num; h_pos++) { + ParsingMark p_mark = parsing_marks_[mile_stone->mark_start + h_pos]; + uint16 ext_num = p_mark.node_num; + for (uint16 ext_pos = 0; ext_pos < ext_num; ext_pos++) { + LmaNodeGE1 *node = nodes_ge1_ + p_mark.node_offset + ext_pos; + size_t found_start = 0; + size_t found_num = 0; + + for (size_t son_pos = 0; son_pos < (size_t)node->num_of_son; son_pos++) { + assert(node->son_1st_off_l > 0 || node->son_1st_off_h > 0); + LmaNodeGE1 *son = nodes_ge1_ + get_son_offset(node) + son_pos; + if (son->spl_idx >= id_start && son->spl_idx < id_start + id_num) { + if (*lpi_num < lpi_max) { + size_t homo_buf_off = get_homo_idx_buf_offset(son); + *lpi_num += fill_lpi_buffer(lpi_items + (*lpi_num), lpi_max - *lpi_num, homo_buf_off, son, dep->splids_extended + 1); + } + + // If necessary, fill in the new DTMI + if (0 == found_num) { + found_start = son_pos; + } + found_num++; + } + if (son->spl_idx >= id_start + id_num - 1 || son_pos == (size_t)node->num_of_son - 1) { + if (found_num > 0) { + if (mile_stones_pos_ < kMaxMileStone && parsing_marks_pos_ < kMaxParsingMark) { + parsing_marks_[parsing_marks_pos_].node_offset = get_son_offset(node) + found_start; + parsing_marks_[parsing_marks_pos_].node_num = found_num; + if (0 == ret_val) mile_stones_[mile_stones_pos_].mark_start = parsing_marks_pos_; + parsing_marks_pos_++; + } + + ret_val++; + } + break; + } + } // for son_pos + } // for ext_pos + } // for h_pos + + if (ret_val > 0) { + mile_stones_[mile_stones_pos_].mark_num = ret_val; + ret_handle = mile_stones_pos_; + mile_stones_pos_++; + } + + // printf("----- parsing marks: %d, mile stone: %d \n", parsing_marks_pos_, 
+ // mile_stones_pos_); + return ret_handle; +} + +bool DictTrie::try_extend(const uint16 *splids, uint16 splid_num, LemmaIdType id_lemma) { + if (0 == splid_num || NULL == splids) return false; + + void *node = root_ + splid_le0_index_[splids[0] - kFullSplIdStart]; + + for (uint16 pos = 1; pos < splid_num; pos++) { + if (1 == pos) { + LmaNodeLE0 *node_le0 = reinterpret_cast(node); + LmaNodeGE1 *node_son; + uint16 son_pos; + for (son_pos = 0; son_pos < static_cast(node_le0->num_of_son); son_pos++) { + assert(node_le0->son_1st_off <= lma_node_num_ge1_); + node_son = nodes_ge1_ + node_le0->son_1st_off + son_pos; + if (node_son->spl_idx == splids[pos]) break; + } + if (son_pos < node_le0->num_of_son) + node = reinterpret_cast(node_son); + else + return false; + } else { + LmaNodeGE1 *node_ge1 = reinterpret_cast(node); + LmaNodeGE1 *node_son; + uint16 son_pos; + for (son_pos = 0; son_pos < static_cast(node_ge1->num_of_son); son_pos++) { + assert(node_ge1->son_1st_off_l > 0 || node_ge1->son_1st_off_h > 0); + node_son = nodes_ge1_ + get_son_offset(node_ge1) + son_pos; + if (node_son->spl_idx == splids[pos]) break; + } + if (son_pos < node_ge1->num_of_son) + node = reinterpret_cast(node_son); + else + return false; + } + } + + if (1 == splid_num) { + LmaNodeLE0 *node_le0 = reinterpret_cast(node); + size_t num_of_homo = (size_t)node_le0->num_of_homo; + for (size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++) { + LemmaIdType id_this = get_lemma_id(node_le0->homo_idx_buf_off + homo_pos); + char16 str[2]; + get_lemma_str(id_this, str, 2); + if (id_this == id_lemma) return true; + } + } else { + LmaNodeGE1 *node_ge1 = reinterpret_cast(node); + size_t num_of_homo = (size_t)node_ge1->num_of_homo; + for (size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++) { + size_t node_homo_off = get_homo_idx_buf_offset(node_ge1); + if (get_lemma_id(node_homo_off + homo_pos) == id_lemma) return true; + } + } + + return false; +} + +size_t DictTrie::get_lpis(const uint16 *splid_str, 
uint16 splid_str_len, LmaPsbItem *lma_buf, size_t max_lma_buf) { + if (splid_str_len > kMaxLemmaSize) return 0; + +#define MAX_EXTENDBUF_LEN 200 + + size_t *node_buf1[MAX_EXTENDBUF_LEN]; // use size_t for data alignment + size_t *node_buf2[MAX_EXTENDBUF_LEN]; + LmaNodeLE0 **node_fr_le0 = reinterpret_cast(node_buf1); // Nodes from. + LmaNodeLE0 **node_to_le0 = reinterpret_cast(node_buf2); // Nodes to. + LmaNodeGE1 **node_fr_ge1 = NULL; + LmaNodeGE1 **node_to_ge1 = NULL; + size_t node_fr_num = 1; + size_t node_to_num = 0; + node_fr_le0[0] = root_; + if (NULL == node_fr_le0[0]) return 0; + + size_t spl_pos = 0; + + while (spl_pos < splid_str_len) { + uint16 id_num = 1; + uint16 id_start = splid_str[spl_pos]; + // If it is a half id + if (spl_trie_->is_half_id(splid_str[spl_pos])) { + id_num = spl_trie_->half_to_full(splid_str[spl_pos], &id_start); + assert(id_num > 0); + } + + // Extend the nodes + if (0 == spl_pos) { // From LmaNodeLE0 (root) to LmaNodeLE0 nodes + for (size_t node_fr_pos = 0; node_fr_pos < node_fr_num; node_fr_pos++) { + LmaNodeLE0 *node = node_fr_le0[node_fr_pos]; + assert(node == root_ && 1 == node_fr_num); + size_t son_start = splid_le0_index_[id_start - kFullSplIdStart]; + size_t son_end = splid_le0_index_[id_start + id_num - kFullSplIdStart]; + for (size_t son_pos = son_start; son_pos < son_end; son_pos++) { + assert(1 == node->son_1st_off); + LmaNodeLE0 *node_son = root_ + son_pos; + assert(node_son->spl_idx >= id_start && node_son->spl_idx < id_start + id_num); + if (node_to_num < MAX_EXTENDBUF_LEN) { + node_to_le0[node_to_num] = node_son; + node_to_num++; + } + // id_start + id_num - 1 is the last one, which has just been + // recorded. 
+ if (node_son->spl_idx >= id_start + id_num - 1) break; + } + } + + spl_pos++; + if (spl_pos >= splid_str_len || node_to_num == 0) break; + // Prepare the nodes for next extending + // next time, from LmaNodeLE0 to LmaNodeGE1 + LmaNodeLE0 **node_tmp = node_fr_le0; + node_fr_le0 = node_to_le0; + node_to_le0 = NULL; + node_to_ge1 = reinterpret_cast(node_tmp); + } else if (1 == spl_pos) { // From LmaNodeLE0 to LmaNodeGE1 nodes + for (size_t node_fr_pos = 0; node_fr_pos < node_fr_num; node_fr_pos++) { + LmaNodeLE0 *node = node_fr_le0[node_fr_pos]; + for (size_t son_pos = 0; son_pos < (size_t)node->num_of_son; son_pos++) { + assert(node->son_1st_off <= lma_node_num_ge1_); + LmaNodeGE1 *node_son = nodes_ge1_ + node->son_1st_off + son_pos; + if (node_son->spl_idx >= id_start && node_son->spl_idx < id_start + id_num) { + if (node_to_num < MAX_EXTENDBUF_LEN) { + node_to_ge1[node_to_num] = node_son; + node_to_num++; + } + } + // id_start + id_num - 1 is the last one, which has just been + // recorded. + if (node_son->spl_idx >= id_start + id_num - 1) break; + } + } + + spl_pos++; + if (spl_pos >= splid_str_len || node_to_num == 0) break; + // Prepare the nodes for next extending + // next time, from LmaNodeGE1 to LmaNodeGE1 + node_fr_ge1 = node_to_ge1; + node_to_ge1 = reinterpret_cast(node_fr_le0); + node_fr_le0 = NULL; + node_to_le0 = NULL; + } else { // From LmaNodeGE1 to LmaNodeGE1 nodes + for (size_t node_fr_pos = 0; node_fr_pos < node_fr_num; node_fr_pos++) { + LmaNodeGE1 *node = node_fr_ge1[node_fr_pos]; + for (size_t son_pos = 0; son_pos < (size_t)node->num_of_son; son_pos++) { + assert(node->son_1st_off_l > 0 || node->son_1st_off_h > 0); + LmaNodeGE1 *node_son = nodes_ge1_ + get_son_offset(node) + son_pos; + if (node_son->spl_idx >= id_start && node_son->spl_idx < id_start + id_num) { + if (node_to_num < MAX_EXTENDBUF_LEN) { + node_to_ge1[node_to_num] = node_son; + node_to_num++; + } + } + // id_start + id_num - 1 is the last one, which has just been + // recorded. 
+ if (node_son->spl_idx >= id_start + id_num - 1) break; + } + } + + spl_pos++; + if (spl_pos >= splid_str_len || node_to_num == 0) break; + // Prepare the nodes for next extending + // next time, from LmaNodeGE1 to LmaNodeGE1 + LmaNodeGE1 **node_tmp = node_fr_ge1; + node_fr_ge1 = node_to_ge1; + node_to_ge1 = node_tmp; + } + + // The number of node for next extending + node_fr_num = node_to_num; + node_to_num = 0; + } // while + + if (0 == node_to_num) return 0; + + NGram &ngram = NGram::get_instance(); + size_t lma_num = 0; + + // If the length is 1, and the splid is a one-char Yunmu like 'a', 'o', 'e', + // only those candidates for the full matched one-char id will be returned. + if (1 == splid_str_len && spl_trie_->is_half_id_yunmu(splid_str[0])) node_to_num = node_to_num > 0 ? 1 : 0; + + for (size_t node_pos = 0; node_pos < node_to_num; node_pos++) { + size_t num_of_homo = 0; + if (spl_pos <= 1) { // Get from LmaNodeLE0 nodes + LmaNodeLE0 *node_le0 = node_to_le0[node_pos]; + num_of_homo = (size_t)node_le0->num_of_homo; + for (size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++) { + size_t ch_pos = lma_num + homo_pos; + lma_buf[ch_pos].id = get_lemma_id(node_le0->homo_idx_buf_off + homo_pos); + lma_buf[ch_pos].lma_len = 1; + lma_buf[ch_pos].psb = static_cast(ngram.get_uni_psb(lma_buf[ch_pos].id)); + + if (lma_num + homo_pos >= max_lma_buf - 1) break; + } + } else { // Get from LmaNodeGE1 nodes + LmaNodeGE1 *node_ge1 = node_to_ge1[node_pos]; + num_of_homo = (size_t)node_ge1->num_of_homo; + for (size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++) { + size_t ch_pos = lma_num + homo_pos; + size_t node_homo_off = get_homo_idx_buf_offset(node_ge1); + lma_buf[ch_pos].id = get_lemma_id(node_homo_off + homo_pos); + lma_buf[ch_pos].lma_len = splid_str_len; + lma_buf[ch_pos].psb = static_cast(ngram.get_uni_psb(lma_buf[ch_pos].id)); + + if (lma_num + homo_pos >= max_lma_buf - 1) break; + } + } + + lma_num += num_of_homo; + if (lma_num >= max_lma_buf) { + lma_num = 
max_lma_buf; + break; + } + } + return lma_num; +} + +uint16 DictTrie::get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, uint16 str_max) { return dict_list_->get_lemma_str(id_lemma, str_buf, str_max); } + +uint16 DictTrie::get_lemma_splids(LemmaIdType id_lemma, uint16 *splids, uint16 splids_max, bool arg_valid) { + char16 lma_str[kMaxLemmaSize + 1]; + uint16 lma_len = get_lemma_str(id_lemma, lma_str, kMaxLemmaSize + 1); + assert((!arg_valid && splids_max >= lma_len) || lma_len == splids_max); + + uint16 spl_mtrx[kMaxLemmaSize * 5]; + uint16 spl_start[kMaxLemmaSize + 1]; + spl_start[0] = 0; + uint16 try_num = 1; + + for (uint16 pos = 0; pos < lma_len; pos++) { + uint16 cand_splids_this = 0; + if (arg_valid && spl_trie_->is_full_id(splids[pos])) { + spl_mtrx[spl_start[pos]] = splids[pos]; + cand_splids_this = 1; + } else { + cand_splids_this = dict_list_->get_splids_for_hanzi(lma_str[pos], arg_valid ? splids[pos] : 0, spl_mtrx + spl_start[pos], kMaxLemmaSize * 5 - spl_start[pos]); + assert(cand_splids_this > 0); + } + spl_start[pos + 1] = spl_start[pos] + cand_splids_this; + try_num *= cand_splids_this; + } + + for (uint16 try_pos = 0; try_pos < try_num; try_pos++) { + uint16 mod = 1; + for (uint16 pos = 0; pos < lma_len; pos++) { + uint16 radix = spl_start[pos + 1] - spl_start[pos]; + splids[pos] = spl_mtrx[spl_start[pos] + try_pos / mod % radix]; + mod *= radix; + } + + if (try_extend(splids, lma_len, id_lemma)) return lma_len; + } + + return 0; +} + +void DictTrie::set_total_lemma_count_of_others(size_t count) { + NGram &ngram = NGram::get_instance(); + ngram.set_total_freq_none_sys(count); +} + +void DictTrie::convert_to_hanzis(char16 *str, uint16 str_len) { return dict_list_->convert_to_hanzis(str, str_len); } + +void DictTrie::convert_to_scis_ids(char16 *str, uint16 str_len) { return dict_list_->convert_to_scis_ids(str, str_len); } + +LemmaIdType DictTrie::get_lemma_id(const char16 lemma_str[], uint16 lemma_len) { + if (NULL == lemma_str || lemma_len > 
kMaxLemmaSize) return 0; + + return dict_list_->get_lemma_id(lemma_str, lemma_len); +} + +size_t DictTrie::predict_top_lmas(size_t his_len, NPredictItem *npre_items, size_t npre_max, size_t b4_used) { + NGram &ngram = NGram::get_instance(); + + size_t item_num = 0; + size_t top_lmas_id_offset = lma_idx_buf_len_ / kLemmaIdSize - top_lmas_num_; + size_t top_lmas_pos = 0; + while (item_num < npre_max && top_lmas_pos < top_lmas_num_) { + memset(npre_items + item_num, 0, sizeof(NPredictItem)); + LemmaIdType top_lma_id = get_lemma_id(top_lmas_id_offset + top_lmas_pos); + top_lmas_pos += 1; + if (dict_list_->get_lemma_str(top_lma_id, npre_items[item_num].pre_hzs, kMaxLemmaSize - 1) == 0) { + continue; + } + npre_items[item_num].psb = ngram.get_uni_psb(top_lma_id); + npre_items[item_num].his_len = his_len; + item_num++; + } + return item_num; +} + +size_t DictTrie::predict(const char16 *last_hzs, uint16 hzs_len, NPredictItem *npre_items, size_t npre_max, size_t b4_used) { return dict_list_->predict(last_hzs, hzs_len, npre_items, npre_max, b4_used); } +} // namespace ime_pinyin diff --git a/src/share/lpicache.cpp b/src/share/lpicache.cpp new file mode 100644 index 0000000..ab2e620 --- /dev/null +++ b/src/share/lpicache.cpp @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include "../include/lpicache.h" + +namespace ime_pinyin { + +LpiCache* LpiCache::instance_ = NULL; + +LpiCache::LpiCache() { + lpi_cache_ = new LmaPsbItem[kFullSplIdStart * kMaxLpiCachePerId]; + lpi_cache_len_ = new uint16[kFullSplIdStart]; + assert(NULL != lpi_cache_); + assert(NULL != lpi_cache_len_); + for (uint16 id = 0; id < kFullSplIdStart; id++) lpi_cache_len_[id] = 0; +} + +LpiCache::~LpiCache() { + if (NULL != lpi_cache_) delete[] lpi_cache_; + + if (NULL != lpi_cache_len_) delete[] lpi_cache_len_; +} + +LpiCache& LpiCache::get_instance() { + if (NULL == instance_) { + instance_ = new LpiCache(); + assert(NULL != instance_); + } + return *instance_; +} + +bool LpiCache::is_cached(uint16 splid) { + if (splid >= kFullSplIdStart) return false; + return lpi_cache_len_[splid] != 0; +} + +size_t LpiCache::put_cache(uint16 splid, LmaPsbItem lpi_items[], size_t lpi_num) { + uint16 num = kMaxLpiCachePerId; + if (num > lpi_num) num = static_cast(lpi_num); + + LmaPsbItem* lpi_cache_this = lpi_cache_ + splid * kMaxLpiCachePerId; + for (uint16 pos = 0; pos < num; pos++) lpi_cache_this[pos] = lpi_items[pos]; + + lpi_cache_len_[splid] = num; + return num; +} + +size_t LpiCache::get_cache(uint16 splid, LmaPsbItem lpi_items[], size_t lpi_max) { + if (lpi_max > lpi_cache_len_[splid]) lpi_max = lpi_cache_len_[splid]; + + LmaPsbItem* lpi_cache_this = lpi_cache_ + splid * kMaxLpiCachePerId; + for (uint16 pos = 0; pos < lpi_max; pos++) { + lpi_items[pos] = lpi_cache_this[pos]; + } + return lpi_max; +} + +} // namespace ime_pinyin diff --git a/maintest.cpp b/src/share/maintest.cpp similarity index 100% rename from maintest.cpp rename to src/share/maintest.cpp diff --git a/src/share/matrixsearch.cpp b/src/share/matrixsearch.cpp new file mode 100644 index 0000000..4757eb7 --- /dev/null +++ b/src/share/matrixsearch.cpp @@ -0,0 +1,1734 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include "../include/lpicache.h" +#include "../include/matrixsearch.h" +#include "../include/mystdlib.h" +#include "../include/ngram.h" +#include "../include/userdict.h" + +namespace ime_pinyin { + +#define PRUMING_SCORE 8000.0 + +MatrixSearch::MatrixSearch() { + inited_ = false; + spl_trie_ = SpellingTrie::get_cpinstance(); + + reset_pointers_to_null(); + + pys_decoded_len_ = 0; + mtrx_nd_pool_used_ = 0; + dmi_pool_used_ = 0; + xi_an_enabled_ = false; + dmi_c_phrase_ = false; + + assert(kMaxSearchSteps > 0); + max_sps_len_ = kMaxSearchSteps - 1; + max_hzs_len_ = kMaxSearchSteps; +} + +MatrixSearch::~MatrixSearch() { free_resource(); } + +void MatrixSearch::reset_pointers_to_null() { + dict_trie_ = NULL; + user_dict_ = NULL; + spl_parser_ = NULL; + + share_buf_ = NULL; + + // The following four buffers are used for decoding, and they are based on + // share_buf_, no need to delete them. + mtrx_nd_pool_ = NULL; + dmi_pool_ = NULL; + matrix_ = NULL; + dep_ = NULL; + + // Based on share_buf_, no need to delete them. 
+ npre_items_ = NULL; +} + +bool MatrixSearch::alloc_resource() { + free_resource(); + + dict_trie_ = new DictTrie(); + user_dict_ = static_cast(new UserDict()); + spl_parser_ = new SpellingParser(); + + size_t mtrx_nd_size = sizeof(MatrixNode) * kMtrxNdPoolSize; + mtrx_nd_size = align_to_size_t(mtrx_nd_size) / sizeof(size_t); + size_t dmi_size = sizeof(DictMatchInfo) * kDmiPoolSize; + dmi_size = align_to_size_t(dmi_size) / sizeof(size_t); + size_t matrix_size = sizeof(MatrixRow) * kMaxRowNum; + matrix_size = align_to_size_t(matrix_size) / sizeof(size_t); + size_t dep_size = sizeof(DictExtPara); + dep_size = align_to_size_t(dep_size) / sizeof(size_t); + + // share_buf's size is determined by the buffers for search. + share_buf_ = new size_t[mtrx_nd_size + dmi_size + matrix_size + dep_size]; + + if (NULL == dict_trie_ || NULL == user_dict_ || NULL == spl_parser_ || NULL == share_buf_) return false; + + // The buffers for search are based on the share buffer + mtrx_nd_pool_ = reinterpret_cast(share_buf_); + dmi_pool_ = reinterpret_cast(share_buf_ + mtrx_nd_size); + matrix_ = reinterpret_cast(share_buf_ + mtrx_nd_size + dmi_size); + dep_ = reinterpret_cast(share_buf_ + mtrx_nd_size + dmi_size + matrix_size); + + // The prediction buffer is also based on the share buffer. 
+ npre_items_ = reinterpret_cast(share_buf_); + npre_items_len_ = (mtrx_nd_size + dmi_size + matrix_size + dep_size) * sizeof(size_t) / sizeof(NPredictItem); + return true; +} + +void MatrixSearch::free_resource() { + if (NULL != dict_trie_) delete dict_trie_; + + if (NULL != user_dict_) delete user_dict_; + + if (NULL != spl_parser_) delete spl_parser_; + + if (NULL != share_buf_) delete[] share_buf_; + + reset_pointers_to_null(); +} + +bool MatrixSearch::init(const char *fn_sys_dict, const char *fn_usr_dict) { + if (NULL == fn_sys_dict || NULL == fn_usr_dict) return false; + + if (!alloc_resource()) return false; + + if (!dict_trie_->load_dict(fn_sys_dict, 1, kSysDictIdEnd)) return false; + + // If engine fails to load the user dictionary, reset the user dictionary + // to NULL. + if (!user_dict_->load_dict(fn_usr_dict, kUserDictIdStart, kUserDictIdEnd)) { + delete user_dict_; + user_dict_ = NULL; + } else { + user_dict_->set_total_lemma_count_of_others(NGram::kSysDictTotalFreq); + } + + reset_search0(); + + inited_ = true; + return true; +} + +bool MatrixSearch::init_fd(int sys_fd, long start_offset, long length, const char *fn_usr_dict) { + if (NULL == fn_usr_dict) return false; + + if (!alloc_resource()) return false; + + if (!dict_trie_->load_dict_fd(sys_fd, start_offset, length, 1, kSysDictIdEnd)) return false; + + if (!user_dict_->load_dict(fn_usr_dict, kUserDictIdStart, kUserDictIdEnd)) { + delete user_dict_; + user_dict_ = NULL; + } else { + user_dict_->set_total_lemma_count_of_others(NGram::kSysDictTotalFreq); + } + + reset_search0(); + + inited_ = true; + return true; +} + +void MatrixSearch::set_max_lens(size_t max_sps_len, size_t max_hzs_len) { + if (0 != max_sps_len) max_sps_len_ = max_sps_len; + if (0 != max_hzs_len) max_hzs_len_ = max_hzs_len; +} + +void MatrixSearch::close() { + flush_cache(); + free_resource(); + inited_ = false; +} + +void MatrixSearch::flush_cache() { + if (NULL != user_dict_) user_dict_->flush_cache(); +} + +void 
MatrixSearch::set_xi_an_switch(bool xi_an_enabled) { xi_an_enabled_ = xi_an_enabled; } + +bool MatrixSearch::get_xi_an_switch() { return xi_an_enabled_; } + +bool MatrixSearch::reset_search() { + if (!inited_) return false; + return reset_search0(); +} + +bool MatrixSearch::reset_search0() { + if (!inited_) return false; + + pys_decoded_len_ = 0; + mtrx_nd_pool_used_ = 0; + dmi_pool_used_ = 0; + + // Get a MatrixNode from the pool + matrix_[0].mtrx_nd_pos = mtrx_nd_pool_used_; + matrix_[0].mtrx_nd_num = 1; + mtrx_nd_pool_used_ += 1; + + // Update the node, and make it to be a starting node + MatrixNode *node = mtrx_nd_pool_ + matrix_[0].mtrx_nd_pos; + node->id = 0; + node->score = 0; + node->from = NULL; + node->step = 0; + node->dmi_fr = (PoolPosType)-1; + + matrix_[0].dmi_pos = 0; + matrix_[0].dmi_num = 0; + matrix_[0].dmi_has_full_id = 1; + matrix_[0].mtrx_nd_fixed = node; + + lma_start_[0] = 0; + fixed_lmas_ = 0; + spl_start_[0] = 0; + fixed_hzs_ = 0; + + dict_trie_->reset_milestones(0, 0); + if (NULL != user_dict_) user_dict_->reset_milestones(0, 0); + + return true; +} + +bool MatrixSearch::reset_search(size_t ch_pos, bool clear_fixed_this_step, bool clear_dmi_this_step, bool clear_mtrx_this_step) { + if (!inited_ || ch_pos > pys_decoded_len_ || ch_pos >= kMaxRowNum) return false; + + if (0 == ch_pos) { + reset_search0(); + } else { + // Prepare mile stones of this step to clear. + MileStoneHandle *dict_handles_to_clear = NULL; + if (clear_dmi_this_step && matrix_[ch_pos].dmi_num > 0) { + dict_handles_to_clear = dmi_pool_[matrix_[ch_pos].dmi_pos].dict_handles; + } + + // If there are more steps, and this step is not allowed to clear, find + // milestones of next step. 
+ if (pys_decoded_len_ > ch_pos && !clear_dmi_this_step) { + dict_handles_to_clear = NULL; + if (matrix_[ch_pos + 1].dmi_num > 0) { + dict_handles_to_clear = dmi_pool_[matrix_[ch_pos + 1].dmi_pos].dict_handles; + } + } + + if (NULL != dict_handles_to_clear) { + dict_trie_->reset_milestones(ch_pos, dict_handles_to_clear[0]); + if (NULL != user_dict_) user_dict_->reset_milestones(ch_pos, dict_handles_to_clear[1]); + } + + pys_decoded_len_ = ch_pos; + + if (clear_dmi_this_step) { + dmi_pool_used_ = matrix_[ch_pos - 1].dmi_pos + matrix_[ch_pos - 1].dmi_num; + matrix_[ch_pos].dmi_num = 0; + } else { + dmi_pool_used_ = matrix_[ch_pos].dmi_pos + matrix_[ch_pos].dmi_num; + } + + if (clear_mtrx_this_step) { + mtrx_nd_pool_used_ = matrix_[ch_pos - 1].mtrx_nd_pos + matrix_[ch_pos - 1].mtrx_nd_num; + matrix_[ch_pos].mtrx_nd_num = 0; + } else { + mtrx_nd_pool_used_ = matrix_[ch_pos].mtrx_nd_pos + matrix_[ch_pos].mtrx_nd_num; + } + + // Modify fixed_hzs_ + if (fixed_hzs_ > 0 && ((kLemmaIdComposing != lma_id_[0]) || (kLemmaIdComposing == lma_id_[0] && spl_start_[c_phrase_.length] <= ch_pos))) { + size_t fixed_ch_pos = ch_pos; + if (clear_fixed_this_step) fixed_ch_pos = fixed_ch_pos > 0 ? fixed_ch_pos - 1 : 0; + while (NULL == matrix_[fixed_ch_pos].mtrx_nd_fixed && fixed_ch_pos > 0) fixed_ch_pos--; + + fixed_lmas_ = 0; + fixed_hzs_ = 0; + if (fixed_ch_pos > 0) { + while (spl_start_[fixed_hzs_] < fixed_ch_pos) fixed_hzs_++; + assert(spl_start_[fixed_hzs_] == fixed_ch_pos); + + while (lma_start_[fixed_lmas_] < fixed_hzs_) fixed_lmas_++; + assert(lma_start_[fixed_lmas_] == fixed_hzs_); + } + + // Re-search the Pinyin string for the unlocked lemma + // which was previously fixed. + // + // Prepare mile stones of this step to clear. 
+ MileStoneHandle *dict_handles_to_clear = NULL; + if (clear_dmi_this_step && ch_pos == fixed_ch_pos && matrix_[fixed_ch_pos].dmi_num > 0) { + dict_handles_to_clear = dmi_pool_[matrix_[fixed_ch_pos].dmi_pos].dict_handles; + } + + // If there are more steps, and this step is not allowed to clear, find + // milestones of next step. + if (pys_decoded_len_ > fixed_ch_pos && !clear_dmi_this_step) { + dict_handles_to_clear = NULL; + if (matrix_[fixed_ch_pos + 1].dmi_num > 0) { + dict_handles_to_clear = dmi_pool_[matrix_[fixed_ch_pos + 1].dmi_pos].dict_handles; + } + } + + if (NULL != dict_handles_to_clear) { + dict_trie_->reset_milestones(fixed_ch_pos, dict_handles_to_clear[0]); + if (NULL != user_dict_) user_dict_->reset_milestones(fixed_ch_pos, dict_handles_to_clear[1]); + } + + pys_decoded_len_ = fixed_ch_pos; + + if (clear_dmi_this_step && ch_pos == fixed_ch_pos) { + dmi_pool_used_ = matrix_[fixed_ch_pos - 1].dmi_pos + matrix_[fixed_ch_pos - 1].dmi_num; + matrix_[fixed_ch_pos].dmi_num = 0; + } else { + dmi_pool_used_ = matrix_[fixed_ch_pos].dmi_pos + matrix_[fixed_ch_pos].dmi_num; + } + + if (clear_mtrx_this_step && ch_pos == fixed_ch_pos) { + mtrx_nd_pool_used_ = matrix_[fixed_ch_pos - 1].mtrx_nd_pos + matrix_[fixed_ch_pos - 1].mtrx_nd_num; + matrix_[fixed_ch_pos].mtrx_nd_num = 0; + } else { + mtrx_nd_pool_used_ = matrix_[fixed_ch_pos].mtrx_nd_pos + matrix_[fixed_ch_pos].mtrx_nd_num; + } + + for (uint16 re_pos = fixed_ch_pos; re_pos < ch_pos; re_pos++) { + add_char(pys_[re_pos]); + } + } else if (fixed_hzs_ > 0 && kLemmaIdComposing == lma_id_[0]) { + for (uint16 subpos = 0; subpos < c_phrase_.sublma_num; subpos++) { + uint16 splpos_begin = c_phrase_.sublma_start[subpos]; + uint16 splpos_end = c_phrase_.sublma_start[subpos + 1]; + for (uint16 splpos = splpos_begin; splpos < splpos_end; splpos++) { + // If ch_pos is in this spelling + uint16 spl_start = c_phrase_.spl_start[splpos]; + uint16 spl_end = c_phrase_.spl_start[splpos + 1]; + if (ch_pos >= spl_start && ch_pos 
< spl_end) { + // Clear everything after this position + c_phrase_.chn_str[splpos] = static_cast('\0'); + c_phrase_.sublma_start[subpos + 1] = splpos; + c_phrase_.sublma_num = subpos + 1; + c_phrase_.length = splpos; + + if (splpos == splpos_begin) { + c_phrase_.sublma_num = subpos; + } + } + } + } + + // Extend the composing phrase. + reset_search0(); + dmi_c_phrase_ = true; + uint16 c_py_pos = 0; + while (c_py_pos < spl_start_[c_phrase_.length]) { + bool b_ac_tmp = add_char(pys_[c_py_pos]); + assert(b_ac_tmp); + c_py_pos++; + } + dmi_c_phrase_ = false; + + lma_id_num_ = 1; + fixed_lmas_ = 1; + fixed_lmas_no1_[0] = 0; // A composing string is always modified. + fixed_hzs_ = c_phrase_.length; + lma_start_[1] = fixed_hzs_; + lma_id_[0] = kLemmaIdComposing; + matrix_[spl_start_[fixed_hzs_]].mtrx_nd_fixed = mtrx_nd_pool_ + matrix_[spl_start_[fixed_hzs_]].mtrx_nd_pos; + } + } + + return true; +} + +void MatrixSearch::del_in_pys(size_t start, size_t len) { + while (start < kMaxRowNum - len && '\0' != pys_[start]) { + pys_[start] = pys_[start + len]; + start++; + } +} + +size_t MatrixSearch::search(const char *py, size_t py_len) { + if (!inited_ || NULL == py) return 0; + + // If the search Pinyin string is too long, it will be truncated. + if (py_len > kMaxRowNum - 1) py_len = kMaxRowNum - 1; + + // Compare the new string with the previous one. Find their prefix to + // increase search efficiency. + size_t ch_pos = 0; + for (ch_pos = 0; ch_pos < pys_decoded_len_; ch_pos++) { + if ('\0' == py[ch_pos] || py[ch_pos] != pys_[ch_pos]) break; + } + + bool clear_fix = true; + if (ch_pos == pys_decoded_len_) clear_fix = false; + + reset_search(ch_pos, clear_fix, false, false); + + memcpy(pys_ + ch_pos, py + ch_pos, py_len - ch_pos); + pys_[py_len] = '\0'; + + while ('\0' != pys_[ch_pos]) { + if (!add_char(py[ch_pos])) { + pys_decoded_len_ = ch_pos; + break; + } + ch_pos++; + } + + // Get spelling ids and starting positions. 
+ get_spl_start_id(); + + // If there are too many spellings, remove the last letter until the spelling + // number is acceptable. + while (spl_id_num_ > 9) { + py_len--; + reset_search(py_len, false, false, false); + pys_[py_len] = '\0'; + get_spl_start_id(); + } + + prepare_candidates(); + + if (kPrintDebug0) { + printf("--Matrix Node Pool Used: %d\n", mtrx_nd_pool_used_); + printf("--DMI Pool Used: %d\n", dmi_pool_used_); + + if (kPrintDebug1) { + for (PoolPosType pos = 0; pos < dmi_pool_used_; pos++) { + debug_print_dmi(pos, 1); + } + } + } + + return ch_pos; +} + +size_t MatrixSearch::delsearch(size_t pos, bool is_pos_in_splid, bool clear_fixed_this_step) { + if (!inited_) return 0; + + size_t reset_pos = pos; + + // Out of range for both Pinyin mode and Spelling id mode. + if (pys_decoded_len_ <= pos) { + del_in_pys(pos, 1); + + reset_pos = pys_decoded_len_; + // Decode the string after the un-decoded position + while ('\0' != pys_[reset_pos]) { + if (!add_char(pys_[reset_pos])) { + pys_decoded_len_ = reset_pos; + break; + } + reset_pos++; + } + get_spl_start_id(); + prepare_candidates(); + return pys_decoded_len_; + } + + // Spelling id mode, but out of range. + if (is_pos_in_splid && spl_id_num_ <= pos) return pys_decoded_len_; + + // Begin to handle two modes respectively. + // Pinyin mode by default + size_t c_py_len = 0; // The length of composing phrase's Pinyin + size_t del_py_len = 1; + if (!is_pos_in_splid) { + // Pinyin mode is only allowed to delete beyond the fixed lemmas. + if (fixed_lmas_ > 0 && pos < spl_start_[lma_start_[fixed_lmas_]]) return pys_decoded_len_; + + del_in_pys(pos, 1); + + // If the deleted character is just the one after the last fixed lemma + if (pos == spl_start_[lma_start_[fixed_lmas_]]) { + // If all fixed lemmas have been merged, and the caller of the function + // request to unlock the last fixed lemma. 
+ if (kLemmaIdComposing == lma_id_[0] && clear_fixed_this_step) { + // Unlock the last sub lemma in the composing phrase. Because it is not + // easy to unlock it directly. Instead, we re-decode the modified + // composing phrase. + c_phrase_.sublma_num--; + c_phrase_.length = c_phrase_.sublma_start[c_phrase_.sublma_num]; + reset_pos = spl_start_[c_phrase_.length]; + c_py_len = reset_pos; + } + } + } else { + del_py_len = spl_start_[pos + 1] - spl_start_[pos]; + + del_in_pys(spl_start_[pos], del_py_len); + + if (pos >= lma_start_[fixed_lmas_]) { + c_py_len = 0; + reset_pos = spl_start_[pos + 1] - del_py_len; + } else { + c_py_len = spl_start_[lma_start_[fixed_lmas_]] - del_py_len; + reset_pos = c_py_len; + if (c_py_len > 0) merge_fixed_lmas(pos); + } + } + + if (c_py_len > 0) { + assert(c_phrase_.length > 0 && c_py_len == c_phrase_.spl_start[c_phrase_.sublma_start[c_phrase_.sublma_num]]); + // The composing phrase is valid, reset all search space, + // and begin a new search which will only extend the composing + // phrase. + reset_search0(); + + dmi_c_phrase_ = true; + // Extend the composing phrase. + uint16 c_py_pos = 0; + while (c_py_pos < c_py_len) { + bool b_ac_tmp = add_char(pys_[c_py_pos]); + assert(b_ac_tmp); + c_py_pos++; + } + dmi_c_phrase_ = false; + + // Fixd the composing phrase as the first choice. + lma_id_num_ = 1; + fixed_lmas_ = 1; + fixed_lmas_no1_[0] = 0; // A composing string is always modified. + fixed_hzs_ = c_phrase_.length; + lma_start_[1] = fixed_hzs_; + lma_id_[0] = kLemmaIdComposing; + matrix_[spl_start_[fixed_hzs_]].mtrx_nd_fixed = mtrx_nd_pool_ + matrix_[spl_start_[fixed_hzs_]].mtrx_nd_pos; + } else { + // Reseting search only clear pys_decoded_len_, but the string is kept. + reset_search(reset_pos, clear_fixed_this_step, false, false); + } + + // Decode the string after the delete position. 
+ while ('\0' != pys_[reset_pos]) { + if (!add_char(pys_[reset_pos])) { + pys_decoded_len_ = reset_pos; + break; + } + reset_pos++; + } + + get_spl_start_id(); + prepare_candidates(); + return pys_decoded_len_; +} + +size_t MatrixSearch::get_candidate_num() { + if (!inited_ || 0 == pys_decoded_len_ || 0 == matrix_[pys_decoded_len_].mtrx_nd_num) return 0; + + return 1 + lpi_total_; +} + +char16 *MatrixSearch::get_candidate(size_t cand_id, char16 *cand_str, size_t max_len) { + if (!inited_ || 0 == pys_decoded_len_ || NULL == cand_str) return NULL; + + if (0 == cand_id) { + return get_candidate0(cand_str, max_len, NULL, false); + } else { + cand_id--; + } + + // For this case: the current sentence is a word only, and the user fixed it, + // so the result will be fixed to the sentence space, and + // lpi_total_ will be set to 0. + if (0 == lpi_total_) { + return get_candidate0(cand_str, max_len, NULL, false); + } + + LemmaIdType id = lpi_items_[cand_id].id; + char16 s[kMaxLemmaSize + 1]; + + uint16 s_len = lpi_items_[cand_id].lma_len; + if (s_len > 1) { + s_len = get_lemma_str(id, s, kMaxLemmaSize + 1); + } else { + // For a single character, Hanzi is ready. + s[0] = lpi_items_[cand_id].hanzi; + s[1] = static_cast(0); + } + + if (s_len > 0 && max_len > s_len) { + utf16_strncpy(cand_str, s, s_len); + cand_str[s_len] = (char16)'\0'; + return cand_str; + } + + return NULL; +} + +void MatrixSearch::update_dict_freq() { + if (NULL != user_dict_) { + // Update the total frequency of all lemmas, including system lemmas and + // user dictionary lemmas. 
+ size_t total_freq = user_dict_->get_total_lemma_count(); + dict_trie_->set_total_lemma_count_of_others(total_freq); + } +} + +bool MatrixSearch::add_lma_to_userdict(uint16 lma_fr, uint16 lma_to, float score) { + if (lma_to - lma_fr <= 1 || NULL == user_dict_) return false; + + char16 word_str[kMaxLemmaSize + 1]; + uint16 spl_ids[kMaxLemmaSize]; + + uint16 spl_id_fr = 0; + + for (uint16 pos = lma_fr; pos < lma_to; pos++) { + LemmaIdType lma_id = lma_id_[pos]; + if (is_user_lemma(lma_id)) { + user_dict_->update_lemma(lma_id, 1, true); + } + uint16 lma_len = lma_start_[pos + 1] - lma_start_[pos]; + utf16_strncpy(spl_ids + spl_id_fr, spl_id_ + lma_start_[pos], lma_len); + + uint16 tmp = get_lemma_str(lma_id, word_str + spl_id_fr, kMaxLemmaSize + 1 - spl_id_fr); + assert(tmp == lma_len); + + tmp = get_lemma_splids(lma_id, spl_ids + spl_id_fr, lma_len, true); + if (tmp != lma_len) { + return false; + } + + spl_id_fr += lma_len; + } + + assert(spl_id_fr <= kMaxLemmaSize); + + return user_dict_->put_lemma(static_cast(word_str), spl_ids, spl_id_fr, 1); +} + +void MatrixSearch::debug_print_dmi(PoolPosType dmi_pos, uint16 nest_level) { + if (dmi_pos >= dmi_pool_used_) return; + + DictMatchInfo *dmi = dmi_pool_ + dmi_pos; + + if (1 == nest_level) { + printf("-----------------%d\'th DMI node begin----------->\n", dmi_pos); + } + if (dmi->dict_level > 1) { + debug_print_dmi(dmi->dmi_fr, nest_level + 1); + } + printf("---%d\n", dmi->dict_level); + printf(" MileStone: %x, %x\n", dmi->dict_handles[0], dmi->dict_handles[1]); + printf(" Spelling : %s, %d\n", SpellingTrie::get_instance().get_spelling_str(dmi->spl_id), dmi->spl_id); + printf(" Total Pinyin Len: %d\n", dmi->splstr_len); + if (1 == nest_level) { + printf("<----------------%d\'th DMI node end--------------\n\n", dmi_pos); + } +} + +bool MatrixSearch::try_add_cand0_to_userdict() { + size_t new_cand_num = get_candidate_num(); + if (fixed_hzs_ > 0 && 1 == new_cand_num) { + float score_from = 0; + uint16 lma_id_from = 0; + 
uint16 pos = 0; + bool modified = false; + while (pos < fixed_lmas_) { + if (lma_start_[pos + 1] - lma_start_[lma_id_from] > static_cast(kMaxLemmaSize)) { + float score_to_add = mtrx_nd_pool_[matrix_[spl_start_[lma_start_[pos]]].mtrx_nd_pos].score - score_from; + if (modified) { + score_to_add += 1.0; + if (score_to_add > NGram::kMaxScore) { + score_to_add = NGram::kMaxScore; + } + add_lma_to_userdict(lma_id_from, pos, score_to_add); + } + lma_id_from = pos; + score_from += score_to_add; + + // Clear the flag for next user lemma. + modified = false; + } + + if (0 == fixed_lmas_no1_[pos]) { + modified = true; + } + pos++; + } + + // Single-char word is not allowed to add to userdict. + if (lma_start_[pos] - lma_start_[lma_id_from] > 1) { + float score_to_add = mtrx_nd_pool_[matrix_[spl_start_[lma_start_[pos]]].mtrx_nd_pos].score - score_from; + if (modified) { + score_to_add += 1.0; + if (score_to_add > NGram::kMaxScore) { + score_to_add = NGram::kMaxScore; + } + add_lma_to_userdict(lma_id_from, pos, score_to_add); + } + } + } + return true; +} + +// Choose a candidate, and give new candidates for next step. +// If user finishes selection, we will try to communicate with user dictionary +// to add new items or update score of some existing items. +// +// Basic rule: +// 1. If user selects the first choice: +// 1.1. If the first choice is not a sentence, instead, it is a lemma: +// 1.1.1. If the first choice is a user lemma, notify the user +// dictionary that a user lemma is hit, and add occuring count +// by 1. +// 1.1.2. If the first choice is a system lemma, do nothing. +// 1.2. If the first choice is a sentence containing more than one lemma: +// 1.2.1. The whole sentence will be added as a user lemma. If the +// sentence contains user lemmas, -> hit, and add occuring count +// by 1. 
+size_t MatrixSearch::choose(size_t cand_id) { + if (!inited_ || 0 == pys_decoded_len_) return 0; + + if (0 == cand_id) { + fixed_hzs_ = spl_id_num_; + matrix_[spl_start_[fixed_hzs_]].mtrx_nd_fixed = mtrx_nd_pool_ + matrix_[spl_start_[fixed_hzs_]].mtrx_nd_pos; + for (size_t pos = fixed_lmas_; pos < lma_id_num_; pos++) { + fixed_lmas_no1_[pos] = 1; + } + fixed_lmas_ = lma_id_num_; + lpi_total_ = 0; // Clean all other candidates. + + // 1. It is the first choice + if (1 == lma_id_num_) { + // 1.1. The first choice is not a sentence but a lemma + if (is_user_lemma(lma_id_[0])) { + // 1.1.1. The first choice is a user lemma, notify the user dictionary + // that it is hit. + if (NULL != user_dict_) user_dict_->update_lemma(lma_id_[0], 1, true); + } else { + // 1.1.2. do thing for a system lemma. + } + } else { + // 1.2. The first choice is a sentence. + // 1.2.1 Try to add the whole sentence to user dictionary, the whole + // sentence may be splitted into many items. + if (NULL != user_dict_) { + try_add_cand0_to_userdict(); + } + } + update_dict_freq(); + return 1; + } else { + cand_id--; + } + + // 2. It is not the full sentence candidate. + // Find the length of the candidate. + LemmaIdType id_chosen = lpi_items_[cand_id].id; + LmaScoreType score_chosen = lpi_items_[cand_id].psb; + size_t cand_len = lpi_items_[cand_id].lma_len; + + assert(cand_len > 0); + + // Notify the atom dictionary that this item is hit. + if (is_user_lemma(id_chosen)) { + if (NULL != user_dict_) { + user_dict_->update_lemma(id_chosen, 1, true); + } + update_dict_freq(); + } + + // 3. Fixed the chosen item. + // 3.1 Get the steps number. + size_t step_fr = spl_start_[fixed_hzs_]; + size_t step_to = spl_start_[fixed_hzs_ + cand_len]; + + // 3.2 Save the length of the original string. + size_t pys_decoded_len = pys_decoded_len_; + + // 3.2 Reset the space of the fixed part. 
+ reset_search(step_to, false, false, true); + + // 3.3 For the last character of the fixed part, the previous DMI + // information will be kept, while the MTRX information will be re-extended, + // and only one node will be extended. + matrix_[step_to].mtrx_nd_num = 0; + + LmaPsbItem lpi_item; + lpi_item.psb = score_chosen; + lpi_item.id = id_chosen; + + PoolPosType step_to_dmi_fr = match_dmi(step_to, spl_id_ + fixed_hzs_, cand_len); + assert(step_to_dmi_fr != static_cast(-1)); + + extend_mtrx_nd(matrix_[step_fr].mtrx_nd_fixed, &lpi_item, 1, step_to_dmi_fr, step_to); + + matrix_[step_to].mtrx_nd_fixed = mtrx_nd_pool_ + matrix_[step_to].mtrx_nd_pos; + mtrx_nd_pool_used_ = matrix_[step_to].mtrx_nd_pos + matrix_[step_to].mtrx_nd_num; + + if (id_chosen == lma_id_[fixed_lmas_]) + fixed_lmas_no1_[fixed_lmas_] = 1; + else + fixed_lmas_no1_[fixed_lmas_] = 0; + lma_id_[fixed_lmas_] = id_chosen; + lma_start_[fixed_lmas_ + 1] = lma_start_[fixed_lmas_] + cand_len; + fixed_lmas_++; + fixed_hzs_ = fixed_hzs_ + cand_len; + + while (step_to != pys_decoded_len) { + bool b = add_char(pys_[step_to]); + assert(b); + step_to++; + } + + if (fixed_hzs_ < spl_id_num_) { + prepare_candidates(); + } else { + lpi_total_ = 0; + if (NULL != user_dict_) { + try_add_cand0_to_userdict(); + } + } + + return get_candidate_num(); +} + +size_t MatrixSearch::cancel_last_choice() { + if (!inited_ || 0 == pys_decoded_len_) return 0; + + size_t step_start = 0; + if (fixed_hzs_ > 0) { + size_t step_end = spl_start_[fixed_hzs_]; + MatrixNode *end_node = matrix_[step_end].mtrx_nd_fixed; + assert(NULL != end_node); + + step_start = end_node->from->step; + + if (step_start > 0) { + DictMatchInfo *dmi = dmi_pool_ + end_node->dmi_fr; + fixed_hzs_ -= dmi->dict_level; + } else { + fixed_hzs_ = 0; + } + + reset_search(step_start, false, false, false); + + while (pys_[step_start] != '\0') { + bool b = add_char(pys_[step_start]); + assert(b); + step_start++; + } + + prepare_candidates(); + } + return 
get_candidate_num(); +} + +size_t MatrixSearch::get_fixedlen() { + if (!inited_ || 0 == pys_decoded_len_) return 0; + return fixed_hzs_; +} + +bool MatrixSearch::prepare_add_char(char ch) { + if (pys_decoded_len_ >= kMaxRowNum - 1 || (!spl_parser_->is_valid_to_parse(ch) && ch != '\'')) return false; + + if (dmi_pool_used_ >= kDmiPoolSize) return false; + + pys_[pys_decoded_len_] = ch; + pys_decoded_len_++; + + MatrixRow *mtrx_this_row = matrix_ + pys_decoded_len_; + mtrx_this_row->mtrx_nd_pos = mtrx_nd_pool_used_; + mtrx_this_row->mtrx_nd_num = 0; + mtrx_this_row->dmi_pos = dmi_pool_used_; + mtrx_this_row->dmi_num = 0; + mtrx_this_row->dmi_has_full_id = 0; + + return true; +} + +bool MatrixSearch::is_split_at(uint16 pos) { return !spl_parser_->is_valid_to_parse(pys_[pos - 1]); } + +void MatrixSearch::fill_dmi(DictMatchInfo *dmi, MileStoneHandle *handles, PoolPosType dmi_fr, uint16 spl_id, uint16 node_num, unsigned char dict_level, bool splid_end_split, unsigned char splstr_len, unsigned char all_full_id) { + dmi->dict_handles[0] = handles[0]; + dmi->dict_handles[1] = handles[1]; + dmi->dmi_fr = dmi_fr; + dmi->spl_id = spl_id; + dmi->dict_level = dict_level; + dmi->splid_end_split = splid_end_split ? 1 : 0; + dmi->splstr_len = splstr_len; + dmi->all_full_id = all_full_id; + dmi->c_phrase = 0; +} + +bool MatrixSearch::add_char(char ch) { + if (!prepare_add_char(ch)) return false; + return add_char_qwerty(); +} + +bool MatrixSearch::add_char_qwerty() { + matrix_[pys_decoded_len_].mtrx_nd_num = 0; + + bool spl_matched = false; + uint16 longest_ext = 0; + // Extend the search matrix, from the oldest unfixed row. ext_len means + // extending length. + for (uint16 ext_len = kMaxPinyinSize + 1; ext_len > 0; ext_len--) { + if (ext_len > pys_decoded_len_ - spl_start_[fixed_hzs_]) continue; + + // Refer to the declaration of the variable dmi_has_full_id for the + // explanation of this piece of code. 
In one word, it is used to prevent + // from the unwise extending of "shoud ou" but allow the reasonable + // extending of "heng ao", "lang a", etc. + if (ext_len > 1 && 0 != longest_ext && 0 == matrix_[pys_decoded_len_ - ext_len].dmi_has_full_id) { + if (xi_an_enabled_) + continue; + else + break; + } + + uint16 oldrow = pys_decoded_len_ - ext_len; + + // 0. If that row is before the last fixed step, ignore. + if (spl_start_[fixed_hzs_] > oldrow) continue; + + // 1. Check if that old row has valid MatrixNode. If no, means that row is + // not a boundary, either a word boundary or a spelling boundary. + // If it is for extending composing phrase, it's OK to ignore the 0. + if (0 == matrix_[oldrow].mtrx_nd_num && !dmi_c_phrase_) continue; + + // 2. Get spelling id(s) for the last ext_len chars. + uint16 spl_idx; + bool is_pre = false; + spl_idx = spl_parser_->get_splid_by_str(pys_ + oldrow, ext_len, &is_pre); + if (is_pre) spl_matched = true; + + if (0 == spl_idx) continue; + + bool splid_end_split = is_split_at(oldrow + ext_len); + + // 3. Extend the DMI nodes of that old row + // + 1 is to extend an extra node from the root + for (PoolPosType dmi_pos = matrix_[oldrow].dmi_pos; dmi_pos < matrix_[oldrow].dmi_pos + matrix_[oldrow].dmi_num + 1; dmi_pos++) { + DictMatchInfo *dmi = dmi_pool_ + dmi_pos; + if (dmi_pos == matrix_[oldrow].dmi_pos + matrix_[oldrow].dmi_num) { + dmi = NULL; // The last one, NULL means extending from the root. + } else { + // If the dmi is covered by the fixed arrange, ignore it. + if (fixed_hzs_ > 0 && pys_decoded_len_ - ext_len - dmi->splstr_len < spl_start_[fixed_hzs_]) { + continue; + } + // If it is not in mode for composing phrase, and the source DMI node + // is marked for composing phrase, ignore this node. + if (dmi->c_phrase != 0 && !dmi_c_phrase_) { + continue; + } + } + + // For example, if "gao" is extended, "g ao" is not allowed. + // or "zh" has been passed, "z h" is not allowed. 
+ // Both word and word-connection will be prevented. + if (longest_ext > ext_len) { + if (NULL == dmi && 0 == matrix_[oldrow].dmi_has_full_id) { + continue; + } + + // "z h" is not allowed. + if (NULL != dmi && spl_trie_->is_half_id(dmi->spl_id)) { + continue; + } + } + + dep_->splids_extended = 0; + if (NULL != dmi) { + uint16 prev_ids_num = dmi->dict_level; + if ((!dmi_c_phrase_ && prev_ids_num >= kMaxLemmaSize) || (dmi_c_phrase_ && prev_ids_num >= kMaxRowNum)) { + continue; + } + + DictMatchInfo *d = dmi; + while (d) { + dep_->splids[--prev_ids_num] = d->spl_id; + if ((PoolPosType)-1 == d->dmi_fr) break; + d = dmi_pool_ + d->dmi_fr; + } + assert(0 == prev_ids_num); + dep_->splids_extended = dmi->dict_level; + } + dep_->splids[dep_->splids_extended] = spl_idx; + dep_->ext_len = ext_len; + dep_->splid_end_split = splid_end_split; + + dep_->id_num = 1; + dep_->id_start = spl_idx; + if (spl_trie_->is_half_id(spl_idx)) { + // Get the full id list + dep_->id_num = spl_trie_->half_to_full(spl_idx, &(dep_->id_start)); + assert(dep_->id_num > 0); + } + + uint16 new_dmi_num; + + new_dmi_num = extend_dmi(dep_, dmi); + + if (new_dmi_num > 0) { + if (dmi_c_phrase_) { + dmi_pool_[dmi_pool_used_].c_phrase = 1; + } + matrix_[pys_decoded_len_].dmi_num += new_dmi_num; + dmi_pool_used_ += new_dmi_num; + + if (!spl_trie_->is_half_id(spl_idx)) matrix_[pys_decoded_len_].dmi_has_full_id = 1; + } + + // If get candiate lemmas, try to extend the path + if (lpi_total_ > 0) { + uint16 fr_row; + if (NULL == dmi) { + fr_row = oldrow; + } else { + assert(oldrow >= dmi->splstr_len); + fr_row = oldrow - dmi->splstr_len; + } + for (PoolPosType mtrx_nd_pos = matrix_[fr_row].mtrx_nd_pos; mtrx_nd_pos < matrix_[fr_row].mtrx_nd_pos + matrix_[fr_row].mtrx_nd_num; mtrx_nd_pos++) { + MatrixNode *mtrx_nd = mtrx_nd_pool_ + mtrx_nd_pos; + + extend_mtrx_nd(mtrx_nd, lpi_items_, lpi_total_, dmi_pool_used_ - new_dmi_num, pys_decoded_len_); + if (longest_ext == 0) longest_ext = ext_len; + } + } + } // for 
dmi_pos + } // for ext_len + mtrx_nd_pool_used_ += matrix_[pys_decoded_len_].mtrx_nd_num; + + if (dmi_c_phrase_) return true; + + return (matrix_[pys_decoded_len_].mtrx_nd_num != 0 || spl_matched); +} + +void MatrixSearch::prepare_candidates() { + // Get candiates from the first un-fixed step. + uint16 lma_size_max = kMaxLemmaSize; + if (lma_size_max > spl_id_num_ - fixed_hzs_) lma_size_max = spl_id_num_ - fixed_hzs_; + + uint16 lma_size = lma_size_max; + + // If the full sentense candidate's unfixed part may be the same with a normal + // lemma. Remove the lemma candidate in this case. + char16 fullsent[kMaxLemmaSize + 1]; + char16 *pfullsent = NULL; + uint16 sent_len; + pfullsent = get_candidate0(fullsent, kMaxLemmaSize + 1, &sent_len, true); + + // If the unfixed part contains more than one ids, it is not necessary to + // check whether a lemma's string is the same to the unfixed part of the full + // sentence candidate, so, set it to NULL; + if (sent_len > kMaxLemmaSize) pfullsent = NULL; + + lpi_total_ = 0; + size_t lpi_num_full_match = 0; // Number of items which are fully-matched. + while (lma_size > 0) { + size_t lma_num; + lma_num = get_lpis(spl_id_ + fixed_hzs_, lma_size, lpi_items_ + lpi_total_, size_t(kMaxLmaPsbItems - lpi_total_), pfullsent, lma_size == lma_size_max); + + if (lma_num > 0) { + lpi_total_ += lma_num; + // For next lemma candidates which are not the longest, it is not + // necessary to compare with the full sentence candiate. + pfullsent = NULL; + } + if (lma_size == lma_size_max) { + lpi_num_full_match = lpi_total_; + } + lma_size--; + } + + // Sort those partially-matched items by their unified scores. 
+ myqsort(lpi_items_ + lpi_num_full_match, lpi_total_ - lpi_num_full_match, sizeof(LmaPsbItem), cmp_lpi_with_unified_psb); + + if (kPrintDebug0) { + printf("-----Prepare candidates, score:\n"); + for (size_t a = 0; a < lpi_total_; a++) { + printf("[%03d]%d ", a, lpi_items_[a].psb); + if ((a + 1) % 6 == 0) printf("\n"); + } + printf("\n"); + } + + if (kPrintDebug0) { + printf("--- lpi_total_ = %d\n", lpi_total_); + } +} + +const char *MatrixSearch::get_pystr(size_t *decoded_len) { + if (!inited_ || NULL == decoded_len) return NULL; + + *decoded_len = pys_decoded_len_; + return pys_; +} + +void MatrixSearch::merge_fixed_lmas(size_t del_spl_pos) { + if (fixed_lmas_ == 0) return; + // Update spelling segmentation information first. + spl_id_num_ -= 1; + uint16 del_py_len = spl_start_[del_spl_pos + 1] - spl_start_[del_spl_pos]; + for (size_t pos = del_spl_pos; pos <= spl_id_num_; pos++) { + spl_start_[pos] = spl_start_[pos + 1] - del_py_len; + if (pos == spl_id_num_) break; + spl_id_[pos] = spl_id_[pos + 1]; + } + + // Begin to merge. + uint16 phrase_len = 0; + + // Update the spelling ids to the composing phrase. + // We need to convert these ids into full id in the future. + memcpy(c_phrase_.spl_ids, spl_id_, spl_id_num_ * sizeof(uint16)); + memcpy(c_phrase_.spl_start, spl_start_, (spl_id_num_ + 1) * sizeof(uint16)); + + // If composing phrase has not been created, first merge all fixed + // lemmas into a composing phrase without deletion. + if (fixed_lmas_ > 1 || kLemmaIdComposing != lma_id_[0]) { + uint16 bp = 1; // Begin position of real fixed lemmas. + // There is no existing composing phrase. 
+ if (kLemmaIdComposing != lma_id_[0]) { + c_phrase_.sublma_num = 0; + bp = 0; + } + + uint16 sub_num = c_phrase_.sublma_num; + for (uint16 pos = bp; pos <= fixed_lmas_; pos++) { + c_phrase_.sublma_start[sub_num + pos - bp] = lma_start_[pos]; + if (lma_start_[pos] > del_spl_pos) { + c_phrase_.sublma_start[sub_num + pos - bp] -= 1; + } + + if (pos == fixed_lmas_) break; + + uint16 lma_len; + char16 *lma_str = c_phrase_.chn_str + c_phrase_.sublma_start[sub_num] + phrase_len; + + lma_len = get_lemma_str(lma_id_[pos], lma_str, kMaxRowNum - phrase_len); + assert(lma_len == lma_start_[pos + 1] - lma_start_[pos]); + phrase_len += lma_len; + } + assert(phrase_len == lma_start_[fixed_lmas_]); + c_phrase_.length = phrase_len; // will be deleted by 1 + c_phrase_.sublma_num += fixed_lmas_ - bp; + } else { + for (uint16 pos = 0; pos <= c_phrase_.sublma_num; pos++) { + if (c_phrase_.sublma_start[pos] > del_spl_pos) { + c_phrase_.sublma_start[pos] -= 1; + } + } + phrase_len = c_phrase_.length; + } + + assert(phrase_len > 0); + if (1 == phrase_len) { + // After the only one is deleted, nothing will be left. + fixed_lmas_ = 0; + return; + } + + // Delete the Chinese character in the merged phrase. + // The corresponding elements in spl_ids and spl_start of the + // phrase have been deleted. + char16 *chn_str = c_phrase_.chn_str + del_spl_pos; + for (uint16 pos = 0; pos < c_phrase_.sublma_start[c_phrase_.sublma_num] - del_spl_pos; pos++) { + chn_str[pos] = chn_str[pos + 1]; + } + c_phrase_.length -= 1; + + // If the deleted spelling id is in a sub lemma which contains more than + // one id, del_a_sub will be false; but if the deleted id is in a sub lemma + // which only contains 1 id, the whole sub lemma needs to be deleted, so + // del_a_sub will be true. 
+ bool del_a_sub = false; + for (uint16 pos = 1; pos <= c_phrase_.sublma_num; pos++) { + if (c_phrase_.sublma_start[pos - 1] == c_phrase_.sublma_start[pos]) { + del_a_sub = true; + } + if (del_a_sub) { + c_phrase_.sublma_start[pos - 1] = c_phrase_.sublma_start[pos]; + } + } + if (del_a_sub) c_phrase_.sublma_num -= 1; + + return; +} + +void MatrixSearch::get_spl_start_id() { + lma_id_num_ = 0; + lma_start_[0] = 0; + + spl_id_num_ = 0; + spl_start_[0] = 0; + if (!inited_ || 0 == pys_decoded_len_ || 0 == matrix_[pys_decoded_len_].mtrx_nd_num) return; + + // Calculate number of lemmas and spellings + // Only scan those part which is not fixed. + lma_id_num_ = fixed_lmas_; + spl_id_num_ = fixed_hzs_; + + MatrixNode *mtrx_nd = mtrx_nd_pool_ + matrix_[pys_decoded_len_].mtrx_nd_pos; + while (mtrx_nd != mtrx_nd_pool_) { + if (fixed_hzs_ > 0) { + if (mtrx_nd->step <= spl_start_[fixed_hzs_]) break; + } + + // Update the spelling segamentation information + unsigned char word_splstr_len = 0; + PoolPosType dmi_fr = mtrx_nd->dmi_fr; + if ((PoolPosType)-1 != dmi_fr) word_splstr_len = dmi_pool_[dmi_fr].splstr_len; + + while ((PoolPosType)-1 != dmi_fr) { + spl_start_[spl_id_num_ + 1] = mtrx_nd->step - (word_splstr_len - dmi_pool_[dmi_fr].splstr_len); + spl_id_[spl_id_num_] = dmi_pool_[dmi_fr].spl_id; + spl_id_num_++; + dmi_fr = dmi_pool_[dmi_fr].dmi_fr; + } + + // Update the lemma segmentation information + lma_start_[lma_id_num_ + 1] = spl_id_num_; + lma_id_[lma_id_num_] = mtrx_nd->id; + lma_id_num_++; + + mtrx_nd = mtrx_nd->from; + } + + // Reverse the result of spelling info + for (size_t pos = fixed_hzs_; pos < fixed_hzs_ + (spl_id_num_ - fixed_hzs_ + 1) / 2; pos++) { + if (spl_id_num_ + fixed_hzs_ - pos != pos + 1) { + spl_start_[pos + 1] ^= spl_start_[spl_id_num_ - pos + fixed_hzs_]; + spl_start_[spl_id_num_ - pos + fixed_hzs_] ^= spl_start_[pos + 1]; + spl_start_[pos + 1] ^= spl_start_[spl_id_num_ - pos + fixed_hzs_]; + + spl_id_[pos] ^= spl_id_[spl_id_num_ + fixed_hzs_ - 
pos - 1]; + spl_id_[spl_id_num_ + fixed_hzs_ - pos - 1] ^= spl_id_[pos]; + spl_id_[pos] ^= spl_id_[spl_id_num_ + fixed_hzs_ - pos - 1]; + } + } + + // Reverse the result of lemma info + for (size_t pos = fixed_lmas_; pos < fixed_lmas_ + (lma_id_num_ - fixed_lmas_ + 1) / 2; pos++) { + assert(lma_id_num_ + fixed_lmas_ - pos - 1 >= pos); + + if (lma_id_num_ + fixed_lmas_ - pos > pos + 1) { + lma_start_[pos + 1] ^= lma_start_[lma_id_num_ - pos + fixed_lmas_]; + lma_start_[lma_id_num_ - pos + fixed_lmas_] ^= lma_start_[pos + 1]; + lma_start_[pos + 1] ^= lma_start_[lma_id_num_ - pos + fixed_lmas_]; + + lma_id_[pos] ^= lma_id_[lma_id_num_ - 1 - pos + fixed_lmas_]; + lma_id_[lma_id_num_ - 1 - pos + fixed_lmas_] ^= lma_id_[pos]; + lma_id_[pos] ^= lma_id_[lma_id_num_ - 1 - pos + fixed_lmas_]; + } + } + + for (size_t pos = fixed_lmas_ + 1; pos <= lma_id_num_; pos++) { + if (pos < lma_id_num_) + lma_start_[pos] = lma_start_[pos - 1] + (lma_start_[pos] - lma_start_[pos + 1]); + else + lma_start_[pos] = lma_start_[pos - 1] + lma_start_[pos] - lma_start_[fixed_lmas_]; + } + + // Find the last fixed position + fixed_hzs_ = 0; + for (size_t pos = spl_id_num_; pos > 0; pos--) { + if (NULL != matrix_[spl_start_[pos]].mtrx_nd_fixed) { + fixed_hzs_ = pos; + break; + } + } + + return; +} + +size_t MatrixSearch::get_spl_start(const uint16 *&spl_start) { + get_spl_start_id(); + spl_start = spl_start_; + return spl_id_num_; +} + +size_t MatrixSearch::extend_dmi(DictExtPara *dep, DictMatchInfo *dmi_s) { + if (dmi_pool_used_ >= kDmiPoolSize) return 0; + + if (dmi_c_phrase_) return extend_dmi_c(dep, dmi_s); + + LpiCache &lpi_cache = LpiCache::get_instance(); + uint16 splid = dep->splids[dep->splids_extended]; + + bool cached = false; + if (0 == dep->splids_extended) cached = lpi_cache.is_cached(splid); + + // 1. If this is a half Id, get its corresponding full starting Id and + // number of full Id. 
+ size_t ret_val = 0; + PoolPosType mtrx_dmi_fr = (PoolPosType)-1; // From which dmi node + + lpi_total_ = 0; + + MileStoneHandle from_h[3]; + from_h[0] = 0; + from_h[1] = 0; + + if (0 != dep->splids_extended) { + from_h[0] = dmi_s->dict_handles[0]; + from_h[1] = dmi_s->dict_handles[1]; + } + + // 2. Begin exgtending in the system dictionary + size_t lpi_num = 0; + MileStoneHandle handles[2]; + handles[0] = handles[1] = 0; + if (from_h[0] > 0 || NULL == dmi_s) { + handles[0] = dict_trie_->extend_dict(from_h[0], dep, lpi_items_, kMaxLmaPsbItems, &lpi_num); + } + if (handles[0] > 0) lpi_total_ = lpi_num; + + if (NULL == dmi_s) { // from root + assert(0 != handles[0]); + mtrx_dmi_fr = dmi_pool_used_; + } + + // 3. Begin extending in the user dictionary + if (NULL != user_dict_ && (from_h[1] > 0 || NULL == dmi_s)) { + handles[1] = user_dict_->extend_dict(from_h[1], dep, lpi_items_ + lpi_total_, kMaxLmaPsbItems - lpi_total_, &lpi_num); + if (handles[1] > 0) { + if (kPrintDebug0) { + for (size_t t = 0; t < lpi_num; t++) { + printf("--Extend in user dict: uid:%d uscore:%d\n", lpi_items_[lpi_total_ + t].id, lpi_items_[lpi_total_ + t].psb); + } + } + lpi_total_ += lpi_num; + } + } + + if (0 != handles[0] || 0 != handles[1]) { + if (dmi_pool_used_ >= kDmiPoolSize) return 0; + + DictMatchInfo *dmi_add = dmi_pool_ + dmi_pool_used_; + if (NULL == dmi_s) { + fill_dmi(dmi_add, handles, (PoolPosType)-1, splid, 1, 1, dep->splid_end_split, dep->ext_len, spl_trie_->is_half_id(splid) ? 0 : 1); + } else { + fill_dmi(dmi_add, handles, dmi_s - dmi_pool_, splid, 1, dmi_s->dict_level + 1, dep->splid_end_split, dmi_s->splstr_len + dep->ext_len, spl_trie_->is_half_id(splid) ? 
0 : dmi_s->all_full_id); + } + + ret_val = 1; + } + + if (!cached) { + if (0 == lpi_total_) return ret_val; + + if (kPrintDebug0) { + printf("--- lpi_total_ = %d\n", lpi_total_); + } + + myqsort(lpi_items_, lpi_total_, sizeof(LmaPsbItem), cmp_lpi_with_psb); + if (NULL == dmi_s && spl_trie_->is_half_id(splid)) lpi_total_ = lpi_cache.put_cache(splid, lpi_items_, lpi_total_); + } else { + assert(spl_trie_->is_half_id(splid)); + lpi_total_ = lpi_cache.get_cache(splid, lpi_items_, kMaxLmaPsbItems); + } + + return ret_val; +} + +size_t MatrixSearch::extend_dmi_c(DictExtPara *dep, DictMatchInfo *dmi_s) { + lpi_total_ = 0; + + uint16 pos = dep->splids_extended; + assert(dmi_c_phrase_); + if (pos >= c_phrase_.length) return 0; + + uint16 splid = dep->splids[pos]; + if (splid == c_phrase_.spl_ids[pos]) { + DictMatchInfo *dmi_add = dmi_pool_ + dmi_pool_used_; + MileStoneHandle handles[2]; // Actually never used. + if (NULL == dmi_s) + fill_dmi(dmi_add, handles, (PoolPosType)-1, splid, 1, 1, dep->splid_end_split, dep->ext_len, spl_trie_->is_half_id(splid) ? 0 : 1); + else + fill_dmi(dmi_add, handles, dmi_s - dmi_pool_, splid, 1, dmi_s->dict_level + 1, dep->splid_end_split, dmi_s->splstr_len + dep->ext_len, spl_trie_->is_half_id(splid) ? 0 : dmi_s->all_full_id); + + if (pos == c_phrase_.length - 1) { + lpi_items_[0].id = kLemmaIdComposing; + lpi_items_[0].psb = 0; // 0 is bigger than normal lemma score. + lpi_total_ = 1; + } + return 1; + } + return 0; +} + +size_t MatrixSearch::extend_mtrx_nd(MatrixNode *mtrx_nd, LmaPsbItem lpi_items[], size_t lpi_num, PoolPosType dmi_fr, size_t res_row) { + assert(NULL != mtrx_nd); + matrix_[res_row].mtrx_nd_fixed = NULL; + + if (mtrx_nd_pool_used_ >= kMtrxNdPoolSize - kMaxNodeARow) return 0; + + if (0 == mtrx_nd->step) { + // Because the list is sorted, if the source step is 0, it is only + // necessary to pick up the first kMaxNodeARow items. 
+ if (lpi_num > kMaxNodeARow) lpi_num = kMaxNodeARow; + } + + MatrixNode *mtrx_nd_res_min = mtrx_nd_pool_ + matrix_[res_row].mtrx_nd_pos; + for (size_t pos = 0; pos < lpi_num; pos++) { + float score = mtrx_nd->score + lpi_items[pos].psb; + if (pos > 0 && score - PRUMING_SCORE > mtrx_nd_res_min->score) break; + + // Try to add a new node + size_t mtrx_nd_num = matrix_[res_row].mtrx_nd_num; + MatrixNode *mtrx_nd_res = mtrx_nd_res_min + mtrx_nd_num; + bool replace = false; + // Find its position + while (mtrx_nd_res > mtrx_nd_res_min && score < (mtrx_nd_res - 1)->score) { + if (static_cast(mtrx_nd_res - mtrx_nd_res_min) < kMaxNodeARow) *mtrx_nd_res = *(mtrx_nd_res - 1); + mtrx_nd_res--; + replace = true; + } + if (replace || (mtrx_nd_num < kMaxNodeARow && matrix_[res_row].mtrx_nd_pos + mtrx_nd_num < kMtrxNdPoolSize)) { + mtrx_nd_res->id = lpi_items[pos].id; + mtrx_nd_res->score = score; + mtrx_nd_res->from = mtrx_nd; + mtrx_nd_res->dmi_fr = dmi_fr; + mtrx_nd_res->step = res_row; + if (matrix_[res_row].mtrx_nd_num < kMaxNodeARow) matrix_[res_row].mtrx_nd_num++; + } + } + return matrix_[res_row].mtrx_nd_num; +} + +PoolPosType MatrixSearch::match_dmi(size_t step_to, uint16 spl_ids[], uint16 spl_id_num) { + if (pys_decoded_len_ < step_to || 0 == matrix_[step_to].dmi_num) { + return static_cast(-1); + } + + for (PoolPosType dmi_pos = 0; dmi_pos < matrix_[step_to].dmi_num; dmi_pos++) { + DictMatchInfo *dmi = dmi_pool_ + matrix_[step_to].dmi_pos + dmi_pos; + + if (dmi->dict_level != spl_id_num) continue; + + bool matched = true; + for (uint16 spl_pos = 0; spl_pos < spl_id_num; spl_pos++) { + if (spl_ids[spl_id_num - spl_pos - 1] != dmi->spl_id) { + matched = false; + break; + } + + dmi = dmi_pool_ + dmi->dmi_fr; + } + if (matched) { + return matrix_[step_to].dmi_pos + dmi_pos; + } + } + + return static_cast(-1); +} + +char16 *MatrixSearch::get_candidate0(char16 *cand_str, size_t max_len, uint16 *retstr_len, bool only_unfixed) { + if (pys_decoded_len_ == 0 || 
matrix_[pys_decoded_len_].mtrx_nd_num == 0) return NULL; + + LemmaIdType idxs[kMaxRowNum]; + size_t id_num = 0; + + MatrixNode *mtrx_nd = mtrx_nd_pool_ + matrix_[pys_decoded_len_].mtrx_nd_pos; + + if (kPrintDebug0) { + printf("--- sentence score: %f\n", mtrx_nd->score); + } + + if (kPrintDebug1) { + printf("==============Sentence DMI (reverse order) begin===========>>\n"); + } + + while (mtrx_nd != NULL) { + idxs[id_num] = mtrx_nd->id; + id_num++; + + if (kPrintDebug1) { + printf("---MatrixNode [step: %d, lma_idx: %d, total score:%.5f]\n", mtrx_nd->step, mtrx_nd->id, mtrx_nd->score); + debug_print_dmi(mtrx_nd->dmi_fr, 1); + } + + mtrx_nd = mtrx_nd->from; + } + + if (kPrintDebug1) { + printf("<<==============Sentence DMI (reverse order) end=============\n"); + } + + size_t ret_pos = 0; + do { + id_num--; + if (0 == idxs[id_num]) continue; + + char16 str[kMaxLemmaSize + 1]; + uint16 str_len = get_lemma_str(idxs[id_num], str, kMaxLemmaSize + 1); + if (str_len > 0 && ((!only_unfixed && max_len - ret_pos > str_len) || (only_unfixed && max_len - ret_pos + fixed_hzs_ > str_len))) { + if (!only_unfixed) + utf16_strncpy(cand_str + ret_pos, str, str_len); + else if (ret_pos >= fixed_hzs_) + utf16_strncpy(cand_str + ret_pos - fixed_hzs_, str, str_len); + + ret_pos += str_len; + } else { + return NULL; + } + } while (id_num != 0); + + if (!only_unfixed) { + if (NULL != retstr_len) *retstr_len = ret_pos; + cand_str[ret_pos] = (char16)'\0'; + } else { + if (NULL != retstr_len) *retstr_len = ret_pos - fixed_hzs_; + cand_str[ret_pos - fixed_hzs_] = (char16)'\0'; + } + return cand_str; +} + +size_t MatrixSearch::get_lpis(const uint16 *splid_str, size_t splid_str_len, LmaPsbItem *lma_buf, size_t max_lma_buf, const char16 *pfullsent, bool sort_by_psb) { + if (splid_str_len > kMaxLemmaSize) return 0; + + size_t num1 = dict_trie_->get_lpis(splid_str, splid_str_len, lma_buf, max_lma_buf); + size_t num2 = 0; + if (NULL != user_dict_) { + num2 = user_dict_->get_lpis(splid_str, 
splid_str_len, lma_buf + num1, max_lma_buf - num1); + } + + size_t num = num1 + num2; + + if (0 == num) return 0; + + // Remove repeated items. + if (splid_str_len > 1) { + LmaPsbStrItem *lpsis = reinterpret_cast(lma_buf + num); + size_t lpsi_num = (max_lma_buf - num) * sizeof(LmaPsbItem) / sizeof(LmaPsbStrItem); + assert(lpsi_num > num); + if (num > lpsi_num) num = lpsi_num; + lpsi_num = num; + + for (size_t pos = 0; pos < lpsi_num; pos++) { + lpsis[pos].lpi = lma_buf[pos]; + get_lemma_str(lma_buf[pos].id, lpsis[pos].str, kMaxLemmaSize + 1); + } + + myqsort(lpsis, lpsi_num, sizeof(LmaPsbStrItem), cmp_lpsi_with_str); + + size_t remain_num = 0; + for (size_t pos = 0; pos < lpsi_num; pos++) { + if (pos > 0 && utf16_strcmp(lpsis[pos].str, lpsis[pos - 1].str) == 0) { + if (lpsis[pos].lpi.psb < lpsis[pos - 1].lpi.psb) { + assert(remain_num > 0); + lma_buf[remain_num - 1] = lpsis[pos].lpi; + } + continue; + } + if (NULL != pfullsent && utf16_strcmp(lpsis[pos].str, pfullsent) == 0) continue; + + lma_buf[remain_num] = lpsis[pos].lpi; + remain_num++; + } + + // Update the result number + num = remain_num; + } else { + // For single character, some characters have more than one spelling, for + // example, "de" and "di" are all valid for a Chinese character, so when + // the user input "d", repeated items are generated. 
+ // For single character lemmas, Hanzis will be gotten + for (size_t pos = 0; pos < num; pos++) { + char16 hanzis[2]; + get_lemma_str(lma_buf[pos].id, hanzis, 2); + lma_buf[pos].hanzi = hanzis[0]; + } + + myqsort(lma_buf, num, sizeof(LmaPsbItem), cmp_lpi_with_hanzi); + + size_t remain_num = 0; + for (size_t pos = 0; pos < num; pos++) { + if (pos > 0 && lma_buf[pos].hanzi == lma_buf[pos - 1].hanzi) { + if (NULL != pfullsent && static_cast(0) == pfullsent[1] && lma_buf[pos].hanzi == pfullsent[0]) continue; + + if (lma_buf[pos].psb < lma_buf[pos - 1].psb) { + assert(remain_num > 0); + assert(lma_buf[remain_num - 1].hanzi == lma_buf[pos].hanzi); + lma_buf[remain_num - 1] = lma_buf[pos]; + } + continue; + } + if (NULL != pfullsent && static_cast(0) == pfullsent[1] && lma_buf[pos].hanzi == pfullsent[0]) continue; + + lma_buf[remain_num] = lma_buf[pos]; + remain_num++; + } + + num = remain_num; + } + + if (sort_by_psb) { + myqsort(lma_buf, num, sizeof(LmaPsbItem), cmp_lpi_with_psb); + } + return num; +} + +uint16 MatrixSearch::get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, uint16 str_max) { + uint16 str_len = 0; + + if (is_system_lemma(id_lemma)) { + str_len = dict_trie_->get_lemma_str(id_lemma, str_buf, str_max); + } else if (is_user_lemma(id_lemma)) { + if (NULL != user_dict_) { + str_len = user_dict_->get_lemma_str(id_lemma, str_buf, str_max); + } else { + str_len = 0; + str_buf[0] = static_cast('\0'); + } + } else if (is_composing_lemma(id_lemma)) { + if (str_max <= 1) return 0; + str_len = c_phrase_.sublma_start[c_phrase_.sublma_num]; + if (str_len > str_max - 1) str_len = str_max - 1; + utf16_strncpy(str_buf, c_phrase_.chn_str, str_len); + str_buf[str_len] = (char16)'\0'; + return str_len; + } + + return str_len; +} + +uint16 MatrixSearch::get_lemma_splids(LemmaIdType id_lemma, uint16 *splids, uint16 splids_max, bool arg_valid) { + uint16 splid_num = 0; + + if (arg_valid) { + for (splid_num = 0; splid_num < splids_max; splid_num++) { + if 
(spl_trie_->is_half_id(splids[splid_num])) break; + } + if (splid_num == splids_max) return splid_num; + } + + if (is_system_lemma(id_lemma)) { + splid_num = dict_trie_->get_lemma_splids(id_lemma, splids, splids_max, arg_valid); + } else if (is_user_lemma(id_lemma)) { + if (NULL != user_dict_) { + splid_num = user_dict_->get_lemma_splids(id_lemma, splids, splids_max, arg_valid); + } else { + splid_num = 0; + } + } else if (is_composing_lemma(id_lemma)) { + if (c_phrase_.length > splids_max) { + return 0; + } + for (uint16 pos = 0; pos < c_phrase_.length; pos++) { + splids[pos] = c_phrase_.spl_ids[pos]; + if (spl_trie_->is_half_id(splids[pos])) { + return 0; + } + } + } + return splid_num; +} + +size_t MatrixSearch::inner_predict(const char16 *fixed_buf, uint16 fixed_len, char16 predict_buf[][kMaxPredictSize + 1], size_t buf_len) { + size_t res_total = 0; + memset(npre_items_, 0, sizeof(NPredictItem) * npre_items_len_); + // In order to shorten the comments, j-character candidates predicted by + // i-character prefix are called P(i,j). All candiates predicted by + // i-character prefix are called P(i,*) + // Step 1. Get P(kMaxPredictSize, *) and sort them, here + // P(kMaxPredictSize, *) == P(kMaxPredictSize, 1) + for (size_t len = fixed_len; len > 0; len--) { + // How many blank items are available + size_t this_max = npre_items_len_ - res_total; + size_t res_this; + // If the history is longer than 1, and we can not get prediction from + // lemmas longer than 2, in this case, we will add lemmas with + // highest scores as the prediction result. + if (fixed_len > 1 && 1 == len && 0 == res_total) { + // Try to find if recent n (n>1) characters can be a valid lemma in system + // dictionary. + bool nearest_n_word = false; + for (size_t nlen = 2; nlen <= fixed_len; nlen++) { + if (dict_trie_->get_lemma_id(fixed_buf + fixed_len - nlen, nlen) > 0) { + nearest_n_word = true; + break; + } + } + res_this = dict_trie_->predict_top_lmas(nearest_n_word ? 
len : 0, npre_items_ + res_total, this_max, res_total); + res_total += res_this; + } + + // How many blank items are available + this_max = npre_items_len_ - res_total; + res_this = 0; + if (!kOnlyUserDictPredict) { + res_this = dict_trie_->predict(fixed_buf + fixed_len - len, len, npre_items_ + res_total, this_max, res_total); + } + + if (NULL != user_dict_) { + res_this = res_this + user_dict_->predict(fixed_buf + fixed_len - len, len, npre_items_ + res_total + res_this, this_max - res_this, res_total + res_this); + } + + if (kPredictLimitGt1) { + myqsort(npre_items_ + res_total, res_this, sizeof(NPredictItem), cmp_npre_by_score); + + if (len > 3) { + if (res_this > kMaxPredictNumByGt3) res_this = kMaxPredictNumByGt3; + } else if (3 == len) { + if (res_this > kMaxPredictNumBy3) res_this = kMaxPredictNumBy3; + } else if (2 == len) { + if (res_this > kMaxPredictNumBy2) res_this = kMaxPredictNumBy2; + } + } + + res_total += res_this; + } + + res_total = remove_duplicate_npre(npre_items_, res_total); + + if (kPreferLongHistoryPredict) { + myqsort(npre_items_, res_total, sizeof(NPredictItem), cmp_npre_by_hislen_score); + } else { + myqsort(npre_items_, res_total, sizeof(NPredictItem), cmp_npre_by_score); + } + + if (buf_len < res_total) { + res_total = buf_len; + } + + if (kPrintDebug2) { + printf("/////////////////Predicted Items Begin////////////////////>>\n"); + for (size_t i = 0; i < res_total; i++) { + printf("---"); + for (size_t j = 0; j < kMaxPredictSize; j++) { + printf("%d ", npre_items_[i].pre_hzs[j]); + } + printf("\n"); + } + printf("< kMaxPredictSize || 0 == buf_len) return 0; + + return inner_predict(fixed_buf, fixed_len, predict_buf, buf_len); +} + +} // namespace ime_pinyin diff --git a/mystdlib.cpp b/src/share/mystdlib.cpp similarity index 73% rename from mystdlib.cpp rename to src/share/mystdlib.cpp index 93bbcc9..0d5f4c0 100644 --- a/mystdlib.cpp +++ b/src/share/mystdlib.cpp @@ -21,14 +21,7 @@ namespace ime_pinyin { // For debug purpose. 
You can add a fixed version of qsort and bsearch functions // here so that the output will be totally the same under different platforms. -void myqsort(void *p, size_t n, size_t es, - int (*cmp)(const void *, const void *)) { - qsort(p,n, es, cmp); -} +void myqsort(void *p, size_t n, size_t es, int (*cmp)(const void *, const void *)) { qsort(p, n, es, cmp); } -void *mybsearch(const void *k, const void *b, - size_t n, size_t es, - int (*cmp)(const void *, const void *)) { - return bsearch(k, b, n, es, cmp); -} +void *mybsearch(const void *k, const void *b, size_t n, size_t es, int (*cmp)(const void *, const void *)) { return bsearch(k, b, n, es, cmp); } } // namespace ime_pinyin diff --git a/src/share/ngram.cpp b/src/share/ngram.cpp new file mode 100644 index 0000000..df462b8 --- /dev/null +++ b/src/share/ngram.cpp @@ -0,0 +1,293 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include "../include/mystdlib.h" +#include "../include/ngram.h" + +namespace ime_pinyin { + +#define ADD_COUNT 0.3 + +int comp_double(const void *p1, const void *p2) { + if (*static_cast(p1) < *static_cast(p2)) return -1; + if (*static_cast(p1) > *static_cast(p2)) return 1; + return 0; +} + +inline double distance(double freq, double code) { + // return fabs(freq - code); + return freq * fabs(log(freq) - log(code)); +} + +// Find the index of the code value which is nearest to the given freq +int qsearch_nearest(double code_book[], double freq, int start, int end) { + if (start == end) return start; + + if (start + 1 == end) { + if (distance(freq, code_book[end]) > distance(freq, code_book[start])) return start; + return end; + } + + int mid = (start + end) / 2; + + if (code_book[mid] > freq) + return qsearch_nearest(code_book, freq, start, mid); + else + return qsearch_nearest(code_book, freq, mid, end); +} + +size_t update_code_idx(double freqs[], size_t num, double code_book[], CODEBOOK_TYPE *code_idx) { + size_t changed = 0; + for (size_t pos = 0; pos < num; pos++) { + CODEBOOK_TYPE idx; + idx = qsearch_nearest(code_book, freqs[pos], 0, kCodeBookSize - 1); + if (idx != code_idx[pos]) changed++; + code_idx[pos] = idx; + } + return changed; +} + +double recalculate_kernel(double freqs[], size_t num, double code_book[], CODEBOOK_TYPE *code_idx) { + double ret = 0; + + size_t *item_num = new size_t[kCodeBookSize]; + assert(item_num); + memset(item_num, 0, sizeof(size_t) * kCodeBookSize); + + double *cb_new = new double[kCodeBookSize]; + assert(cb_new); + memset(cb_new, 0, sizeof(double) * kCodeBookSize); + + for (size_t pos = 0; pos < num; pos++) { + ret += distance(freqs[pos], code_book[code_idx[pos]]); + + cb_new[code_idx[pos]] += freqs[pos]; + item_num[code_idx[pos]] += 1; + } + + for (size_t code = 0; code < kCodeBookSize; code++) { + assert(item_num[code] > 0); + code_book[code] = cb_new[code] / 
item_num[code]; + } + + delete[] item_num; + delete[] cb_new; + + return ret; +} + +void iterate_codes(double freqs[], size_t num, double code_book[], CODEBOOK_TYPE *code_idx) { + size_t iter_num = 0; + double delta_last = 0; + do { + size_t changed = update_code_idx(freqs, num, code_book, code_idx); + + double delta = recalculate_kernel(freqs, num, code_book, code_idx); + + if (kPrintDebug0) { + printf("---Unigram codebook iteration: %d : %d, %.9f\n", iter_num, changed, delta); + } + iter_num++; + + if (iter_num > 1 && (delta == 0 || fabs(delta_last - delta) / fabs(delta) < 0.000000001)) break; + delta_last = delta; + } while (true); +} + +NGram *NGram::instance_ = NULL; + +NGram::NGram() { + initialized_ = false; + idx_num_ = 0; + lma_freq_idx_ = NULL; + sys_score_compensation_ = 0; + +#ifdef ___BUILD_MODEL___ + freq_codes_df_ = NULL; +#endif + freq_codes_ = NULL; +} + +NGram::~NGram() { + if (NULL != lma_freq_idx_) free(lma_freq_idx_); + +#ifdef ___BUILD_MODEL___ + if (NULL != freq_codes_df_) free(freq_codes_df_); +#endif + + if (NULL != freq_codes_) free(freq_codes_); +} + +NGram &NGram::get_instance() { + if (NULL == instance_) instance_ = new NGram(); + return *instance_; +} + +bool NGram::save_ngram(FILE *fp) { + if (!initialized_ || NULL == fp) return false; + + if (0 == idx_num_ || NULL == freq_codes_ || NULL == lma_freq_idx_) return false; + + if (fwrite(&idx_num_, sizeof(uint32), 1, fp) != 1) return false; + + if (fwrite(freq_codes_, sizeof(LmaScoreType), kCodeBookSize, fp) != kCodeBookSize) return false; + + if (fwrite(lma_freq_idx_, sizeof(CODEBOOK_TYPE), idx_num_, fp) != idx_num_) return false; + + return true; +} + +bool NGram::load_ngram(FILE *fp) { + if (NULL == fp) return false; + + initialized_ = false; + + if (fread(&idx_num_, sizeof(uint32), 1, fp) != 1) return false; + + if (NULL != lma_freq_idx_) free(lma_freq_idx_); + + if (NULL != freq_codes_) free(freq_codes_); + + lma_freq_idx_ = static_cast(malloc(idx_num_ * sizeof(CODEBOOK_TYPE))); + 
freq_codes_ = static_cast(malloc(kCodeBookSize * sizeof(LmaScoreType))); + + if (NULL == lma_freq_idx_ || NULL == freq_codes_) return false; + + if (fread(freq_codes_, sizeof(LmaScoreType), kCodeBookSize, fp) != kCodeBookSize) return false; + + if (fread(lma_freq_idx_, sizeof(CODEBOOK_TYPE), idx_num_, fp) != idx_num_) return false; + + initialized_ = true; + + total_freq_none_sys_ = 0; + return true; +} + +void NGram::set_total_freq_none_sys(size_t freq_none_sys) { + total_freq_none_sys_ = freq_none_sys; + if (0 == total_freq_none_sys_) { + sys_score_compensation_ = 0; + } else { + double factor = static_cast(kSysDictTotalFreq) / (kSysDictTotalFreq + total_freq_none_sys_); + sys_score_compensation_ = static_cast(log(factor) * kLogValueAmplifier); + } +} + +// The caller makes sure this oject is initialized. +float NGram::get_uni_psb(LemmaIdType lma_id) { return static_cast(freq_codes_[lma_freq_idx_[lma_id]]) + sys_score_compensation_; } + +float NGram::convert_psb_to_score(double psb) { + float score = static_cast(log(psb) * static_cast(kLogValueAmplifier)); + if (score > static_cast(kMaxScore)) { + score = static_cast(kMaxScore); + } + return score; +} + +#ifdef ___BUILD_MODEL___ +bool NGram::build_unigram(LemmaEntry *lemma_arr, size_t lemma_num, LemmaIdType next_idx_unused) { + if (NULL == lemma_arr || 0 == lemma_num || next_idx_unused <= 1) return false; + + double total_freq = 0; + double *freqs = new double[next_idx_unused]; + if (NULL == freqs) return false; + + freqs[0] = ADD_COUNT; + total_freq += freqs[0]; + LemmaIdType idx_now = 0; + for (size_t pos = 0; pos < lemma_num; pos++) { + if (lemma_arr[pos].idx_by_hz == idx_now) continue; + idx_now++; + + assert(lemma_arr[pos].idx_by_hz == idx_now); + + freqs[idx_now] = lemma_arr[pos].freq; + if (freqs[idx_now] <= 0) freqs[idx_now] = 0.3; + + total_freq += freqs[idx_now]; + } + + double max_freq = 0; + idx_num_ = idx_now + 1; + assert(idx_now + 1 == next_idx_unused); + + for (size_t pos = 0; pos < idx_num_; 
pos++) { + freqs[pos] = freqs[pos] / total_freq; + assert(freqs[pos] > 0); + if (freqs[pos] > max_freq) max_freq = freqs[pos]; + } + + // calculate the code book + if (NULL == freq_codes_df_) freq_codes_df_ = new double[kCodeBookSize]; + assert(freq_codes_df_); + memset(freq_codes_df_, 0, sizeof(double) * kCodeBookSize); + + if (NULL == freq_codes_) freq_codes_ = new LmaScoreType[kCodeBookSize]; + assert(freq_codes_); + memset(freq_codes_, 0, sizeof(LmaScoreType) * kCodeBookSize); + + size_t freq_pos = 0; + for (size_t code_pos = 0; code_pos < kCodeBookSize; code_pos++) { + bool found = true; + + while (found) { + found = false; + double cand = freqs[freq_pos]; + for (size_t i = 0; i < code_pos; i++) + if (freq_codes_df_[i] == cand) { + found = true; + break; + } + if (found) freq_pos++; + } + + freq_codes_df_[code_pos] = freqs[freq_pos]; + freq_pos++; + } + + myqsort(freq_codes_df_, kCodeBookSize, sizeof(double), comp_double); + + if (NULL == lma_freq_idx_) lma_freq_idx_ = new CODEBOOK_TYPE[idx_num_]; + assert(lma_freq_idx_); + + iterate_codes(freqs, idx_num_, freq_codes_df_, lma_freq_idx_); + + delete[] freqs; + + if (kPrintDebug0) { + printf("\n------Language Model Unigram Codebook------\n"); + } + + for (size_t code_pos = 0; code_pos < kCodeBookSize; code_pos++) { + double log_score = log(freq_codes_df_[code_pos]); + float final_score = convert_psb_to_score(freq_codes_df_[code_pos]); + if (kPrintDebug0) { + printf("code:%d, probability:%.9f, log score:%.3f, final score: %.3f\n", code_pos, freq_codes_df_[code_pos], log_score, final_score); + } + freq_codes_[code_pos] = static_cast(final_score); + } + + initialized_ = true; + return true; +} +#endif + +} // namespace ime_pinyin diff --git a/src/share/pinyinime.cpp b/src/share/pinyinime.cpp new file mode 100644 index 0000000..78ab5ea --- /dev/null +++ b/src/share/pinyinime.cpp @@ -0,0 +1,163 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "../include/pinyinime.h" +#include "../include/dicttrie.h" +#include "../include/matrixsearch.h" +#include "../include/spellingtrie.h" + +#ifdef __cplusplus +extern "C" { +#endif + +using namespace ime_pinyin; + +// The maximum number of the prediction items. +static const size_t kMaxPredictNum = 500; + +// Used to search Pinyin string and give the best candidate. +MatrixSearch *matrix_search = NULL; + +char16 predict_buf[kMaxPredictNum][kMaxPredictSize + 1]; + +bool im_open_decoder(const char *fn_sys_dict, const char *fn_usr_dict) { + if (NULL != matrix_search) delete matrix_search; + + matrix_search = new MatrixSearch(); + if (NULL == matrix_search) { + return false; + } + + return matrix_search->init(fn_sys_dict, fn_usr_dict); +} + +bool im_open_decoder_fd(int sys_fd, long start_offset, long length, const char *fn_usr_dict) { + if (NULL != matrix_search) delete matrix_search; + + matrix_search = new MatrixSearch(); + if (NULL == matrix_search) return false; + + return matrix_search->init_fd(sys_fd, start_offset, length, fn_usr_dict); +} + +void im_close_decoder() { + if (NULL != matrix_search) { + matrix_search->close(); + delete matrix_search; + } + matrix_search = NULL; +} + +void im_set_max_lens(size_t max_sps_len, size_t max_hzs_len) { + if (NULL != matrix_search) { + matrix_search->set_max_lens(max_sps_len, max_hzs_len); + } +} + +void im_flush_cache() { + if (NULL != matrix_search) matrix_search->flush_cache(); +} + +// To be 
updated. +size_t im_search(const char *pybuf, size_t pylen) { + if (NULL == matrix_search) return 0; + + matrix_search->search(pybuf, pylen); + return matrix_search->get_candidate_num(); +} + +size_t im_delsearch(size_t pos, bool is_pos_in_splid, bool clear_fixed_this_step) { + if (NULL == matrix_search) return 0; + matrix_search->delsearch(pos, is_pos_in_splid, clear_fixed_this_step); + return matrix_search->get_candidate_num(); +} + +void im_reset_search() { + if (NULL == matrix_search) return; + + matrix_search->reset_search(); +} + +// To be removed +size_t im_add_letter(char ch) { return 0; } + +const char *im_get_sps_str(size_t *decoded_len) { + if (NULL == matrix_search) return NULL; + + return matrix_search->get_pystr(decoded_len); +} + +char16 *im_get_candidate(size_t cand_id, char16 *cand_str, size_t max_len) { + if (NULL == matrix_search) return NULL; + + return matrix_search->get_candidate(cand_id, cand_str, max_len); +} + +size_t im_get_spl_start_pos(const uint16 *&spl_start) { + if (NULL == matrix_search) return 0; + + return matrix_search->get_spl_start(spl_start); +} + +size_t im_choose(size_t choice_id) { + if (NULL == matrix_search) return 0; + + return matrix_search->choose(choice_id); +} + +size_t im_cancel_last_choice() { + if (NULL == matrix_search) return 0; + + return matrix_search->cancel_last_choice(); +} + +size_t im_get_fixed_len() { + if (NULL == matrix_search) return 0; + + return matrix_search->get_fixedlen(); +} + +// To be removed +bool im_cancel_input() { return true; } + +size_t im_get_predicts(const char16 *his_buf, char16 (*&pre_buf)[kMaxPredictSize + 1]) { + if (NULL == his_buf) return 0; + + size_t fixed_len = utf16_strlen(his_buf); + const char16 *fixed_ptr = his_buf; + if (fixed_len > kMaxPredictSize) { + fixed_ptr += fixed_len - kMaxPredictSize; + fixed_len = kMaxPredictSize; + } + + pre_buf = predict_buf; + return matrix_search->get_predicts(his_buf, pre_buf, kMaxPredictNum); +} + +void im_enable_shm_as_szm(bool enable) { 
+ SpellingTrie &spl_trie = SpellingTrie::get_instance(); + spl_trie.szm_enable_shm(enable); +} + +void im_enable_ym_as_szm(bool enable) { + SpellingTrie &spl_trie = SpellingTrie::get_instance(); + spl_trie.szm_enable_ym(enable); +} + +#ifdef __cplusplus +} +#endif diff --git a/src/share/searchutility.cpp b/src/share/searchutility.cpp new file mode 100644 index 0000000..74f6f41 --- /dev/null +++ b/src/share/searchutility.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "../include/mystdlib.h" +#include "../include/searchutility.h" + +namespace ime_pinyin { + +bool is_system_lemma(LemmaIdType lma_id) { return (0 < lma_id && lma_id <= kSysDictIdEnd); } + +bool is_user_lemma(LemmaIdType lma_id) { return (kUserDictIdStart <= lma_id && lma_id <= kUserDictIdEnd); } + +bool is_composing_lemma(LemmaIdType lma_id) { return (kLemmaIdComposing == lma_id); } + +int cmp_lpi_with_psb(const void *p1, const void *p2) { + if ((static_cast(p1))->psb > (static_cast(p2))->psb) return 1; + if ((static_cast(p1))->psb < (static_cast(p2))->psb) return -1; + return 0; +} + +int cmp_lpi_with_unified_psb(const void *p1, const void *p2) { + const LmaPsbItem *item1 = static_cast(p1); + const LmaPsbItem *item2 = static_cast(p2); + + // The real unified psb is psb1 / lma_len1 and psb2 * lma_len2 + // But we use psb1 * lma_len2 and psb2 * lma_len1 to get better + // precision. 
+ size_t up1 = item1->psb * (item2->lma_len); + size_t up2 = item2->psb * (item1->lma_len); + if (up1 < up2) { + return -1; + } + if (up1 > up2) { + return 1; + } + return 0; +} + +int cmp_lpi_with_id(const void *p1, const void *p2) { + if ((static_cast(p1))->id < (static_cast(p2))->id) return -1; + if ((static_cast(p1))->id > (static_cast(p2))->id) return 1; + return 0; +} + +int cmp_lpi_with_hanzi(const void *p1, const void *p2) { + if ((static_cast(p1))->hanzi < (static_cast(p2))->hanzi) return -1; + if ((static_cast(p1))->hanzi > (static_cast(p2))->hanzi) return 1; + + return 0; +} + +int cmp_lpsi_with_str(const void *p1, const void *p2) { return utf16_strcmp((static_cast(p1))->str, (static_cast(p2))->str); } + +int cmp_hanzis_1(const void *p1, const void *p2) { + if (*static_cast(p1) < *static_cast(p2)) return -1; + + if (*static_cast(p1) > *static_cast(p2)) return 1; + return 0; +} + +int cmp_hanzis_2(const void *p1, const void *p2) { return utf16_strncmp(static_cast(p1), static_cast(p2), 2); } + +int cmp_hanzis_3(const void *p1, const void *p2) { return utf16_strncmp(static_cast(p1), static_cast(p2), 3); } + +int cmp_hanzis_4(const void *p1, const void *p2) { return utf16_strncmp(static_cast(p1), static_cast(p2), 4); } + +int cmp_hanzis_5(const void *p1, const void *p2) { return utf16_strncmp(static_cast(p1), static_cast(p2), 5); } + +int cmp_hanzis_6(const void *p1, const void *p2) { return utf16_strncmp(static_cast(p1), static_cast(p2), 6); } + +int cmp_hanzis_7(const void *p1, const void *p2) { return utf16_strncmp(static_cast(p1), static_cast(p2), 7); } + +int cmp_hanzis_8(const void *p1, const void *p2) { return utf16_strncmp(static_cast(p1), static_cast(p2), 8); } + +int cmp_npre_by_score(const void *p1, const void *p2) { + if ((static_cast(p1))->psb > (static_cast(p2))->psb) return 1; + + if ((static_cast(p1))->psb < (static_cast(p2))->psb) return -1; + + return 0; +} + +int cmp_npre_by_hislen_score(const void *p1, const void *p2) { + if 
((static_cast(p1))->his_len < (static_cast(p2))->his_len) return 1; + + if ((static_cast(p1))->his_len > (static_cast(p2))->his_len) return -1; + + if ((static_cast(p1))->psb > (static_cast(p2))->psb) return 1; + + if ((static_cast(p1))->psb < (static_cast(p2))->psb) return -1; + + return 0; +} + +int cmp_npre_by_hanzi_score(const void *p1, const void *p2) { + int ret_v = (utf16_strncmp((static_cast(p1))->pre_hzs, (static_cast(p2))->pre_hzs, kMaxPredictSize)); + if (0 != ret_v) return ret_v; + + if ((static_cast(p1))->psb > (static_cast(p2))->psb) return 1; + + if ((static_cast(p1))->psb < (static_cast(p2))->psb) return -1; + + return 0; +} + +size_t remove_duplicate_npre(NPredictItem *npre_items, size_t npre_num) { + if (NULL == npre_items || 0 == npre_num) return 0; + + myqsort(npre_items, npre_num, sizeof(NPredictItem), cmp_npre_by_hanzi_score); + + size_t remain_num = 1; // The first one is reserved. + for (size_t pos = 1; pos < npre_num; pos++) { + if (utf16_strncmp(npre_items[pos].pre_hzs, npre_items[remain_num - 1].pre_hzs, kMaxPredictSize) != 0) { + if (remain_num != pos) { + npre_items[remain_num] = npre_items[pos]; + } + remain_num++; + } + } + return remain_num; +} + +size_t align_to_size_t(size_t size) { + size_t s = sizeof(size_t); + return (size + s - 1) / s * s; +} + +} // namespace ime_pinyin diff --git a/src/share/spellingtable.cpp b/src/share/spellingtable.cpp new file mode 100644 index 0000000..6acb798 --- /dev/null +++ b/src/share/spellingtable.cpp @@ -0,0 +1,269 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include "../include/spellingtable.h" + +namespace ime_pinyin { + +#ifdef ___BUILD_MODEL___ + +const char SpellingTable::kNotSupportList[kNotSupportNum][kMaxSpellingSize + 1] = {"HM", "HNG", "NG"}; + +// "" is the biggest, so that all empty strings will be moved to the end +// _eb mean empty is biggest +int compare_raw_spl_eb(const void* p1, const void* p2) { + if ('\0' == (static_cast(p1))->str[0]) return 1; + + if ('\0' == (static_cast(p2))->str[0]) return -1; + + return strcmp((static_cast(p1))->str, (static_cast(p2))->str); +} + +size_t get_odd_next(size_t value) { + size_t v_next = value; + while (true) { + size_t v_next_sqrt = (size_t)sqrt(v_next); + + bool is_odd = true; + for (size_t v_dv = 2; v_dv < v_next_sqrt + 1; v_dv++) { + if (v_next % v_dv == 0) { + is_odd = false; + break; + } + } + + if (is_odd) return v_next; + + v_next++; + } + + // never reach here + return 0; +} + +SpellingTable::SpellingTable() { + need_score_ = false; + raw_spellings_ = NULL; + spelling_buf_ = NULL; + spelling_num_ = 0; + total_freq_ = 0; + frozen_ = true; +} + +SpellingTable::~SpellingTable() { free_resource(); } + +size_t SpellingTable::get_hash_pos(const char* spelling_str) { + size_t hash_pos = 0; + for (size_t pos = 0; pos < spelling_size_; pos++) { + if ('\0' == spelling_str[pos]) break; + hash_pos += (size_t)spelling_str[pos]; + } + + hash_pos = hash_pos % spelling_max_num_; + return hash_pos; +} + +size_t SpellingTable::hash_pos_next(size_t hash_pos) { + hash_pos += 123; + hash_pos = hash_pos % 
spelling_max_num_; + return hash_pos; +} + +void SpellingTable::free_resource() { + if (NULL != raw_spellings_) delete[] raw_spellings_; + raw_spellings_ = NULL; + + if (NULL != spelling_buf_) delete[] spelling_buf_; + spelling_buf_ = NULL; +} + +bool SpellingTable::init_table(size_t pure_spl_size, size_t spl_max_num, bool need_score) { + if (pure_spl_size == 0 || spl_max_num == 0) return false; + + need_score_ = need_score; + + free_resource(); + + spelling_size_ = pure_spl_size + 1; + if (need_score) spelling_size_ += 1; + spelling_max_num_ = get_odd_next(spl_max_num); + spelling_num_ = 0; + + raw_spellings_ = new RawSpelling[spelling_max_num_]; + spelling_buf_ = new char[spelling_max_num_ * (spelling_size_)]; + if (NULL == raw_spellings_ || NULL == spelling_buf_) { + free_resource(); + return false; + } + + memset(raw_spellings_, 0, spelling_max_num_ * sizeof(RawSpelling)); + memset(spelling_buf_, 0, spelling_max_num_ * (spelling_size_)); + frozen_ = false; + total_freq_ = 0; + return true; +} + +bool SpellingTable::put_spelling(const char* spelling_str, double freq) { + if (frozen_ || NULL == spelling_str) return false; + + for (size_t pos = 0; pos < kNotSupportNum; pos++) { + if (strcmp(spelling_str, kNotSupportList[pos]) == 0) { + return false; + } + } + + total_freq_ += freq; + + size_t hash_pos = get_hash_pos(spelling_str); + + raw_spellings_[hash_pos].str[spelling_size_ - 1] = '\0'; + + if (strncmp(raw_spellings_[hash_pos].str, spelling_str, spelling_size_ - 1) == 0) { + raw_spellings_[hash_pos].freq += freq; + return true; + } + + size_t hash_pos_ori = hash_pos; + + while (true) { + if (strncmp(raw_spellings_[hash_pos].str, spelling_str, spelling_size_ - 1) == 0) { + raw_spellings_[hash_pos].freq += freq; + return true; + } + + if ('\0' == raw_spellings_[hash_pos].str[0]) { + raw_spellings_[hash_pos].freq += freq; + strncpy(raw_spellings_[hash_pos].str, spelling_str, spelling_size_ - 1); + raw_spellings_[hash_pos].str[spelling_size_ - 1] = '\0'; + 
spelling_num_++; + return true; + } + + hash_pos = hash_pos_next(hash_pos); + if (hash_pos_ori == hash_pos) return false; + } + + // never reach here + return false; +} + +bool SpellingTable::contain(const char* spelling_str) { + if (NULL == spelling_str || NULL == spelling_buf_ || frozen_) return false; + + size_t hash_pos = get_hash_pos(spelling_str); + + if ('\0' == raw_spellings_[hash_pos].str[0]) return false; + + if (strncmp(raw_spellings_[hash_pos].str, spelling_str, spelling_size_ - 1) == 0) return true; + + size_t hash_pos_ori = hash_pos; + + while (true) { + hash_pos = hash_pos_next(hash_pos); + if (hash_pos_ori == hash_pos) return false; + + if ('\0' == raw_spellings_[hash_pos].str[0]) return false; + + if (strncmp(raw_spellings_[hash_pos].str, spelling_str, spelling_size_ - 1) == 0) return true; + } + + // never reach here + return false; +} + +const char* SpellingTable::arrange(size_t* item_size, size_t* spl_num) { + if (NULL == raw_spellings_ || NULL == spelling_buf_ || NULL == item_size || NULL == spl_num) return NULL; + + qsort(raw_spellings_, spelling_max_num_, sizeof(RawSpelling), compare_raw_spl_eb); + + // After sorting, only the first spelling_num_ items are valid. + // Copy them to the destination buffer. + for (size_t pos = 0; pos < spelling_num_; pos++) { + strncpy(spelling_buf_ + pos * spelling_size_, raw_spellings_[pos].str, spelling_size_); + } + + if (need_score_) { + if (kPrintDebug0) printf("------------Spelling Possiblities--------------\n"); + + double max_score = 0; + double min_score = 0; + + // After sorting, only the first spelling_num_ items are valid. 
+ for (size_t pos = 0; pos < spelling_num_; pos++) { + raw_spellings_[pos].freq /= total_freq_; + if (need_score_) { + if (0 == pos) { + max_score = raw_spellings_[0].freq; + min_score = max_score; + } else { + if (raw_spellings_[pos].freq > max_score) max_score = raw_spellings_[pos].freq; + if (raw_spellings_[pos].freq < min_score) min_score = raw_spellings_[pos].freq; + } + } + } + + if (kPrintDebug0) printf("-----max psb: %f, min psb: %f\n", max_score, min_score); + + max_score = log(max_score); + min_score = log(min_score); + + if (kPrintDebug0) printf("-----max log value: %f, min log value: %f\n", max_score, min_score); + + // The absolute value of min_score is bigger than that of max_score because + // both of them are negative after log function. + score_amplifier_ = 1.0 * 255 / min_score; + + double average_score = 0; + for (size_t pos = 0; pos < spelling_num_; pos++) { + double score = log(raw_spellings_[pos].freq) * score_amplifier_; + assert(score >= 0); + + average_score += score; + + // Because of calculation precision issue, score might be a little bigger + // than 255 after being amplified. 
+ if (score > 255) score = 255; + char* this_spl_buf = spelling_buf_ + pos * spelling_size_; + this_spl_buf[spelling_size_ - 1] = static_cast((unsigned char)score); + + if (kPrintDebug0) { + printf("---pos:%d, %s, psb:%d\n", pos, this_spl_buf, (unsigned char)this_spl_buf[spelling_size_ - 1]); + } + } + average_score /= spelling_num_; + assert(average_score <= 255); + average_score_ = static_cast(average_score); + + if (kPrintDebug0) printf("\n----Score Amplifier: %f, Average Score: %d\n", score_amplifier_, average_score_); + } + + *item_size = spelling_size_; + *spl_num = spelling_num_; + frozen_ = true; + return spelling_buf_; +} + +float SpellingTable::get_score_amplifier() { return static_cast(score_amplifier_); } + +unsigned char SpellingTable::get_average_score() { return average_score_; } + +#endif // ___BUILD_MODEL___ +} // namespace ime_pinyin diff --git a/src/share/spellingtrie.cpp b/src/share/spellingtrie.cpp new file mode 100644 index 0000000..9701425 --- /dev/null +++ b/src/share/spellingtrie.cpp @@ -0,0 +1,708 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include "../include/dictdef.h" + +#ifdef ___BUILD_MODEL___ +#include "../include/spellingtable.h" +#endif + +#include "../include/spellingtrie.h" + +namespace ime_pinyin { + +SpellingTrie *SpellingTrie::instance_ = NULL; + +// z/c/s is for Zh/Ch/Sh +const char SpellingTrie::kHalfId2Sc_[kFullSplIdStart + 1] = "0ABCcDEFGHIJKLMNOPQRSsTUVWXYZz"; + +// Bit 0 : is it a Shengmu char? +// Bit 1 : is it a Yunmu char? (one char is a Yunmu) +// Bit 2 : is it enabled in ShouZiMu(first char) mode? +unsigned char SpellingTrie::char_flags_[] = { + // a b c d e f g + 0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, + // h i j k l m n + 0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, + // o p q r s t + 0x02, 0x01, 0x01, 0x01, 0x01, 0x01, + // u v w x y z + 0x00, 0x00, 0x01, 0x01, 0x01, 0x01}; + +int compare_spl(const void *p1, const void *p2) { return strcmp((const char *)(p1), (const char *)(p2)); } + +SpellingTrie::SpellingTrie() { + spelling_buf_ = NULL; + spelling_size_ = 0; + spelling_num_ = 0; + spl_ym_ids_ = NULL; + splstr_queried_ = NULL; + splstr16_queried_ = NULL; + root_ = NULL; + dumb_node_ = NULL; + splitter_node_ = NULL; + instance_ = NULL; + ym_buf_ = NULL; + f2h_ = NULL; + + szm_enable_shm(true); + szm_enable_ym(true); + +#ifdef ___BUILD_MODEL___ + node_num_ = 0; +#endif +} + +SpellingTrie::~SpellingTrie() { + if (NULL != spelling_buf_) delete[] spelling_buf_; + + if (NULL != splstr_queried_) delete[] splstr_queried_; + + if (NULL != splstr16_queried_) delete[] splstr16_queried_; + + if (NULL != spl_ym_ids_) delete[] spl_ym_ids_; + + if (NULL != root_) { + free_son_trie(root_); + delete root_; + } + + if (NULL != dumb_node_) { + delete[] dumb_node_; + } + + if (NULL != splitter_node_) { + delete[] splitter_node_; + } + + if (NULL != instance_) { + delete instance_; + instance_ = NULL; + } + + if (NULL != ym_buf_) delete[] ym_buf_; + + if (NULL != f2h_) delete[] f2h_; +} + +bool SpellingTrie::if_valid_id_update(uint16 *splid) const { + if 
(NULL == splid || 0 == *splid) return false; + + if (*splid >= kFullSplIdStart) return true; + if (*splid < kFullSplIdStart) { + char ch = kHalfId2Sc_[*splid]; + if (ch > 'Z') { + return true; + } else { + if (szm_is_enabled(ch)) { + return true; + } else if (is_yunmu_char(ch)) { + assert(h2f_num_[*splid] > 0); + *splid = h2f_start_[*splid]; + return true; + } + } + } + return false; +} + +bool SpellingTrie::is_half_id(uint16 splid) const { + if (0 == splid || splid >= kFullSplIdStart) return false; + + return true; +} + +bool SpellingTrie::is_full_id(uint16 splid) const { + if (splid < kFullSplIdStart || splid >= kFullSplIdStart + spelling_num_) return false; + return true; +} + +bool SpellingTrie::half_full_compatible(uint16 half_id, uint16 full_id) const { + uint16 half_fr_full = full_to_half(full_id); + + if (half_fr_full == half_id) return true; + + // &~0x20 is used to conver the char to upper case. + // So that Zh/Ch/Sh(whose char is z/c/s) can be matched with Z/C/S. + char ch_f = (kHalfId2Sc_[half_fr_full] & (~0x20)); + char ch_h = kHalfId2Sc_[half_id]; + if (ch_f == ch_h) return true; + + return false; +} + +bool SpellingTrie::is_half_id_yunmu(uint16 splid) const { + if (0 == splid || splid >= kFullSplIdStart) return false; + + char ch = kHalfId2Sc_[splid]; + // If ch >= 'a', that means the half id is one of Zh/Ch/Sh + if (ch >= 'a') { + return false; + } + + return char_flags_[ch - 'A'] & kHalfIdYunmuMask; +} + +bool SpellingTrie::is_shengmu_char(char ch) const { return char_flags_[ch - 'A'] & kHalfIdShengmuMask; } + +bool SpellingTrie::is_yunmu_char(char ch) const { return char_flags_[ch - 'A'] & kHalfIdYunmuMask; } + +bool SpellingTrie::is_szm_char(char ch) const { return is_shengmu_char(ch) || is_yunmu_char(ch); } + +bool SpellingTrie::szm_is_enabled(char ch) const { return char_flags_[ch - 'A'] & kHalfIdSzmMask; } + +void SpellingTrie::szm_enable_shm(bool enable) { + if (enable) { + for (char ch = 'A'; ch <= 'Z'; ch++) { + if (is_shengmu_char(ch)) 
char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask; + } + } else { + for (char ch = 'A'; ch <= 'Z'; ch++) { + if (is_shengmu_char(ch)) char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff); + } + } +} + +void SpellingTrie::szm_enable_ym(bool enable) { + if (enable) { + for (char ch = 'A'; ch <= 'Z'; ch++) { + if (is_yunmu_char(ch)) char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask; + } + } else { + for (char ch = 'A'; ch <= 'Z'; ch++) { + if (is_yunmu_char(ch)) char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff); + } + } +} + +bool SpellingTrie::is_szm_enabled(char ch) const { return char_flags_[ch - 'A'] & kHalfIdSzmMask; } + +const SpellingTrie *SpellingTrie::get_cpinstance() { return &get_instance(); } + +SpellingTrie &SpellingTrie::get_instance() { + if (NULL == instance_) instance_ = new SpellingTrie(); + + return *instance_; +} + +uint16 SpellingTrie::half2full_num(uint16 half_id) const { + if (NULL == root_ || half_id >= kFullSplIdStart) return 0; + return h2f_num_[half_id]; +} + +uint16 SpellingTrie::half_to_full(uint16 half_id, uint16 *spl_id_start) const { + if (NULL == spl_id_start || NULL == root_ || half_id >= kFullSplIdStart) return 0; + + *spl_id_start = h2f_start_[half_id]; + return h2f_num_[half_id]; +} + +uint16 SpellingTrie::full_to_half(uint16 full_id) const { + if (NULL == root_ || full_id < kFullSplIdStart || full_id > spelling_num_ + kFullSplIdStart) return 0; + + return f2h_[full_id - kFullSplIdStart]; +} + +void SpellingTrie::free_son_trie(SpellingNode *node) { + if (NULL == node) return; + + for (size_t pos = 0; pos < node->num_of_son; pos++) { + free_son_trie(node->first_son + pos); + } + + if (NULL != node->first_son) delete[] node->first_son; +} + +bool SpellingTrie::construct(const char *spelling_arr, size_t item_size, size_t item_num, float score_amplifier, unsigned char average_score) { + if (spelling_arr == NULL) return false; + + memset(h2f_start_, 0, sizeof(uint16) * 
kFullSplIdStart); + memset(h2f_num_, 0, sizeof(uint16) * kFullSplIdStart); + + // If the arr is the same as the buf, means this function is called by + // load_table(), the table data are ready; otherwise the array should be + // saved. + if (spelling_arr != spelling_buf_) { + if (NULL != spelling_buf_) delete[] spelling_buf_; + spelling_buf_ = new char[item_size * item_num]; + if (NULL == spelling_buf_) return false; + memcpy(spelling_buf_, spelling_arr, sizeof(char) * item_size * item_num); + } + + spelling_size_ = item_size; + spelling_num_ = item_num; + + score_amplifier_ = score_amplifier; + average_score_ = average_score; + + if (NULL != splstr_queried_) delete[] splstr_queried_; + splstr_queried_ = new char[spelling_size_]; + if (NULL == splstr_queried_) return false; + + if (NULL != splstr16_queried_) delete[] splstr16_queried_; + splstr16_queried_ = new char16[spelling_size_]; + if (NULL == splstr16_queried_) return false; + + // First, sort the buf to ensure they are in ascendant order + qsort(spelling_buf_, spelling_num_, spelling_size_, compare_spl); + +#ifdef ___BUILD_MODEL___ + node_num_ = 1; +#endif + + root_ = new SpellingNode(); + memset(root_, 0, sizeof(SpellingNode)); + + dumb_node_ = new SpellingNode(); + memset(dumb_node_, 0, sizeof(SpellingNode)); + dumb_node_->score = average_score_; + + splitter_node_ = new SpellingNode(); + memset(splitter_node_, 0, sizeof(SpellingNode)); + splitter_node_->score = average_score_; + + memset(level1_sons_, 0, sizeof(SpellingNode *) * kValidSplCharNum); + + root_->first_son = construct_spellings_subset(0, spelling_num_, 0, root_); + + // Root's score should be cleared. 
+ root_->score = 0; + + if (NULL == root_->first_son) return false; + + h2f_start_[0] = h2f_num_[0] = 0; + + if (!build_f2h()) return false; + +#ifdef ___BUILD_MODEL___ + if (kPrintDebug0) { + printf("---SpellingTrie Nodes: %d\n", node_num_); + } + return build_ym_info(); +#else + return true; +#endif +} + +#ifdef ___BUILD_MODEL___ +const char *SpellingTrie::get_ym_str(const char *spl_str) { + bool start_ZCS = false; + if (is_shengmu_char(*spl_str)) { + if ('Z' == *spl_str || 'C' == *spl_str || 'S' == *spl_str) start_ZCS = true; + spl_str += 1; + if (start_ZCS && 'h' == *spl_str) spl_str += 1; + } + return spl_str; +} + +bool SpellingTrie::build_ym_info() { + bool sucess; + SpellingTable *spl_table = new SpellingTable(); + + sucess = spl_table->init_table(kMaxPinyinSize - 1, 2 * kMaxYmNum, false); + assert(sucess); + + for (uint16 pos = 0; pos < spelling_num_; pos++) { + const char *spl_str = spelling_buf_ + spelling_size_ * pos; + spl_str = get_ym_str(spl_str); + if ('\0' != spl_str[0]) { + sucess = spl_table->put_spelling(spl_str, 0); + assert(sucess); + } + } + + size_t ym_item_size; // '\0' is included + size_t ym_num; + const char *ym_buf; + ym_buf = spl_table->arrange(&ym_item_size, &ym_num); + + if (NULL != ym_buf_) delete[] ym_buf_; + ym_buf_ = new char[ym_item_size * ym_num]; + if (NULL == ym_buf_) { + delete spl_table; + return false; + } + + memcpy(ym_buf_, ym_buf, sizeof(char) * ym_item_size * ym_num); + ym_size_ = ym_item_size; + ym_num_ = ym_num; + + delete spl_table; + + // Generate the maping from the spelling ids to the Yunmu ids. 
+ if (spl_ym_ids_) delete spl_ym_ids_; + spl_ym_ids_ = new uint8[spelling_num_ + kFullSplIdStart]; + if (NULL == spl_ym_ids_) return false; + + memset(spl_ym_ids_, 0, sizeof(uint8) * (spelling_num_ + kFullSplIdStart)); + + for (uint16 id = 1; id < spelling_num_ + kFullSplIdStart; id++) { + const char *str = get_spelling_str(id); + + str = get_ym_str(str); + if ('\0' != str[0]) { + uint8 ym_id = get_ym_id(str); + spl_ym_ids_[id] = ym_id; + assert(ym_id > 0); + } else { + spl_ym_ids_[id] = 0; + } + } + return true; +} +#endif + +SpellingNode *SpellingTrie::construct_spellings_subset(size_t item_start, size_t item_end, size_t level, SpellingNode *parent) { + if (level >= spelling_size_ || item_end <= item_start || NULL == parent) return NULL; + + SpellingNode *first_son = NULL; + uint16 num_of_son = 0; + unsigned char min_son_score = 255; + + const char *spelling_last_start = spelling_buf_ + spelling_size_ * item_start; + char char_for_node = spelling_last_start[level]; + assert(char_for_node >= 'A' && char_for_node <= 'Z' || 'h' == char_for_node); + + // Scan the array to find how many sons + for (size_t i = item_start + 1; i < item_end; i++) { + const char *spelling_current = spelling_buf_ + spelling_size_ * i; + char char_current = spelling_current[level]; + if (char_current != char_for_node) { + num_of_son++; + char_for_node = char_current; + } + } + num_of_son++; + + // Allocate memory +#ifdef ___BUILD_MODEL___ + node_num_ += num_of_son; +#endif + first_son = new SpellingNode[num_of_son]; + memset(first_son, 0, sizeof(SpellingNode) * num_of_son); + + // Now begin construct tree + size_t son_pos = 0; + + spelling_last_start = spelling_buf_ + spelling_size_ * item_start; + char_for_node = spelling_last_start[level]; + + bool spelling_endable = true; + if (spelling_last_start[level + 1] != '\0') spelling_endable = false; + + size_t item_start_next = item_start; + + for (size_t i = item_start + 1; i < item_end; i++) { + const char *spelling_current = spelling_buf_ + 
spelling_size_ * i; + char char_current = spelling_current[level]; + assert(is_valid_spl_char(char_current)); + + if (char_current != char_for_node) { + // Construct a node + SpellingNode *node_current = first_son + son_pos; + node_current->char_this_node = char_for_node; + + // For quick search in the first level + if (0 == level) level1_sons_[char_for_node - 'A'] = node_current; + + if (spelling_endable) { + node_current->spelling_idx = kFullSplIdStart + item_start_next; + } + + if (spelling_last_start[level + 1] != '\0' || i - item_start_next > 1) { + size_t real_start = item_start_next; + if (spelling_last_start[level + 1] == '\0') real_start++; + + node_current->first_son = construct_spellings_subset(real_start, i, level + 1, node_current); + + if (real_start == item_start_next + 1) { + uint16 score_this = static_cast(spelling_last_start[spelling_size_ - 1]); + if (score_this < node_current->score) node_current->score = score_this; + } + } else { + node_current->first_son = NULL; + node_current->score = static_cast(spelling_last_start[spelling_size_ - 1]); + } + + if (node_current->score < min_son_score) min_son_score = node_current->score; + + bool is_half = false; + if (level == 0 && is_szm_char(char_for_node)) { + node_current->spelling_idx = static_cast(char_for_node - 'A' + 1); + + if (char_for_node > 'C') node_current->spelling_idx++; + if (char_for_node > 'S') node_current->spelling_idx++; + + h2f_num_[node_current->spelling_idx] = i - item_start_next; + is_half = true; + } else if (level == 1 && char_for_node == 'h') { + char ch_level0 = spelling_last_start[0]; + uint16 part_id = 0; + if (ch_level0 == 'C') + part_id = 'C' - 'A' + 1 + 1; + else if (ch_level0 == 'S') + part_id = 'S' - 'A' + 1 + 2; + else if (ch_level0 == 'Z') + part_id = 'Z' - 'A' + 1 + 3; + if (0 != part_id) { + node_current->spelling_idx = part_id; + h2f_num_[node_current->spelling_idx] = i - item_start_next; + is_half = true; + } + } + + if (is_half) { + if 
(h2f_num_[node_current->spelling_idx] > 0) + h2f_start_[node_current->spelling_idx] = item_start_next + kFullSplIdStart; + else + h2f_start_[node_current->spelling_idx] = 0; + } + + // for next sibling + spelling_last_start = spelling_current; + char_for_node = char_current; + item_start_next = i; + spelling_endable = true; + if (spelling_current[level + 1] != '\0') spelling_endable = false; + + son_pos++; + } + } + + // the last one + SpellingNode *node_current = first_son + son_pos; + node_current->char_this_node = char_for_node; + + // For quick search in the first level + if (0 == level) level1_sons_[char_for_node - 'A'] = node_current; + + if (spelling_endable) { + node_current->spelling_idx = kFullSplIdStart + item_start_next; + } + + if (spelling_last_start[level + 1] != '\0' || item_end - item_start_next > 1) { + size_t real_start = item_start_next; + if (spelling_last_start[level + 1] == '\0') real_start++; + + node_current->first_son = construct_spellings_subset(real_start, item_end, level + 1, node_current); + + if (real_start == item_start_next + 1) { + uint16 score_this = static_cast(spelling_last_start[spelling_size_ - 1]); + if (score_this < node_current->score) node_current->score = score_this; + } + } else { + node_current->first_son = NULL; + node_current->score = static_cast(spelling_last_start[spelling_size_ - 1]); + } + + if (node_current->score < min_son_score) min_son_score = node_current->score; + + assert(son_pos + 1 == num_of_son); + + bool is_half = false; + if (level == 0 && szm_is_enabled(char_for_node)) { + node_current->spelling_idx = static_cast(char_for_node - 'A' + 1); + + if (char_for_node > 'C') node_current->spelling_idx++; + if (char_for_node > 'S') node_current->spelling_idx++; + + h2f_num_[node_current->spelling_idx] = item_end - item_start_next; + is_half = true; + } else if (level == 1 && char_for_node == 'h') { + char ch_level0 = spelling_last_start[0]; + uint16 part_id = 0; + if (ch_level0 == 'C') + part_id = 'C' - 'A' + 
1 + 1; + else if (ch_level0 == 'S') + part_id = 'S' - 'A' + 1 + 2; + else if (ch_level0 == 'Z') + part_id = 'Z' - 'A' + 1 + 3; + if (0 != part_id) { + node_current->spelling_idx = part_id; + h2f_num_[node_current->spelling_idx] = item_end - item_start_next; + is_half = true; + } + } + if (is_half) { + if (h2f_num_[node_current->spelling_idx] > 0) + h2f_start_[node_current->spelling_idx] = item_start_next + kFullSplIdStart; + else + h2f_start_[node_current->spelling_idx] = 0; + } + + parent->num_of_son = num_of_son; + parent->score = min_son_score; + return first_son; +} + +bool SpellingTrie::save_spl_trie(FILE *fp) { + if (NULL == fp || NULL == spelling_buf_) return false; + + if (fwrite(&spelling_size_, sizeof(uint32), 1, fp) != 1) return false; + + if (fwrite(&spelling_num_, sizeof(uint32), 1, fp) != 1) return false; + + if (fwrite(&score_amplifier_, sizeof(float), 1, fp) != 1) return false; + + if (fwrite(&average_score_, sizeof(unsigned char), 1, fp) != 1) return false; + + if (fwrite(spelling_buf_, sizeof(char) * spelling_size_, spelling_num_, fp) != spelling_num_) return false; + + return true; +} + +bool SpellingTrie::load_spl_trie(FILE *fp) { + if (NULL == fp) return false; + + if (fread(&spelling_size_, sizeof(uint32), 1, fp) != 1) return false; + + if (fread(&spelling_num_, sizeof(uint32), 1, fp) != 1) return false; + + if (fread(&score_amplifier_, sizeof(float), 1, fp) != 1) return false; + + if (fread(&average_score_, sizeof(unsigned char), 1, fp) != 1) return false; + + if (NULL != spelling_buf_) delete[] spelling_buf_; + + spelling_buf_ = new char[spelling_size_ * spelling_num_]; + if (NULL == spelling_buf_) return false; + + if (fread(spelling_buf_, sizeof(char) * spelling_size_, spelling_num_, fp) != spelling_num_) return false; + + return construct(spelling_buf_, spelling_size_, spelling_num_, score_amplifier_, average_score_); +} + +bool SpellingTrie::build_f2h() { + if (NULL != f2h_) delete[] f2h_; + f2h_ = new uint16[spelling_num_]; + if (NULL 
== f2h_) return false; + + for (uint16 hid = 0; hid < kFullSplIdStart; hid++) { + for (uint16 fid = h2f_start_[hid]; fid < h2f_start_[hid] + h2f_num_[hid]; fid++) f2h_[fid - kFullSplIdStart] = hid; + } + + return true; +} + +size_t SpellingTrie::get_spelling_num() { return spelling_num_; } + +uint8 SpellingTrie::get_ym_id(const char *ym_str) { + if (NULL == ym_str || NULL == ym_buf_) return 0; + + for (uint8 pos = 0; pos < ym_num_; pos++) + if (strcmp(ym_buf_ + ym_size_ * pos, ym_str) == 0) return pos + 1; + + return 0; +} + +const char *SpellingTrie::get_spelling_str(uint16 splid) { + splstr_queried_[0] = '\0'; + + if (splid >= kFullSplIdStart) { + splid -= kFullSplIdStart; + snprintf(splstr_queried_, spelling_size_, "%s", spelling_buf_ + splid * spelling_size_); + } else { + if (splid == 'C' - 'A' + 1 + 1) { + snprintf(splstr_queried_, spelling_size_, "%s", "Ch"); + } else if (splid == 'S' - 'A' + 1 + 2) { + snprintf(splstr_queried_, spelling_size_, "%s", "Sh"); + } else if (splid == 'Z' - 'A' + 1 + 3) { + snprintf(splstr_queried_, spelling_size_, "%s", "Zh"); + } else { + if (splid > 'C' - 'A' + 1) splid--; + if (splid > 'S' - 'A' + 1) splid--; + splstr_queried_[0] = 'A' + splid - 1; + splstr_queried_[1] = '\0'; + } + } + return splstr_queried_; +} + +const char16 *SpellingTrie::get_spelling_str16(uint16 splid) { + splstr16_queried_[0] = '\0'; + + if (splid >= kFullSplIdStart) { + splid -= kFullSplIdStart; + for (size_t pos = 0; pos < spelling_size_; pos++) { + splstr16_queried_[pos] = static_cast(spelling_buf_[splid * spelling_size_ + pos]); + } + } else { + if (splid == 'C' - 'A' + 1 + 1) { + splstr16_queried_[0] = static_cast('C'); + splstr16_queried_[1] = static_cast('h'); + splstr16_queried_[2] = static_cast('\0'); + } else if (splid == 'S' - 'A' + 1 + 2) { + splstr16_queried_[0] = static_cast('S'); + splstr16_queried_[1] = static_cast('h'); + splstr16_queried_[2] = static_cast('\0'); + } else if (splid == 'Z' - 'A' + 1 + 3) { + splstr16_queried_[0] = 
static_cast('Z'); + splstr16_queried_[1] = static_cast('h'); + splstr16_queried_[2] = static_cast('\0'); + } else { + if (splid > 'C' - 'A' + 1) splid--; + if (splid > 'S' - 'A' + 1) splid--; + splstr16_queried_[0] = 'A' + splid - 1; + splstr16_queried_[1] = '\0'; + } + } + return splstr16_queried_; +} + +size_t SpellingTrie::get_spelling_str16(uint16 splid, char16 *splstr16, size_t splstr16_len) { + if (NULL == splstr16 || splstr16_len < kMaxPinyinSize + 1) return 0; + + if (splid >= kFullSplIdStart) { + splid -= kFullSplIdStart; + for (size_t pos = 0; pos <= kMaxPinyinSize; pos++) { + splstr16[pos] = static_cast(spelling_buf_[splid * spelling_size_ + pos]); + if (static_cast('\0') == splstr16[pos]) { + return pos; + } + } + } else { + if (splid == 'C' - 'A' + 1 + 1) { + splstr16[0] = static_cast('C'); + splstr16[1] = static_cast('h'); + splstr16[2] = static_cast('\0'); + return 2; + } else if (splid == 'S' - 'A' + 1 + 2) { + splstr16[0] = static_cast('S'); + splstr16[1] = static_cast('h'); + splstr16[2] = static_cast('\0'); + return 2; + } else if (splid == 'Z' - 'A' + 1 + 3) { + splstr16[0] = static_cast('Z'); + splstr16[1] = static_cast('h'); + splstr16[2] = static_cast('\0'); + return 2; + } else { + if (splid > 'C' - 'A' + 1) splid--; + if (splid > 'S' - 'A' + 1) splid--; + splstr16[0] = 'A' + splid - 1; + splstr16[1] = '\0'; + return 1; + } + } + + // Not reachable. + return 0; +} + +} // namespace ime_pinyin diff --git a/src/share/splparser.cpp b/src/share/splparser.cpp new file mode 100644 index 0000000..be8c205 --- /dev/null +++ b/src/share/splparser.cpp @@ -0,0 +1,295 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "../include/splparser.h" + +namespace ime_pinyin { + +SpellingParser::SpellingParser() { spl_trie_ = SpellingTrie::get_cpinstance(); } + +bool SpellingParser::is_valid_to_parse(char ch) { return SpellingTrie::is_valid_spl_char(ch); } + +uint16 SpellingParser::splstr_to_idxs(const char *splstr, uint16 str_len, uint16 spl_idx[], uint16 start_pos[], uint16 max_size, bool &last_is_pre) { + if (NULL == splstr || 0 == max_size || 0 == str_len) return 0; + + if (!SpellingTrie::is_valid_spl_char(splstr[0])) return 0; + + last_is_pre = false; + + const SpellingNode *node_this = spl_trie_->root_; + + uint16 str_pos = 0; + uint16 idx_num = 0; + if (NULL != start_pos) start_pos[0] = 0; + bool last_is_splitter = false; + + while (str_pos < str_len) { + char char_this = splstr[str_pos]; + // all characters outside of [a, z] are considered as splitters + if (!SpellingTrie::is_valid_spl_char(char_this)) { + // test if the current node is endable + uint16 id_this = node_this->spelling_idx; + if (spl_trie_->if_valid_id_update(&id_this)) { + spl_idx[idx_num] = id_this; + + idx_num++; + str_pos++; + if (NULL != start_pos) start_pos[idx_num] = str_pos; + if (idx_num >= max_size) return idx_num; + + node_this = spl_trie_->root_; + last_is_splitter = true; + continue; + } else { + if (last_is_splitter) { + str_pos++; + if (NULL != start_pos) start_pos[idx_num] = str_pos; + continue; + } else { + return idx_num; + } + } + } + + last_is_splitter = false; + + SpellingNode *found_son = NULL; + + if (0 == str_pos) { + if (char_this >= 'a') + 
found_son = spl_trie_->level1_sons_[char_this - 'a']; + else + found_son = spl_trie_->level1_sons_[char_this - 'A']; + } else { + SpellingNode *first_son = node_this->first_son; + // Because for Zh/Ch/Sh nodes, they are the last in the buffer and + // frequently used, so we scan from the end. + for (int i = 0; i < node_this->num_of_son; i++) { + SpellingNode *this_son = first_son + i; + if (SpellingTrie::is_same_spl_char(this_son->char_this_node, char_this)) { + found_son = this_son; + break; + } + } + } + + // found, just move the current node pointer to the the son + if (NULL != found_son) { + node_this = found_son; + } else { + // not found, test if it is endable + uint16 id_this = node_this->spelling_idx; + if (spl_trie_->if_valid_id_update(&id_this)) { + // endable, remember the index + spl_idx[idx_num] = id_this; + + idx_num++; + if (NULL != start_pos) start_pos[idx_num] = str_pos; + if (idx_num >= max_size) return idx_num; + node_this = spl_trie_->root_; + continue; + } else { + return idx_num; + } + } + + str_pos++; + } + + uint16 id_this = node_this->spelling_idx; + if (spl_trie_->if_valid_id_update(&id_this)) { + // endable, remember the index + spl_idx[idx_num] = id_this; + + idx_num++; + if (NULL != start_pos) start_pos[idx_num] = str_pos; + } + + last_is_pre = !last_is_splitter; + + return idx_num; +} + +uint16 SpellingParser::splstr_to_idxs_f(const char *splstr, uint16 str_len, uint16 spl_idx[], uint16 start_pos[], uint16 max_size, bool &last_is_pre) { + uint16 idx_num = splstr_to_idxs(splstr, str_len, spl_idx, start_pos, max_size, last_is_pre); + for (uint16 pos = 0; pos < idx_num; pos++) { + if (spl_trie_->is_half_id_yunmu(spl_idx[pos])) { + spl_trie_->half_to_full(spl_idx[pos], spl_idx + pos); + if (pos == idx_num - 1) { + last_is_pre = false; + } + } + } + return idx_num; +} + +uint16 SpellingParser::splstr16_to_idxs(const char16 *splstr, uint16 str_len, uint16 spl_idx[], uint16 start_pos[], uint16 max_size, bool &last_is_pre) { + if (NULL == 
splstr || 0 == max_size || 0 == str_len) return 0; + + if (!SpellingTrie::is_valid_spl_char(splstr[0])) return 0; + + last_is_pre = false; + + const SpellingNode *node_this = spl_trie_->root_; + + uint16 str_pos = 0; + uint16 idx_num = 0; + if (NULL != start_pos) start_pos[0] = 0; + bool last_is_splitter = false; + + while (str_pos < str_len) { + char16 char_this = splstr[str_pos]; + // all characters outside of [a, z] are considered as splitters + if (!SpellingTrie::is_valid_spl_char(char_this)) { + // test if the current node is endable + uint16 id_this = node_this->spelling_idx; + if (spl_trie_->if_valid_id_update(&id_this)) { + spl_idx[idx_num] = id_this; + + idx_num++; + str_pos++; + if (NULL != start_pos) start_pos[idx_num] = str_pos; + if (idx_num >= max_size) return idx_num; + + node_this = spl_trie_->root_; + last_is_splitter = true; + continue; + } else { + if (last_is_splitter) { + str_pos++; + if (NULL != start_pos) start_pos[idx_num] = str_pos; + continue; + } else { + return idx_num; + } + } + } + + last_is_splitter = false; + + SpellingNode *found_son = NULL; + + if (0 == str_pos) { + if (char_this >= 'a') + found_son = spl_trie_->level1_sons_[char_this - 'a']; + else + found_son = spl_trie_->level1_sons_[char_this - 'A']; + } else { + SpellingNode *first_son = node_this->first_son; + // Because for Zh/Ch/Sh nodes, they are the last in the buffer and + // frequently used, so we scan from the end. 
+ for (int i = 0; i < node_this->num_of_son; i++) { + SpellingNode *this_son = first_son + i; + if (SpellingTrie::is_same_spl_char(this_son->char_this_node, char_this)) { + found_son = this_son; + break; + } + } + } + + // found, just move the current node pointer to the the son + if (NULL != found_son) { + node_this = found_son; + } else { + // not found, test if it is endable + uint16 id_this = node_this->spelling_idx; + if (spl_trie_->if_valid_id_update(&id_this)) { + // endable, remember the index + spl_idx[idx_num] = id_this; + + idx_num++; + if (NULL != start_pos) start_pos[idx_num] = str_pos; + if (idx_num >= max_size) return idx_num; + node_this = spl_trie_->root_; + continue; + } else { + return idx_num; + } + } + + str_pos++; + } + + uint16 id_this = node_this->spelling_idx; + if (spl_trie_->if_valid_id_update(&id_this)) { + // endable, remember the index + spl_idx[idx_num] = id_this; + + idx_num++; + if (NULL != start_pos) start_pos[idx_num] = str_pos; + } + + last_is_pre = !last_is_splitter; + + return idx_num; +} + +uint16 SpellingParser::splstr16_to_idxs_f(const char16 *splstr, uint16 str_len, uint16 spl_idx[], uint16 start_pos[], uint16 max_size, bool &last_is_pre) { + uint16 idx_num = splstr16_to_idxs(splstr, str_len, spl_idx, start_pos, max_size, last_is_pre); + for (uint16 pos = 0; pos < idx_num; pos++) { + if (spl_trie_->is_half_id_yunmu(spl_idx[pos])) { + spl_trie_->half_to_full(spl_idx[pos], spl_idx + pos); + if (pos == idx_num - 1) { + last_is_pre = false; + } + } + } + return idx_num; +} + +uint16 SpellingParser::get_splid_by_str(const char *splstr, uint16 str_len, bool *is_pre) { + if (NULL == is_pre) return 0; + + uint16 spl_idx[2]; + uint16 start_pos[3]; + + if (splstr_to_idxs(splstr, str_len, spl_idx, start_pos, 2, *is_pre) != 1) return 0; + + if (start_pos[1] != str_len) return 0; + return spl_idx[0]; +} + +uint16 SpellingParser::get_splid_by_str_f(const char *splstr, uint16 str_len, bool *is_pre) { + if (NULL == is_pre) return 0; + + 
uint16 spl_idx[2]; + uint16 start_pos[3]; + + if (splstr_to_idxs(splstr, str_len, spl_idx, start_pos, 2, *is_pre) != 1) return 0; + + if (start_pos[1] != str_len) return 0; + if (spl_trie_->is_half_id_yunmu(spl_idx[0])) { + spl_trie_->half_to_full(spl_idx[0], spl_idx); + *is_pre = false; + } + + return spl_idx[0]; +} + +uint16 SpellingParser::get_splids_parallel(const char *splstr, uint16 str_len, uint16 splidx[], uint16 max_size, uint16 &full_id_num, bool &is_pre) { + if (max_size <= 0 || !is_valid_to_parse(splstr[0])) return 0; + + splidx[0] = get_splid_by_str(splstr, str_len, &is_pre); + full_id_num = 0; + if (0 != splidx[0]) { + if (splidx[0] >= kFullSplIdStart) full_id_num = 1; + return 1; + } + return 0; +} + +} // namespace ime_pinyin diff --git a/src/share/sync.cpp b/src/share/sync.cpp new file mode 100644 index 0000000..a572822 --- /dev/null +++ b/src/share/sync.cpp @@ -0,0 +1,97 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../include/sync.h" +#include +#include + +#ifdef ___SYNC_ENABLED___ + +namespace ime_pinyin { + +Sync::Sync() : userdict_(NULL), dictfile_(NULL), last_count_(0) {}; + +Sync::~Sync() {} + +bool Sync::begin(const char* filename) { + if (userdict_) { + finish(); + } + + if (!filename) { + return false; + } + + dictfile_ = strdup(filename); + if (!dictfile_) { + return false; + } + + userdict_ = new UserDict(); + if (!userdict_) { + free(dictfile_); + dictfile_ = NULL; + return false; + } + + if (userdict_->load_dict((const char*)dictfile_, kUserDictIdStart, kUserDictIdEnd) == false) { + delete userdict_; + userdict_ = NULL; + free(dictfile_); + dictfile_ = NULL; + return false; + } + + userdict_->set_limit(kUserDictMaxLemmaCount, kUserDictMaxLemmaSize, kUserDictRatio); + + return true; +} + +int Sync::put_lemmas(char16* lemmas, int len) { return userdict_->put_lemmas_no_sync_from_utf16le_string(lemmas, len); } + +int Sync::get_lemmas(char16* str, int size) { return userdict_->get_sync_lemmas_in_utf16le_string_from_beginning(str, size, &last_count_); } + +int Sync::get_last_got_count() { return last_count_; } + +int Sync::get_total_count() { return userdict_->get_sync_count(); } + +void Sync::clear_last_got() { + if (last_count_ < 0) { + return; + } + userdict_->clear_sync_lemmas(0, last_count_); + last_count_ = 0; +} + +void Sync::finish() { + if (userdict_) { + userdict_->close_dict(); + delete userdict_; + userdict_ = NULL; + free(dictfile_); + dictfile_ = NULL; + last_count_ = 0; + } +} + +int Sync::get_capacity() { + UserDict::UserDictStat stat; + userdict_->state(&stat); + return stat.limit_lemma_count - stat.lemma_count; +} + +} // namespace ime_pinyin +#endif diff --git a/src/share/userdict.cpp b/src/share/userdict.cpp new file mode 100644 index 0000000..5adc430 --- /dev/null +++ b/src/share/userdict.cpp @@ -0,0 +1,2063 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../include/userdict.h" +#include "../include/splparser.h" +#include "../include/ngram.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace ime_pinyin { + +#ifdef ___DEBUG_PERF___ +static uint64 _ellapse_ = 0; +static struct timeval _tv_start_, _tv_end_; +#define DEBUG_PERF_BEGIN \ + do { \ + gettimeofday(&_tv_start_, NULL); \ + } while (0) +#define DEBUG_PERF_END \ + do { \ + gettimeofday(&_tv_end_, NULL); \ + _ellapse_ = (_tv_end_.tv_sec - _tv_start_.tv_sec) * 1000000 + (_tv_end_.tv_usec - _tv_start_.tv_usec); \ + } while (0) +#define LOGD_PERF(message) ALOGD("PERFORMANCE[%s] %llu usec.", message, _ellapse_); +#else +#define DEBUG_PERF_BEGIN +#define DEBUG_PERF_END +#define LOGD_PERF(message) +#endif + +// XXX File load and write are thread-safe by g_mutex_ +static pthread_mutex_t g_mutex_ = PTHREAD_MUTEX_INITIALIZER; +static struct timeval g_last_update_ = {0, 0}; + +inline uint32 UserDict::get_dict_file_size(UserDictInfo *info) { + return (4 + info->lemma_size + (info->lemma_count << 3) +#ifdef ___PREDICT_ENABLED___ + + (info->lemma_count << 2) +#endif +#ifdef ___SYNC_ENABLED___ + + (info->sync_count << 2) +#endif + + sizeof(*info)); +} + +inline LmaScoreType UserDict::translate_score(int raw_score) { + // 1) ori_freq: original user frequency + uint32 ori_freq = extract_score_freq(raw_score); + // 2) lmt_off: lmt index (week offset for example) + uint64 
lmt_off = ((raw_score & 0xffff0000) >> 16); + if (kUserDictLMTBitWidth < 16) { + uint64 mask = ~(1 << kUserDictLMTBitWidth); + lmt_off &= mask; + } + // 3) now_off: current time index (current week offset for example) + // assuming load_time_ is around current time + uint64 now_off = load_time_.tv_sec; + now_off = (now_off - kUserDictLMTSince) / kUserDictLMTGranularity; + now_off = (now_off << (64 - kUserDictLMTBitWidth)); + now_off = (now_off >> (64 - kUserDictLMTBitWidth)); + // 4) factor: decide expand-factor + int delta = now_off - lmt_off; + if (delta > 4) delta = 4; + int factor = 80 - (delta << 4); + + double tf = (double)(dict_info_.total_nfreq + total_other_nfreq_); + return (LmaScoreType)(log((double)factor * (double)ori_freq / tf) * NGram::kLogValueAmplifier); +} + +inline int UserDict::extract_score_freq(int raw_score) { + // Frequence stored in lowest 16 bits + int freq = (raw_score & 0x0000ffff); + return freq; +} + +inline uint64 UserDict::extract_score_lmt(int raw_score) { + uint64 lmt = ((raw_score & 0xffff0000) >> 16); + if (kUserDictLMTBitWidth < 16) { + uint64 mask = ~(1 << kUserDictLMTBitWidth); + lmt &= mask; + } + lmt = lmt * kUserDictLMTGranularity + kUserDictLMTSince; + return lmt; +} + +inline int UserDict::build_score(uint64 lmt, int freq) { + lmt = (lmt - kUserDictLMTSince) / kUserDictLMTGranularity; + lmt = (lmt << (64 - kUserDictLMTBitWidth)); + lmt = (lmt >> (64 - kUserDictLMTBitWidth)); + uint16 lmt16 = (uint16)lmt; + int s = freq; + s &= 0x0000ffff; + s = (lmt16 << 16) | s; + return s; +} + +inline int64 UserDict::utf16le_atoll(uint16 *s, int len) { + int64 ret = 0; + if (len <= 0) return ret; + + int flag = 1; + const uint16 *endp = s + len; + if (*s == '-') { + flag = -1; + s++; + } else if (*s == '+') { + s++; + } + + while (*s >= '0' && *s <= '9' && s < endp) { + ret += ret * 10 + (*s) - '0'; + s++; + } + return ret * flag; +} + +inline int UserDict::utf16le_lltoa(int64 v, uint16 *s, int size) { + if (!s || size <= 0) return 0; 
+ uint16 *endp = s + size; + int ret_len = 0; + if (v < 0) { + *(s++) = '-'; + ++ret_len; + v *= -1; + } + + uint16 *b = s; + while (s < endp && v != 0) { + *(s++) = '0' + (v % 10); + v = v / 10; + ++ret_len; + } + + if (v != 0) return 0; + + --s; + + while (b < s) { + *b = *s; + ++b, --s; + } + + return ret_len; +} + +inline void UserDict::set_lemma_flag(uint32 offset, uint8 flag) { + offset &= kUserDictOffsetMask; + lemmas_[offset] |= flag; +} + +inline char UserDict::get_lemma_flag(uint32 offset) { + offset &= kUserDictOffsetMask; + return (char)(lemmas_[offset]); +} + +inline char UserDict::get_lemma_nchar(uint32 offset) { + offset &= kUserDictOffsetMask; + return (char)(lemmas_[offset + 1]); +} + +inline uint16 *UserDict::get_lemma_spell_ids(uint32 offset) { + offset &= kUserDictOffsetMask; + return (uint16 *)(lemmas_ + offset + 2); +} + +inline uint16 *UserDict::get_lemma_word(uint32 offset) { + offset &= kUserDictOffsetMask; + uint8 nchar = get_lemma_nchar(offset); + return (uint16 *)(lemmas_ + offset + 2 + (nchar << 1)); +} + +inline LemmaIdType UserDict::get_max_lemma_id() { + // When a lemma is deleted, we don't not claim its id back for + // simplicity and performance + return start_id_ + dict_info_.lemma_count - 1; +} + +inline bool UserDict::is_valid_lemma_id(LemmaIdType id) { + if (id >= start_id_ && id <= get_max_lemma_id()) return true; + return false; +} + +inline bool UserDict::is_valid_state() { + if (state_ == USER_DICT_NONE) return false; + return true; +} + +UserDict::UserDict() + : start_id_(0), + version_(0), + lemmas_(NULL), + offsets_(NULL), + scores_(NULL), + ids_(NULL), +#ifdef ___PREDICT_ENABLED___ + predicts_(NULL), +#endif +#ifdef ___SYNC_ENABLED___ + syncs_(NULL), + sync_count_size_(0), +#endif + offsets_by_id_(NULL), + lemma_count_left_(0), + lemma_size_left_(0), + dict_file_(NULL), + state_(USER_DICT_NONE) { + memset(&dict_info_, 0, sizeof(dict_info_)); + memset(&load_time_, 0, sizeof(load_time_)); +#ifdef ___CACHE_ENABLED___ + 
cache_init(); +#endif +} + +UserDict::~UserDict() { close_dict(); } + +bool UserDict::load_dict(const char *file_name, LemmaIdType start_id, LemmaIdType end_id) { +#ifdef ___DEBUG_PERF___ + DEBUG_PERF_BEGIN; +#endif + dict_file_ = strdup(file_name); + if (!dict_file_) return false; + + start_id_ = start_id; + + if (false == validate(file_name) && false == reset(file_name)) { + goto error; + } + if (false == load(file_name, start_id)) { + goto error; + } + + state_ = USER_DICT_SYNC; + + gettimeofday(&load_time_, NULL); + +#ifdef ___DEBUG_PERF___ + DEBUG_PERF_END; + LOGD_PERF("load_dict"); +#endif + return true; +error: + free((void *)dict_file_); + start_id_ = 0; + return false; +} + +bool UserDict::close_dict() { + if (state_ == USER_DICT_NONE) return true; + if (state_ == USER_DICT_SYNC) goto out; + + // If dictionary is written back by others, + // we can not simply write back here + // To do a safe flush, we have to discard all newly added + // lemmas and try to reload dict file. + pthread_mutex_lock(&g_mutex_); + if (load_time_.tv_sec > g_last_update_.tv_sec || (load_time_.tv_sec == g_last_update_.tv_sec && load_time_.tv_usec > g_last_update_.tv_usec)) { + write_back(); + gettimeofday(&g_last_update_, NULL); + } + pthread_mutex_unlock(&g_mutex_); + +out: + free((void *)dict_file_); + free(lemmas_); + free(offsets_); + free(offsets_by_id_); + free(scores_); + free(ids_); +#ifdef ___PREDICT_ENABLED___ + free(predicts_); +#endif + + version_ = 0; + dict_file_ = NULL; + lemmas_ = NULL; +#ifdef ___SYNC_ENABLED___ + syncs_ = NULL; + sync_count_size_ = 0; +#endif + offsets_ = NULL; + offsets_by_id_ = NULL; + scores_ = NULL; + ids_ = NULL; +#ifdef ___PREDICT_ENABLED___ + predicts_ = NULL; +#endif + + memset(&dict_info_, 0, sizeof(dict_info_)); + lemma_count_left_ = 0; + lemma_size_left_ = 0; + state_ = USER_DICT_NONE; + + return true; +} + +size_t UserDict::number_of_lemmas() { return dict_info_.lemma_count; } + +void UserDict::reset_milestones(uint16 from_step, 
MileStoneHandle from_handle) { return; } + +MileStoneHandle UserDict::extend_dict(MileStoneHandle from_handle, const DictExtPara *dep, LmaPsbItem *lpi_items, size_t lpi_max, size_t *lpi_num) { + if (is_valid_state() == false) return 0; + + bool need_extend = false; + +#ifdef ___DEBUG_PERF___ + DEBUG_PERF_BEGIN; +#endif + *lpi_num = _get_lpis(dep->splids, dep->splids_extended + 1, lpi_items, lpi_max, &need_extend); +#ifdef ___DEBUG_PERF___ + DEBUG_PERF_END; + LOGD_PERF("extend_dict"); +#endif + return ((*lpi_num > 0 || need_extend) ? 1 : 0); +} + +int UserDict::is_fuzzy_prefix_spell_id(const uint16 *id1, uint16 len1, const UserDictSearchable *searchable) { + if (len1 < searchable->splids_len) return 0; + + SpellingTrie &spl_trie = SpellingTrie::get_instance(); + uint32 i = 0; + for (i = 0; i < searchable->splids_len; i++) { + const char py1 = *spl_trie.get_spelling_str(id1[i]); + uint16 off = 8 * (i % 4); + const char py2 = ((searchable->signature[i / 4] & (0xff << off)) >> off); + if (py1 == py2) continue; + return 0; + } + return 1; +} + +int UserDict::fuzzy_compare_spell_id(const uint16 *id1, uint16 len1, const UserDictSearchable *searchable) { + if (len1 < searchable->splids_len) return -1; + if (len1 > searchable->splids_len) return 1; + + SpellingTrie &spl_trie = SpellingTrie::get_instance(); + uint32 i = 0; + for (i = 0; i < len1; i++) { + const char py1 = *spl_trie.get_spelling_str(id1[i]); + uint16 off = 8 * (i % 4); + const char py2 = ((searchable->signature[i / 4] & (0xff << off)) >> off); + if (py1 == py2) continue; + if (py1 > py2) return 1; + return -1; + } + return 0; +} + +bool UserDict::is_prefix_spell_id(const uint16 *fullids, uint16 fulllen, const UserDictSearchable *searchable) { + if (fulllen < searchable->splids_len) return false; + + uint32 i = 0; + for (; i < searchable->splids_len; i++) { + uint16 start_id = searchable->splid_start[i]; + uint16 count = searchable->splid_count[i]; + if (fullids[i] >= start_id && fullids[i] < start_id + count) 
+ continue; + else + return false; + } + return true; +} + +bool UserDict::equal_spell_id(const uint16 *fullids, uint16 fulllen, const UserDictSearchable *searchable) { + if (fulllen != searchable->splids_len) return false; + + uint32 i = 0; + for (; i < fulllen; i++) { + uint16 start_id = searchable->splid_start[i]; + uint16 count = searchable->splid_count[i]; + if (fullids[i] >= start_id && fullids[i] < start_id + count) + continue; + else + return false; + } + return true; +} + +int32 UserDict::locate_first_in_offsets(const UserDictSearchable *searchable) { + int32 begin = 0; + int32 end = dict_info_.lemma_count - 1; + int32 middle = -1; + + int32 first_prefix = middle; + int32 last_matched = middle; + + while (begin <= end) { + middle = (begin + end) >> 1; + uint32 offset = offsets_[middle]; + uint8 nchar = get_lemma_nchar(offset); + const uint16 *splids = get_lemma_spell_ids(offset); + int cmp = fuzzy_compare_spell_id(splids, nchar, searchable); + int pre = is_fuzzy_prefix_spell_id(splids, nchar, searchable); + + if (pre) first_prefix = middle; + + if (cmp < 0) { + begin = middle + 1; + } else if (cmp > 0) { + end = middle - 1; + } else { + end = middle - 1; + last_matched = middle; + } + } + + return first_prefix; +} + +void UserDict::prepare_locate(UserDictSearchable *searchable, const uint16 *splid_str, uint16 splid_str_len) { + searchable->splids_len = splid_str_len; + memset(searchable->signature, 0, sizeof(searchable->signature)); + + SpellingTrie &spl_trie = SpellingTrie::get_instance(); + uint32 i = 0; + for (; i < splid_str_len; i++) { + if (spl_trie.is_half_id(splid_str[i])) { + searchable->splid_count[i] = spl_trie.half_to_full(splid_str[i], &(searchable->splid_start[i])); + } else { + searchable->splid_count[i] = 1; + searchable->splid_start[i] = splid_str[i]; + } + const unsigned char py = *spl_trie.get_spelling_str(splid_str[i]); + searchable->signature[i >> 2] |= (py << (8 * (i % 4))); + } +} + +size_t UserDict::get_lpis(const uint16 *splid_str, 
uint16 splid_str_len, LmaPsbItem *lpi_items, size_t lpi_max) { return _get_lpis(splid_str, splid_str_len, lpi_items, lpi_max, NULL); } + +size_t UserDict::_get_lpis(const uint16 *splid_str, uint16 splid_str_len, LmaPsbItem *lpi_items, size_t lpi_max, bool *need_extend) { + bool tmp_extend; + if (!need_extend) need_extend = &tmp_extend; + + *need_extend = false; + + if (is_valid_state() == false) return 0; + if (lpi_max <= 0) return 0; + + if (0 == pthread_mutex_trylock(&g_mutex_)) { + if (load_time_.tv_sec < g_last_update_.tv_sec || (load_time_.tv_sec == g_last_update_.tv_sec && load_time_.tv_usec < g_last_update_.tv_usec)) { + // Others updated disk file, have to reload + pthread_mutex_unlock(&g_mutex_); + flush_cache(); + } else { + pthread_mutex_unlock(&g_mutex_); + } + } else { + } + + UserDictSearchable searchable; + prepare_locate(&searchable, splid_str, splid_str_len); + + uint32 max_off = dict_info_.lemma_count; +#ifdef ___CACHE_ENABLED___ + int32 middle; + uint32 start, count; + bool cached = cache_hit(&searchable, &start, &count); + if (cached) { + middle = start; + max_off = start + count; + } else { + middle = locate_first_in_offsets(&searchable); + start = middle; + } +#else + int32 middle = locate_first_in_offsets(&searchable); +#endif + + if (middle == -1) { +#ifdef ___CACHE_ENABLED___ + if (!cached) cache_push(USER_DICT_MISS_CACHE, &searchable, 0, 0); +#endif + return 0; + } + + size_t lpi_current = 0; + + bool fuzzy_break = false; + bool prefix_break = false; + while ((size_t)middle < max_off && !fuzzy_break && !prefix_break) { + if (lpi_current >= lpi_max) break; + uint32 offset = offsets_[middle]; + // Ignore deleted lemmas + if (offset & kUserDictOffsetFlagRemove) { + middle++; + continue; + } + uint8 nchar = get_lemma_nchar(offset); + uint16 *splids = get_lemma_spell_ids(offset); +#ifdef ___CACHE_ENABLED___ + if (!cached && 0 != fuzzy_compare_spell_id(splids, nchar, &searchable)) { +#else + if (0 != fuzzy_compare_spell_id(splids, nchar, 
&searchable)) { +#endif + fuzzy_break = true; + } + + if (prefix_break == false) { + if (is_fuzzy_prefix_spell_id(splids, nchar, &searchable)) { + if (*need_extend == false && is_prefix_spell_id(splids, nchar, &searchable)) { + *need_extend = true; + } + } else { + prefix_break = true; + } + } + + if (equal_spell_id(splids, nchar, &searchable) == true) { + lpi_items[lpi_current].psb = translate_score(scores_[middle]); + lpi_items[lpi_current].id = ids_[middle]; + lpi_items[lpi_current].lma_len = nchar; + lpi_current++; + } + middle++; + } + +#ifdef ___CACHE_ENABLED___ + if (!cached) { + count = middle - start; + cache_push(USER_DICT_CACHE, &searchable, start, count); + } +#endif + + return lpi_current; +} + +uint16 UserDict::get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, uint16 str_max) { + if (is_valid_state() == false) return 0; + if (is_valid_lemma_id(id_lemma) == false) return 0; + uint32 offset = offsets_by_id_[id_lemma - start_id_]; + uint8 nchar = get_lemma_nchar(offset); + char16 *str = get_lemma_word(offset); + uint16 m = nchar < str_max - 1 ? 
nchar : str_max - 1; + int i = 0; + for (; i < m; i++) { + str_buf[i] = str[i]; + } + str_buf[i] = 0; + return m; +} + +uint16 UserDict::get_lemma_splids(LemmaIdType id_lemma, uint16 *splids, uint16 splids_max, bool arg_valid) { + if (is_valid_lemma_id(id_lemma) == false) return 0; + uint32 offset = offsets_by_id_[id_lemma - start_id_]; + uint8 nchar = get_lemma_nchar(offset); + const uint16 *ids = get_lemma_spell_ids(offset); + int i = 0; + for (; i < nchar && i < splids_max; i++) splids[i] = ids[i]; + return i; +} + +size_t UserDict::predict(const char16 last_hzs[], uint16 hzs_len, NPredictItem *npre_items, size_t npre_max, size_t b4_used) { + uint32 new_added = 0; +#ifdef ___PREDICT_ENABLED___ + int32 end = dict_info_.lemma_count - 1; + int j = locate_first_in_predicts((const uint16 *)last_hzs, hzs_len); + if (j == -1) return 0; + + while (j <= end) { + uint32 offset = predicts_[j]; + // Ignore deleted lemmas + if (offset & kUserDictOffsetFlagRemove) { + j++; + continue; + } + uint32 nchar = get_lemma_nchar(offset); + uint16 *words = get_lemma_word(offset); + uint16 *splids = get_lemma_spell_ids(offset); + + if (nchar <= hzs_len) { + j++; + continue; + } + + if (memcmp(words, last_hzs, hzs_len << 1) == 0) { + if (new_added >= npre_max) { + return new_added; + } + uint32 cpy_len = (nchar < kMaxPredictSize ? 
(nchar << 1) : (kMaxPredictSize << 1)) - (hzs_len << 1); + npre_items[new_added].his_len = hzs_len; + npre_items[new_added].psb = get_lemma_score(words, splids, nchar); + memcpy(npre_items[new_added].pre_hzs, words + hzs_len, cpy_len); + if ((cpy_len >> 1) < kMaxPredictSize) { + npre_items[new_added].pre_hzs[cpy_len >> 1] = 0; + } + new_added++; + } else { + break; + } + + j++; + } +#endif + return new_added; +} + +int32 UserDict::locate_in_offsets(char16 lemma_str[], uint16 splid_str[], uint16 lemma_len) { + int32 max_off = dict_info_.lemma_count; + + UserDictSearchable searchable; + prepare_locate(&searchable, splid_str, lemma_len); +#ifdef ___CACHE_ENABLED___ + int32 off; + uint32 start, count; + bool cached = load_cache(&searchable, &start, &count); + if (cached) { + off = start; + max_off = start + count; + } else { + off = locate_first_in_offsets(&searchable); + start = off; + } +#else + int32 off = locate_first_in_offsets(&searchable); +#endif + + if (off == -1) { + return off; + } + + while (off < max_off) { + uint32 offset = offsets_[off]; + if (offset & kUserDictOffsetFlagRemove) { + off++; + continue; + } + uint16 *splids = get_lemma_spell_ids(offset); +#ifdef ___CACHE_ENABLED___ + if (!cached && 0 != fuzzy_compare_spell_id(splids, lemma_len, &searchable)) break; +#else + if (0 != fuzzy_compare_spell_id(splids, lemma_len, &searchable)) break; +#endif + if (equal_spell_id(splids, lemma_len, &searchable) == true) { + uint16 *str = get_lemma_word(offset); + uint32 i = 0; + for (i = 0; i < lemma_len; i++) { + if (str[i] == lemma_str[i]) continue; + break; + } + if (i < lemma_len) { + off++; + continue; + } +#ifdef ___CACHE_ENABLED___ + // No need to save_cache here, since current function is invoked by + // put_lemma. It's rarely possible for a user input same lemma twice. + // That means first time user type a new lemma, it is newly added into + // user dictionary, then it's possible that user type the same lemma + // again. 
+ // Another reason save_cache can not be invoked here is this function + // aborts when lemma is found, and it never knows the count. +#endif + return off; + } + off++; + } + + return -1; +} + +#ifdef ___PREDICT_ENABLED___ +uint32 UserDict::locate_where_to_insert_in_predicts(const uint16 *words, int lemma_len) { + int32 begin = 0; + int32 end = dict_info_.lemma_count - 1; + int32 middle = end; + + uint32 last_matched = middle; + + while (begin <= end) { + middle = (begin + end) >> 1; + uint32 offset = offsets_[middle]; + uint8 nchar = get_lemma_nchar(offset); + const uint16 *ws = get_lemma_word(offset); + + uint32 minl = nchar < lemma_len ? nchar : lemma_len; + uint32 k = 0; + int cmp = 0; + + for (; k < minl; k++) { + if (ws[k] < words[k]) { + cmp = -1; + break; + } else if (ws[k] > words[k]) { + cmp = 1; + break; + } + } + if (cmp == 0) { + if (nchar < lemma_len) + cmp = -1; + else if (nchar > lemma_len) + cmp = 1; + } + + if (cmp < 0) { + begin = middle + 1; + last_matched = middle; + } else if (cmp > 0) { + end = middle - 1; + } else { + end = middle - 1; + last_matched = middle; + } + } + + return last_matched; +} + +int32 UserDict::locate_first_in_predicts(const uint16 *words, int lemma_len) { + int32 begin = 0; + int32 end = dict_info_.lemma_count - 1; + int32 middle = -1; + + int32 last_matched = middle; + + while (begin <= end) { + middle = (begin + end) >> 1; + uint32 offset = offsets_[middle]; + uint8 nchar = get_lemma_nchar(offset); + const uint16 *ws = get_lemma_word(offset); + + uint32 minl = nchar < lemma_len ? 
nchar : lemma_len; + uint32 k = 0; + int cmp = 0; + + for (; k < minl; k++) { + if (ws[k] < words[k]) { + cmp = -1; + break; + } else if (ws[k] > words[k]) { + cmp = 1; + break; + } + } + if (cmp == 0) { + if (nchar >= lemma_len) last_matched = middle; + if (nchar < lemma_len) + cmp = -1; + else if (nchar > lemma_len) + cmp = 1; + } + + if (cmp < 0) { + begin = middle + 1; + } else if (cmp > 0) { + end = middle - 1; + } else { + end = middle - 1; + } + } + + return last_matched; +} + +#endif + +LemmaIdType UserDict::get_lemma_id(char16 lemma_str[], uint16 splids[], uint16 lemma_len) { + int32 off = locate_in_offsets(lemma_str, splids, lemma_len); + if (off == -1) { + return 0; + } + + return ids_[off]; +} + +LmaScoreType UserDict::get_lemma_score(LemmaIdType lemma_id) { + if (is_valid_state() == false) return 0; + if (is_valid_lemma_id(lemma_id) == false) return 0; + + return translate_score(_get_lemma_score(lemma_id)); +} + +LmaScoreType UserDict::get_lemma_score(char16 lemma_str[], uint16 splids[], uint16 lemma_len) { + if (is_valid_state() == false) return 0; + return translate_score(_get_lemma_score(lemma_str, splids, lemma_len)); +} + +int UserDict::_get_lemma_score(LemmaIdType lemma_id) { + if (is_valid_state() == false) return 0; + if (is_valid_lemma_id(lemma_id) == false) return 0; + + uint32 offset = offsets_by_id_[lemma_id - start_id_]; + + uint32 nchar = get_lemma_nchar(offset); + uint16 *spl = get_lemma_spell_ids(offset); + uint16 *wrd = get_lemma_word(offset); + + int32 off = locate_in_offsets(wrd, spl, nchar); + if (off == -1) { + return 0; + } + + return scores_[off]; +} + +int UserDict::_get_lemma_score(char16 lemma_str[], uint16 splids[], uint16 lemma_len) { + if (is_valid_state() == false) return 0; + + int32 off = locate_in_offsets(lemma_str, splids, lemma_len); + if (off == -1) { + return 0; + } + + return scores_[off]; +} + +#ifdef ___SYNC_ENABLED___ +void UserDict::remove_lemma_from_sync_list(uint32 offset) { + offset &= kUserDictOffsetMask; + 
uint32 i = 0; + for (; i < dict_info_.sync_count; i++) { + unsigned int off = (syncs_[i] & kUserDictOffsetMask); + if (off == offset) break; + } + if (i < dict_info_.sync_count) { + syncs_[i] = syncs_[dict_info_.sync_count - 1]; + dict_info_.sync_count--; + } +} +#endif + +#ifdef ___PREDICT_ENABLED___ +void UserDict::remove_lemma_from_predict_list(uint32 offset) { + offset &= kUserDictOffsetMask; + uint32 i = 0; + for (; i < dict_info_.lemma_count; i++) { + unsigned int off = (predicts_[i] & kUserDictOffsetMask); + if (off == offset) { + predicts_[i] |= kUserDictOffsetFlagRemove; + break; + } + } +} +#endif + +bool UserDict::remove_lemma_by_offset_index(int offset_index) { + if (is_valid_state() == false) return 0; + + int32 off = offset_index; + if (off == -1) { + return false; + } + + uint32 offset = offsets_[off]; + uint32 nchar = get_lemma_nchar(offset); + + offsets_[off] |= kUserDictOffsetFlagRemove; + +#ifdef ___SYNC_ENABLED___ + // Remove corresponding sync item + remove_lemma_from_sync_list(offset); +#endif + +#ifdef ___PREDICT_ENABLED___ + remove_lemma_from_predict_list(offset); +#endif + dict_info_.free_count++; + dict_info_.free_size += (2 + (nchar << 2)); + + if (state_ < USER_DICT_OFFSET_DIRTY) state_ = USER_DICT_OFFSET_DIRTY; + return true; +} + +bool UserDict::remove_lemma(LemmaIdType lemma_id) { + if (is_valid_state() == false) return 0; + if (is_valid_lemma_id(lemma_id) == false) return false; + uint32 offset = offsets_by_id_[lemma_id - start_id_]; + + uint32 nchar = get_lemma_nchar(offset); + uint16 *spl = get_lemma_spell_ids(offset); + uint16 *wrd = get_lemma_word(offset); + + int32 off = locate_in_offsets(wrd, spl, nchar); + + return remove_lemma_by_offset_index(off); +} + +void UserDict::flush_cache() { + LemmaIdType start_id = start_id_; + const char *file = strdup(dict_file_); + if (!file) return; + close_dict(); + load_dict(file, start_id, kUserDictIdEnd); + free((void *)file); +#ifdef ___CACHE_ENABLED___ + cache_init(); +#endif + return; +} 
+ +bool UserDict::reset(const char *file) { + FILE *fp = fopen(file, "w+"); + if (!fp) { + return false; + } + uint32 version = kUserDictVersion; + size_t wred = fwrite(&version, 1, 4, fp); + UserDictInfo info; + memset(&info, 0, sizeof(info)); + // By default, no limitation for lemma count and size + // thereby, reclaim_ratio is never used + wred += fwrite(&info, 1, sizeof(info), fp); + if (wred != sizeof(info) + sizeof(version)) { + fclose(fp); + unlink(file); + return false; + } + fclose(fp); + return true; +} + +bool UserDict::validate(const char *file) { + // b is ignored in POSIX compatible os including Linux + // while b is important flag for Windows to specify binary mode + FILE *fp = fopen(file, "rb"); + if (!fp) { + return false; + } + + size_t size; + size_t readed; + uint32 version; + UserDictInfo dict_info; + + // validate + int err = fseek(fp, 0, SEEK_END); + if (err) { + goto error; + } + + size = ftell(fp); + if (size < 4 + sizeof(dict_info)) { + goto error; + } + + err = fseek(fp, 0, SEEK_SET); + if (err) { + goto error; + } + + readed = fread(&version, 1, sizeof(version), fp); + if (readed < sizeof(version)) { + goto error; + } + if (version != kUserDictVersion) { + goto error; + } + + err = fseek(fp, -1 * sizeof(dict_info), SEEK_END); + if (err) { + goto error; + } + + readed = fread(&dict_info, 1, sizeof(dict_info), fp); + if (readed != sizeof(dict_info)) { + goto error; + } + + if (size != get_dict_file_size(&dict_info)) { + goto error; + } + + fclose(fp); + return true; + +error: + fclose(fp); + return false; +} + +bool UserDict::load(const char *file, LemmaIdType start_id) { + if (0 != pthread_mutex_trylock(&g_mutex_)) { + return false; + } + // b is ignored in POSIX compatible os including Linux + // while b is important flag for Windows to specify binary mode + FILE *fp = fopen(file, "rb"); + if (!fp) { + pthread_mutex_unlock(&g_mutex_); + return false; + } + + size_t readed, toread; + UserDictInfo dict_info; + uint8 *lemmas = NULL; + 
uint32 *offsets = NULL; +#ifdef ___SYNC_ENABLED___ + uint32 *syncs = NULL; +#endif + uint32 *scores = NULL; + uint32 *ids = NULL; + uint32 *offsets_by_id = NULL; +#ifdef ___PREDICT_ENABLED___ + uint32 *predicts = NULL; +#endif + size_t i; + int err; + + err = fseek(fp, -1 * sizeof(dict_info), SEEK_END); + if (err) goto error; + + readed = fread(&dict_info, 1, sizeof(dict_info), fp); + if (readed != sizeof(dict_info)) goto error; + + lemmas = (uint8 *)malloc(dict_info.lemma_size + (kUserDictPreAlloc * (2 + (kUserDictAverageNchar << 2)))); + + if (!lemmas) goto error; + + offsets = (uint32 *)malloc((dict_info.lemma_count + kUserDictPreAlloc) << 2); + if (!offsets) goto error; + +#ifdef ___PREDICT_ENABLED___ + predicts = (uint32 *)malloc((dict_info.lemma_count + kUserDictPreAlloc) << 2); + if (!predicts) goto error; +#endif + +#ifdef ___SYNC_ENABLED___ + syncs = (uint32 *)malloc((dict_info.sync_count + kUserDictPreAlloc) << 2); + if (!syncs) goto error; +#endif + + scores = (uint32 *)malloc((dict_info.lemma_count + kUserDictPreAlloc) << 2); + if (!scores) goto error; + + ids = (uint32 *)malloc((dict_info.lemma_count + kUserDictPreAlloc) << 2); + if (!ids) goto error; + + offsets_by_id = (uint32 *)malloc((dict_info.lemma_count + kUserDictPreAlloc) << 2); + if (!offsets_by_id) goto error; + + err = fseek(fp, 4, SEEK_SET); + if (err) goto error; + + readed = 0; + while (readed < dict_info.lemma_size && !ferror(fp) && !feof(fp)) { + readed += fread(lemmas + readed, 1, dict_info.lemma_size - readed, fp); + } + if (readed < dict_info.lemma_size) goto error; + + toread = (dict_info.lemma_count << 2); + readed = 0; + while (readed < toread && !ferror(fp) && !feof(fp)) { + readed += fread((((uint8 *)offsets) + readed), 1, toread - readed, fp); + } + if (readed < toread) goto error; + +#ifdef ___PREDICT_ENABLED___ + toread = (dict_info.lemma_count << 2); + readed = 0; + while (readed < toread && !ferror(fp) && !feof(fp)) { + readed += fread((((uint8 *)predicts) + readed), 1, 
toread - readed, fp); + } + if (readed < toread) goto error; +#endif + + readed = 0; + while (readed < toread && !ferror(fp) && !feof(fp)) { + readed += fread((((uint8 *)scores) + readed), 1, toread - readed, fp); + } + if (readed < toread) goto error; + +#ifdef ___SYNC_ENABLED___ + toread = (dict_info.sync_count << 2); + readed = 0; + while (readed < toread && !ferror(fp) && !feof(fp)) { + readed += fread((((uint8 *)syncs) + readed), 1, toread - readed, fp); + } + if (readed < toread) goto error; +#endif + + for (i = 0; i < dict_info.lemma_count; i++) { + ids[i] = start_id + i; + offsets_by_id[i] = offsets[i]; + } + + lemmas_ = lemmas; + offsets_ = offsets; +#ifdef ___SYNC_ENABLED___ + syncs_ = syncs; + sync_count_size_ = dict_info.sync_count + kUserDictPreAlloc; +#endif + offsets_by_id_ = offsets_by_id; + scores_ = scores; + ids_ = ids; +#ifdef ___PREDICT_ENABLED___ + predicts_ = predicts; +#endif + lemma_count_left_ = kUserDictPreAlloc; + lemma_size_left_ = kUserDictPreAlloc * (2 + (kUserDictAverageNchar << 2)); + memcpy(&dict_info_, &dict_info, sizeof(dict_info)); + state_ = USER_DICT_SYNC; + + fclose(fp); + + pthread_mutex_unlock(&g_mutex_); + return true; + +error: + if (lemmas) free(lemmas); + if (offsets) free(offsets); +#ifdef ___SYNC_ENABLED___ + if (syncs) free(syncs); +#endif + if (scores) free(scores); + if (ids) free(ids); + if (offsets_by_id) free(offsets_by_id); +#ifdef ___PREDICT_ENABLED___ + if (predicts) free(predicts); +#endif + fclose(fp); + pthread_mutex_unlock(&g_mutex_); + return false; +} + +void UserDict::write_back() { + // XXX write back is only allowed from close_dict due to thread-safe sake + if (state_ == USER_DICT_NONE || state_ == USER_DICT_SYNC) return; + int fd = open(dict_file_, O_WRONLY); + if (fd == -1) return; + switch (state_) { + case USER_DICT_DEFRAGMENTED: + write_back_all(fd); + break; + case USER_DICT_LEMMA_DIRTY: + write_back_lemma(fd); + break; + case USER_DICT_OFFSET_DIRTY: + write_back_offset(fd); + break; + case 
USER_DICT_SCORE_DIRTY: + write_back_score(fd); + break; +#ifdef ___SYNC_ENABLED___ + case USER_DICT_SYNC_DIRTY: + write_back_sync(fd); + break; +#endif + default: + break; + } + // It seems truncate is not need on Linux, Windows except Mac + // I am doing it here anyway for safety. + off_t cur = lseek(fd, 0, SEEK_CUR); + ftruncate(fd, cur); + close(fd); + state_ = USER_DICT_SYNC; +} + +#ifdef ___SYNC_ENABLED___ +void UserDict::write_back_sync(int fd) { + int err = lseek(fd, + 4 + dict_info_.lemma_size + (dict_info_.lemma_count << 3) +#ifdef ___PREDICT_ENABLED___ + + (dict_info_.lemma_count << 2) +#endif + , + SEEK_SET); + if (err == -1) return; + write(fd, syncs_, dict_info_.sync_count << 2); + write(fd, &dict_info_, sizeof(dict_info_)); +} +#endif + +void UserDict::write_back_offset(int fd) { + int err = lseek(fd, 4 + dict_info_.lemma_size, SEEK_SET); + if (err == -1) return; + write(fd, offsets_, dict_info_.lemma_count << 2); +#ifdef ___PREDICT_ENABLED___ + write(fd, predicts_, dict_info_.lemma_count << 2); +#endif + write(fd, scores_, dict_info_.lemma_count << 2); +#ifdef ___SYNC_ENABLED___ + write(fd, syncs_, dict_info_.sync_count << 2); +#endif + write(fd, &dict_info_, sizeof(dict_info_)); +} + +void UserDict::write_back_score(int fd) { + int err = lseek(fd, + 4 + dict_info_.lemma_size + (dict_info_.lemma_count << 2) +#ifdef ___PREDICT_ENABLED___ + + (dict_info_.lemma_count << 2) +#endif + , + SEEK_SET); + if (err == -1) return; + write(fd, scores_, dict_info_.lemma_count << 2); +#ifdef ___SYNC_ENABLED___ + write(fd, syncs_, dict_info_.sync_count << 2); +#endif + write(fd, &dict_info_, sizeof(dict_info_)); +} + +void UserDict::write_back_lemma(int fd) { + int err = lseek(fd, 4, SEEK_SET); + if (err == -1) return; + // New lemmas are always appended, no need to write whole lemma block + size_t need_write = kUserDictPreAlloc * (2 + (kUserDictAverageNchar << 2)) - lemma_size_left_; + err = lseek(fd, dict_info_.lemma_size - need_write, SEEK_CUR); + if (err == -1) 
return; + write(fd, lemmas_ + dict_info_.lemma_size - need_write, need_write); + + write(fd, offsets_, dict_info_.lemma_count << 2); +#ifdef ___PREDICT_ENABLED___ + write(fd, predicts_, dict_info_.lemma_count << 2); +#endif + write(fd, scores_, dict_info_.lemma_count << 2); +#ifdef ___SYNC_ENABLED___ + write(fd, syncs_, dict_info_.sync_count << 2); +#endif + write(fd, &dict_info_, sizeof(dict_info_)); +} + +void UserDict::write_back_all(int fd) { + // XXX lemma_size is handled differently in writeall + // and writelemma. I update lemma_size and lemma_count in different + // places for these two cases. Should fix it to make it consistent. + int err = lseek(fd, 4, SEEK_SET); + if (err == -1) return; + write(fd, lemmas_, dict_info_.lemma_size); + write(fd, offsets_, dict_info_.lemma_count << 2); +#ifdef ___PREDICT_ENABLED___ + write(fd, predicts_, dict_info_.lemma_count << 2); +#endif + write(fd, scores_, dict_info_.lemma_count << 2); +#ifdef ___SYNC_ENABLED___ + write(fd, syncs_, dict_info_.sync_count << 2); +#endif + write(fd, &dict_info_, sizeof(dict_info_)); +} + +#ifdef ___CACHE_ENABLED___ +bool UserDict::load_cache(UserDictSearchable *searchable, uint32 *offset, uint32 *length) { + UserDictCache *cache = &caches_[searchable->splids_len - 1]; + if (cache->head == cache->tail) return false; + + uint16 j, sig_len = kMaxLemmaSize / 4; + uint16 i = cache->head; + while (1) { + j = 0; + for (; j < sig_len; j++) { + if (cache->signatures[i][j] != searchable->signature[j]) break; + } + if (j < sig_len) { + i++; + if (i >= kUserDictCacheSize) i -= kUserDictCacheSize; + if (i == cache->tail) break; + continue; + } + *offset = cache->offsets[i]; + *length = cache->lengths[i]; + return true; + } + return false; +} + +void UserDict::save_cache(UserDictSearchable *searchable, uint32 offset, uint32 length) { + UserDictCache *cache = &caches_[searchable->splids_len - 1]; + uint16 next = cache->tail; + + cache->offsets[next] = offset; + cache->lengths[next] = length; + uint16 
sig_len = kMaxLemmaSize / 4; + uint16 j = 0; + for (; j < sig_len; j++) { + cache->signatures[next][j] = searchable->signature[j]; + } + + if (++next >= kUserDictCacheSize) { + next -= kUserDictCacheSize; + } + if (next == cache->head) { + cache->head++; + if (cache->head >= kUserDictCacheSize) { + cache->head -= kUserDictCacheSize; + } + } + cache->tail = next; +} + +void UserDict::reset_cache() { memset(caches_, 0, sizeof(caches_)); } + +bool UserDict::load_miss_cache(UserDictSearchable *searchable) { + UserDictMissCache *cache = &miss_caches_[searchable->splids_len - 1]; + if (cache->head == cache->tail) return false; + + uint16 j, sig_len = kMaxLemmaSize / 4; + uint16 i = cache->head; + while (1) { + j = 0; + for (; j < sig_len; j++) { + if (cache->signatures[i][j] != searchable->signature[j]) break; + } + if (j < sig_len) { + i++; + if (i >= kUserDictMissCacheSize) i -= kUserDictMissCacheSize; + if (i == cache->tail) break; + continue; + } + return true; + } + return false; +} + +void UserDict::save_miss_cache(UserDictSearchable *searchable) { + UserDictMissCache *cache = &miss_caches_[searchable->splids_len - 1]; + uint16 next = cache->tail; + + uint16 sig_len = kMaxLemmaSize / 4; + uint16 j = 0; + for (; j < sig_len; j++) { + cache->signatures[next][j] = searchable->signature[j]; + } + + if (++next >= kUserDictMissCacheSize) { + next -= kUserDictMissCacheSize; + } + if (next == cache->head) { + cache->head++; + if (cache->head >= kUserDictMissCacheSize) { + cache->head -= kUserDictMissCacheSize; + } + } + cache->tail = next; +} + +void UserDict::reset_miss_cache() { memset(miss_caches_, 0, sizeof(miss_caches_)); } + +void UserDict::cache_init() { + reset_cache(); + reset_miss_cache(); +} + +bool UserDict::cache_hit(UserDictSearchable *searchable, uint32 *offset, uint32 *length) { + bool hit = load_miss_cache(searchable); + if (hit) { + *offset = 0; + *length = 0; + return true; + } + hit = load_cache(searchable, offset, length); + if (hit) { + return true; + 
} + return false; +} + +void UserDict::cache_push(UserDictCacheType type, UserDictSearchable *searchable, uint32 offset, uint32 length) { + switch (type) { + case USER_DICT_MISS_CACHE: + save_miss_cache(searchable); + break; + case USER_DICT_CACHE: + save_cache(searchable, offset, length); + break; + default: + break; + } +} + +#endif + +void UserDict::defragment(void) { +#ifdef ___DEBUG_PERF___ + DEBUG_PERF_BEGIN; +#endif + if (is_valid_state() == false) return; + // Fixup offsets_, set REMOVE flag to lemma's flag if needed + size_t first_freed = 0; + size_t first_inuse = 0; + while (first_freed < dict_info_.lemma_count) { + // Find first freed offset + while ((offsets_[first_freed] & kUserDictOffsetFlagRemove) == 0 && first_freed < dict_info_.lemma_count) { + first_freed++; + } + if (first_freed < dict_info_.lemma_count) { + // Save REMOVE flag to lemma flag + int off = offsets_[first_freed]; + set_lemma_flag(off, kUserDictLemmaFlagRemove); + } else { + break; + } + // Find first inuse offse after first_freed + first_inuse = first_freed + 1; + while ((offsets_[first_inuse] & kUserDictOffsetFlagRemove) && (first_inuse < dict_info_.lemma_count)) { + // Save REMOVE flag to lemma flag + int off = offsets_[first_inuse]; + set_lemma_flag(off, kUserDictLemmaFlagRemove); + first_inuse++; + } + if (first_inuse >= dict_info_.lemma_count) { + break; + } + // Swap offsets_ + int tmp = offsets_[first_inuse]; + offsets_[first_inuse] = offsets_[first_freed]; + offsets_[first_freed] = tmp; + // Move scores_, no need to swap + tmp = scores_[first_inuse]; + scores_[first_inuse] = scores_[first_freed]; + scores_[first_freed] = tmp; + // Swap ids_ + LemmaIdType tmpid = ids_[first_inuse]; + ids_[first_inuse] = ids_[first_freed]; + ids_[first_freed] = tmpid; + // Go on + first_freed++; + } +#ifdef ___PREDICT_ENABLED___ + // Fixup predicts_ + first_freed = 0; + first_inuse = 0; + while (first_freed < dict_info_.lemma_count) { + // Find first freed offset + while 
((predicts_[first_freed] & kUserDictOffsetFlagRemove) == 0 && first_freed < dict_info_.lemma_count) { + first_freed++; + } + if (first_freed >= dict_info_.lemma_count) break; + // Find first inuse offse after first_freed + first_inuse = first_freed + 1; + while ((predicts_[first_inuse] & kUserDictOffsetFlagRemove) && (first_inuse < dict_info_.lemma_count)) { + first_inuse++; + } + if (first_inuse >= dict_info_.lemma_count) { + break; + } + // Swap offsets_ + int tmp = predicts_[first_inuse]; + predicts_[first_inuse] = predicts_[first_freed]; + predicts_[first_freed] = tmp; + // Go on + first_freed++; + } +#endif + dict_info_.lemma_count = first_freed; + // Fixup lemmas_ + size_t begin = 0; + size_t end = 0; + size_t dst = 0; + int total_size = dict_info_.lemma_size + lemma_size_left_; + int total_count = dict_info_.lemma_count + lemma_count_left_; + size_t real_size = total_size - lemma_size_left_; + while (dst < real_size) { + unsigned char flag = get_lemma_flag(dst); + unsigned char nchr = get_lemma_nchar(dst); + if ((flag & kUserDictLemmaFlagRemove) == 0) { + dst += nchr * 4 + 2; + continue; + } + break; + } + if (dst >= real_size) return; + + end = dst; + while (end < real_size) { + begin = end + get_lemma_nchar(end) * 4 + 2; + repeat: + // not used any more + if (begin >= real_size) break; + unsigned char flag = get_lemma_flag(begin); + unsigned char nchr = get_lemma_nchar(begin); + if (flag & kUserDictLemmaFlagRemove) { + begin += nchr * 4 + 2; + goto repeat; + } + end = begin + nchr * 4 + 2; + while (end < real_size) { + unsigned char eflag = get_lemma_flag(end); + unsigned char enchr = get_lemma_nchar(end); + if ((eflag & kUserDictLemmaFlagRemove) == 0) { + end += enchr * 4 + 2; + continue; + } + break; + } + memmove(lemmas_ + dst, lemmas_ + begin, end - begin); + for (size_t j = 0; j < dict_info_.lemma_count; j++) { + if (offsets_[j] >= begin && offsets_[j] < end) { + offsets_[j] -= (begin - dst); + offsets_by_id_[ids_[j] - start_id_] = offsets_[j]; + } 
+#ifdef ___PREDICT_ENABLED___ + if (predicts_[j] >= begin && predicts_[j] < end) { + predicts_[j] -= (begin - dst); + } +#endif + } +#ifdef ___SYNC_ENABLED___ + for (size_t j = 0; j < dict_info_.sync_count; j++) { + if (syncs_[j] >= begin && syncs_[j] < end) { + syncs_[j] -= (begin - dst); + } + } +#endif + dst += (end - begin); + } + + dict_info_.free_count = 0; + dict_info_.free_size = 0; + dict_info_.lemma_size = dst; + lemma_size_left_ = total_size - dict_info_.lemma_size; + lemma_count_left_ = total_count - dict_info_.lemma_count; + + // XXX Without following code, + // offsets_by_id_ is not reordered. + // That's to say, all removed lemmas' ids are not collected back. + // There may not be room for addition of new lemmas due to + // offsests_by_id_ reason, although lemma_size_left_ is fixed. + // By default, we do want defrag as fast as possible, because + // during defrag procedure, other peers can not write new lemmas + // to user dictionary file. + // XXX If write-back is invoked immediately after + // this defragment, no need to fix up following in-mem data. 
+ for (uint32 i = 0; i < dict_info_.lemma_count; i++) { + ids_[i] = start_id_ + i; + offsets_by_id_[i] = offsets_[i]; + } + + state_ = USER_DICT_DEFRAGMENTED; + +#ifdef ___DEBUG_PERF___ + DEBUG_PERF_END; + LOGD_PERF("defragment"); +#endif +} + +#ifdef ___SYNC_ENABLED___ +void UserDict::clear_sync_lemmas(unsigned int start, unsigned int end) { + if (is_valid_state() == false) return; + if (end > dict_info_.sync_count) end = dict_info_.sync_count; + memmove(syncs_ + start, syncs_ + end, (dict_info_.sync_count - end) << 2); + dict_info_.sync_count -= (end - start); + if (state_ < USER_DICT_SYNC_DIRTY) state_ = USER_DICT_SYNC_DIRTY; +} + +int UserDict::get_sync_count() { + if (is_valid_state() == false) return 0; + return dict_info_.sync_count; +} + +LemmaIdType UserDict::put_lemma_no_sync(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count, uint64 lmt) { + int again = 0; +begin: + LemmaIdType id; + uint32 *syncs_bak = syncs_; + syncs_ = NULL; + id = _put_lemma(lemma_str, splids, lemma_len, count, lmt); + syncs_ = syncs_bak; + if (id == 0 && again == 0) { + if ((dict_info_.limit_lemma_count > 0 && dict_info_.lemma_count >= dict_info_.limit_lemma_count) || (dict_info_.limit_lemma_size > 0 && dict_info_.lemma_size + (2 + (lemma_len << 2)) > dict_info_.limit_lemma_size)) { + // XXX Always reclaim and defrag in sync code path + // sync thread is background thread and ok with heavy work + reclaim(); + defragment(); + flush_cache(); + again = 1; + goto begin; + } + } + return id; +} + +int UserDict::put_lemmas_no_sync_from_utf16le_string(char16 *lemmas, int len) { + int newly_added = 0; + + SpellingParser *spl_parser = new SpellingParser(); + if (!spl_parser) { + return 0; + } +#ifdef ___DEBUG_PERF___ + DEBUG_PERF_BEGIN; +#endif + char16 *ptr = lemmas; + + // Extract pinyin,words,frequence,last_mod_time + char16 *p = ptr, *py16 = ptr; + char16 *hz16 = NULL; + int py16_len = 0; + uint16 splid[kMaxLemmaSize]; + int splid_len = 0; + int hz16_len = 0; + char16 
*fr16 = NULL; + int fr16_len = 0; + + while (p - ptr < len) { + // Pinyin + py16 = p; + splid_len = 0; + while (*p != 0x2c && (p - ptr) < len) { + if (*p == 0x20) splid_len++; + p++; + } + splid_len++; + if (p - ptr == len) break; + py16_len = p - py16; + if (kMaxLemmaSize < splid_len) { + break; + } + bool is_pre; + int splidl = spl_parser->splstr16_to_idxs_f(py16, py16_len, splid, NULL, kMaxLemmaSize, is_pre); + if (splidl != splid_len) break; + // Phrase + hz16 = ++p; + while (*p != 0x2c && (p - ptr) < len) { + p++; + } + hz16_len = p - hz16; + if (hz16_len != splid_len) break; + // Frequency + fr16 = ++p; + fr16_len = 0; + while (*p != 0x2c && (p - ptr) < len) { + p++; + } + fr16_len = p - fr16; + uint32 intf = (uint32)utf16le_atoll(fr16, fr16_len); + // Last modified time + fr16 = ++p; + fr16_len = 0; + while (*p != 0x3b && (p - ptr) < len) { + p++; + } + fr16_len = p - fr16; + uint64 last_mod = utf16le_atoll(fr16, fr16_len); + + put_lemma_no_sync(hz16, splid, splid_len, intf, last_mod); + newly_added++; + + p++; + } + +#ifdef ___DEBUG_PERF___ + DEBUG_PERF_END; + LOGD_PERF("put_lemmas_no_sync_from_utf16le_string"); +#endif + return newly_added; +} + +int UserDict::get_sync_lemmas_in_utf16le_string_from_beginning(char16 *str, int size, int *count) { + int len = 0; + *count = 0; + + int left_len = size; + + if (is_valid_state() == false) return len; + + SpellingTrie *spl_trie = &SpellingTrie::get_instance(); + if (!spl_trie) { + return 0; + } + + uint32 i; + for (i = 0; i < dict_info_.sync_count; i++) { + int offset = syncs_[i]; + uint32 nchar = get_lemma_nchar(offset); + uint16 *spl = get_lemma_spell_ids(offset); + uint16 *wrd = get_lemma_word(offset); + int score = _get_lemma_score(wrd, spl, nchar); + + static char score_temp[32], *pscore_temp = score_temp; + static char16 temp[256], *ptemp = temp; + + pscore_temp = score_temp; + ptemp = temp; + + uint32 j; + // Add pinyin + for (j = 0; j < nchar; j++) { + int ret_len = spl_trie->get_spelling_str16(spl[j], 
ptemp, temp + sizeof(temp) - ptemp); + if (ret_len <= 0) break; + ptemp += ret_len; + if (ptemp < temp + sizeof(temp) - 1) { + *(ptemp++) = ' '; + } else { + j = 0; + break; + } + } + if (j < nchar) { + continue; + } + ptemp--; + if (ptemp < temp + sizeof(temp) - 1) { + *(ptemp++) = ','; + } else { + continue; + } + // Add phrase + for (j = 0; j < nchar; j++) { + if (ptemp < temp + sizeof(temp) - 1) { + *(ptemp++) = wrd[j]; + } else { + break; + } + } + if (j < nchar) { + continue; + } + if (ptemp < temp + sizeof(temp) - 1) { + *(ptemp++) = ','; + } else { + continue; + } + // Add frequency + uint32 intf = extract_score_freq(score); + int ret_len = utf16le_lltoa(intf, ptemp, temp + sizeof(temp) - ptemp); + if (ret_len <= 0) continue; + ptemp += ret_len; + if (ptemp < temp + sizeof(temp) - 1) { + *(ptemp++) = ','; + } else { + continue; + } + // Add last modified time + uint64 last_mod = extract_score_lmt(score); + ret_len = utf16le_lltoa(last_mod, ptemp, temp + sizeof(temp) - ptemp); + if (ret_len <= 0) continue; + ptemp += ret_len; + if (ptemp < temp + sizeof(temp) - 1) { + *(ptemp++) = ';'; + } else { + continue; + } + + // Write to string + int need_len = ptemp - temp; + if (need_len > left_len) break; + memcpy(str + len, temp, need_len * 2); + left_len -= need_len; + + len += need_len; + (*count)++; + } + + if (len > 0) { + if (state_ < USER_DICT_SYNC_DIRTY) state_ = USER_DICT_SYNC_DIRTY; + } + return len; +} + +#endif + +bool UserDict::state(UserDictStat *stat) { + if (is_valid_state() == false) return false; + if (!stat) return false; + stat->version = version_; + stat->file_name = dict_file_; + stat->load_time.tv_sec = load_time_.tv_sec; + stat->load_time.tv_usec = load_time_.tv_usec; + pthread_mutex_lock(&g_mutex_); + stat->last_update.tv_sec = g_last_update_.tv_sec; + stat->last_update.tv_usec = g_last_update_.tv_usec; + pthread_mutex_unlock(&g_mutex_); + stat->disk_size = get_dict_file_size(&dict_info_); + stat->lemma_count = dict_info_.lemma_count; + 
stat->lemma_size = dict_info_.lemma_size; + stat->delete_count = dict_info_.free_count; + stat->delete_size = dict_info_.free_size; +#ifdef ___SYNC_ENABLED___ + stat->sync_count = dict_info_.sync_count; +#endif + stat->limit_lemma_count = dict_info_.limit_lemma_count; + stat->limit_lemma_size = dict_info_.limit_lemma_size; + stat->reclaim_ratio = dict_info_.reclaim_ratio; + return true; +} + +void UserDict::set_limit(uint32 max_lemma_count, uint32 max_lemma_size, uint32 reclaim_ratio) { + dict_info_.limit_lemma_count = max_lemma_count; + dict_info_.limit_lemma_size = max_lemma_size; + if (reclaim_ratio > 100) reclaim_ratio = 100; + dict_info_.reclaim_ratio = reclaim_ratio; +} + +void UserDict::reclaim() { + if (is_valid_state() == false) return; + + switch (dict_info_.reclaim_ratio) { + case 0: + return; + case 100: + // TODO: CLEAR to be implemented + assert(false); + return; + default: + break; + } + + // XXX Reclaim is only based on count, not size + uint32 count = dict_info_.lemma_count; + int rc = count * dict_info_.reclaim_ratio / 100; + + UserDictScoreOffsetPair *score_offset_pairs = NULL; + score_offset_pairs = (UserDictScoreOffsetPair *)malloc(sizeof(UserDictScoreOffsetPair) * rc); + if (score_offset_pairs == NULL) { + return; + } + + for (int i = 0; i < rc; i++) { + int s = scores_[i]; + score_offset_pairs[i].score = s; + score_offset_pairs[i].offset_index = i; + } + + for (int i = (rc + 1) / 2; i >= 0; i--) shift_down(score_offset_pairs, i, rc); + + for (uint32 i = rc; i < dict_info_.lemma_count; i++) { + int s = scores_[i]; + if (s < score_offset_pairs[0].score) { + score_offset_pairs[0].score = s; + score_offset_pairs[0].offset_index = i; + shift_down(score_offset_pairs, 0, rc); + } + } + + for (int i = 0; i < rc; i++) { + int off = score_offset_pairs[i].offset_index; + remove_lemma_by_offset_index(off); + } + if (rc > 0) { + if (state_ < USER_DICT_OFFSET_DIRTY) state_ = USER_DICT_OFFSET_DIRTY; + } + + free(score_offset_pairs); +} + +inline void 
UserDict::swap(UserDictScoreOffsetPair *sop, int i, int j) { + int s = sop[i].score; + int p = sop[i].offset_index; + sop[i].score = sop[j].score; + sop[i].offset_index = sop[j].offset_index; + sop[j].score = s; + sop[j].offset_index = p; +} + +void UserDict::shift_down(UserDictScoreOffsetPair *sop, int i, int n) { + int par = i; + while (par < n) { + int left = par * 2 + 1; + int right = left + 1; + if (left >= n && right >= n) break; + if (right >= n) { + if (sop[left].score > sop[par].score) { + swap(sop, left, par); + par = left; + continue; + } + } else if (sop[left].score > sop[right].score && sop[left].score > sop[par].score) { + swap(sop, left, par); + par = left; + continue; + } else if (sop[right].score > sop[left].score && sop[right].score > sop[par].score) { + swap(sop, right, par); + par = right; + continue; + } + break; + } +} + +LemmaIdType UserDict::put_lemma(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count) { return _put_lemma(lemma_str, splids, lemma_len, count, time(NULL)); } + +LemmaIdType UserDict::_put_lemma(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count, uint64 lmt) { +#ifdef ___DEBUG_PERF___ + DEBUG_PERF_BEGIN; +#endif + if (is_valid_state() == false) return 0; + int32 off = locate_in_offsets(lemma_str, splids, lemma_len); + if (off != -1) { + int delta_score = count - scores_[off]; + dict_info_.total_nfreq += delta_score; + scores_[off] = build_score(lmt, count); + if (state_ < USER_DICT_SCORE_DIRTY) state_ = USER_DICT_SCORE_DIRTY; +#ifdef ___DEBUG_PERF___ + DEBUG_PERF_END; + LOGD_PERF("_put_lemma(update)"); +#endif + return ids_[off]; + } else { + if ((dict_info_.limit_lemma_count > 0 && dict_info_.lemma_count >= dict_info_.limit_lemma_count) || (dict_info_.limit_lemma_size > 0 && dict_info_.lemma_size + (2 + (lemma_len << 2)) > dict_info_.limit_lemma_size)) { + // XXX Don't defragment here, it's too time-consuming. 
+ return 0; + } + int flushed = 0; + if (lemma_count_left_ == 0 || lemma_size_left_ < (size_t)(2 + (lemma_len << 2))) { + // XXX When there is no space for new lemma, we flush to disk + // flush_cache() may be called by upper user + // and better place shoule be found instead of here + flush_cache(); + flushed = 1; + // Or simply return and do nothing + // return 0; + } +#ifdef ___DEBUG_PERF___ + DEBUG_PERF_END; + LOGD_PERF(flushed ? "_put_lemma(flush+add)" : "_put_lemma(add)"); +#endif + LemmaIdType id = append_a_lemma(lemma_str, splids, lemma_len, count, lmt); +#ifdef ___SYNC_ENABLED___ + if (syncs_ && id != 0) { + queue_lemma_for_sync(id); + } +#endif + return id; + } + return 0; +} + +#ifdef ___SYNC_ENABLED___ +void UserDict::queue_lemma_for_sync(LemmaIdType id) { + if (dict_info_.sync_count < sync_count_size_) { + syncs_[dict_info_.sync_count++] = offsets_by_id_[id - start_id_]; + } else { + uint32 *syncs = (uint32 *)realloc(syncs_, (sync_count_size_ + kUserDictPreAlloc) << 2); + if (syncs) { + sync_count_size_ += kUserDictPreAlloc; + syncs_ = syncs; + syncs_[dict_info_.sync_count++] = offsets_by_id_[id - start_id_]; + } + } +} +#endif + +LemmaIdType UserDict::update_lemma(LemmaIdType lemma_id, int16 delta_count, bool selected) { +#ifdef ___DEBUG_PERF___ + DEBUG_PERF_BEGIN; +#endif + if (is_valid_state() == false) return 0; + if (is_valid_lemma_id(lemma_id) == false) return 0; + uint32 offset = offsets_by_id_[lemma_id - start_id_]; + uint8 lemma_len = get_lemma_nchar(offset); + char16 *lemma_str = get_lemma_word(offset); + uint16 *splids = get_lemma_spell_ids(offset); + + int32 off = locate_in_offsets(lemma_str, splids, lemma_len); + if (off != -1) { + int score = scores_[off]; + int count = extract_score_freq(score); + uint64 lmt = extract_score_lmt(score); + if (count + delta_count > kUserDictMaxFrequency || count + delta_count < count) { + delta_count = kUserDictMaxFrequency - count; + } + count += delta_count; + dict_info_.total_nfreq += delta_count; + if 
(selected) { + lmt = time(NULL); + } + scores_[off] = build_score(lmt, count); + if (state_ < USER_DICT_SCORE_DIRTY) state_ = USER_DICT_SCORE_DIRTY; +#ifdef ___DEBUG_PERF___ + DEBUG_PERF_END; + LOGD_PERF("update_lemma"); +#endif +#ifdef ___SYNC_ENABLED___ + queue_lemma_for_sync(ids_[off]); +#endif + return ids_[off]; + } + return 0; +} + +size_t UserDict::get_total_lemma_count() { return dict_info_.total_nfreq; } + +void UserDict::set_total_lemma_count_of_others(size_t count) { total_other_nfreq_ = count; } + +LemmaIdType UserDict::append_a_lemma(char16 lemma_str[], uint16 splids[], uint16 lemma_len, uint16 count, uint64 lmt) { + LemmaIdType id = get_max_lemma_id() + 1; + size_t offset = dict_info_.lemma_size; + if (offset > kUserDictOffsetMask) return 0; + + lemmas_[offset] = 0; + lemmas_[offset + 1] = (uint8)lemma_len; + for (size_t i = 0; i < lemma_len; i++) { + *((uint16 *)&lemmas_[offset + 2 + (i << 1)]) = splids[i]; + *((char16 *)&lemmas_[offset + 2 + (lemma_len << 1) + (i << 1)]) = lemma_str[i]; + } + uint32 off = dict_info_.lemma_count; + offsets_[off] = offset; + scores_[off] = build_score(lmt, count); + ids_[off] = id; +#ifdef ___PREDICT_ENABLED___ + predicts_[off] = offset; +#endif + + offsets_by_id_[id - start_id_] = offset; + + dict_info_.lemma_count++; + dict_info_.lemma_size += (2 + (lemma_len << 2)); + lemma_count_left_--; + lemma_size_left_ -= (2 + (lemma_len << 2)); + + // Sort + + UserDictSearchable searchable; + prepare_locate(&searchable, splids, lemma_len); + + size_t i = 0; + while (i < off) { + offset = offsets_[i]; + uint32 nchar = get_lemma_nchar(offset); + uint16 *spl = get_lemma_spell_ids(offset); + + if (0 <= fuzzy_compare_spell_id(spl, nchar, &searchable)) break; + i++; + } + if (i != off) { + uint32 temp = offsets_[off]; + memmove(offsets_ + i + 1, offsets_ + i, (off - i) << 2); + offsets_[i] = temp; + + temp = scores_[off]; + memmove(scores_ + i + 1, scores_ + i, (off - i) << 2); + scores_[i] = temp; + + temp = ids_[off]; + 
memmove(ids_ + i + 1, ids_ + i, (off - i) << 2); + ids_[i] = temp; + } + +#ifdef ___PREDICT_ENABLED___ + uint32 j = 0; + uint16 *words_new = get_lemma_word(predicts_[off]); + j = locate_where_to_insert_in_predicts(words_new, lemma_len); + if (j != off) { + uint32 temp = predicts_[off]; + memmove(predicts_ + j + 1, predicts_ + j, (off - j) << 2); + predicts_[j] = temp; + } +#endif + + if (state_ < USER_DICT_LEMMA_DIRTY) state_ = USER_DICT_LEMMA_DIRTY; + +#ifdef ___CACHE_ENABLED___ + cache_init(); +#endif + + dict_info_.total_nfreq += count; + return id; +} +} // namespace ime_pinyin diff --git a/utf16char.cpp b/src/share/utf16char.cpp similarity index 50% rename from utf16char.cpp rename to src/share/utf16char.cpp index 848cf2c..d6420f1 100644 --- a/utf16char.cpp +++ b/src/share/utf16char.cpp @@ -15,7 +15,7 @@ */ #include -#include "./utf16char.h" +#include "../include/utf16char.h" namespace ime_pinyin { @@ -23,158 +23,139 @@ namespace ime_pinyin { extern "C" { #endif - char16* utf16_strtok(char16 *utf16_str, size_t *token_size, - char16 **utf16_str_next) { +char16 *utf16_strtok(char16 *utf16_str, size_t *token_size, char16 **utf16_str_next) { if (NULL == utf16_str || NULL == token_size || NULL == utf16_str_next) { - return NULL; + return NULL; } // Skip the splitters size_t pos = 0; - while ((char16)' ' == utf16_str[pos] || (char16)'\n' == utf16_str[pos] - || (char16)'\t' == utf16_str[pos]) - pos++; + while ((char16)' ' == utf16_str[pos] || (char16)'\n' == utf16_str[pos] || (char16)'\t' == utf16_str[pos]) pos++; utf16_str += pos; pos = 0; - while ((char16)'\0' != utf16_str[pos] && (char16)' ' != utf16_str[pos] - && (char16)'\n' != utf16_str[pos] - && (char16)'\t' != utf16_str[pos]) { - pos++; + while ((char16)'\0' != utf16_str[pos] && (char16)' ' != utf16_str[pos] && (char16)'\n' != utf16_str[pos] && (char16)'\t' != utf16_str[pos]) { + pos++; } char16 *ret_val = utf16_str; if ((char16)'\0' == utf16_str[pos]) { - *utf16_str_next = NULL; - if (0 == pos) - return 
NULL; + *utf16_str_next = NULL; + if (0 == pos) return NULL; } else { - *utf16_str_next = utf16_str + pos + 1; + *utf16_str_next = utf16_str + pos + 1; } utf16_str[pos] = (char16)'\0'; *token_size = pos; return ret_val; - } +} - int utf16_atoi(const char16 *utf16_str) { - if (NULL == utf16_str) - return 0; +int utf16_atoi(const char16 *utf16_str) { + if (NULL == utf16_str) return 0; int value = 0; int sign = 1; size_t pos = 0; if ((char16)'-' == utf16_str[pos]) { - sign = -1; - pos++; + sign = -1; + pos++; } - while ((char16)'0' <= utf16_str[pos] && - (char16)'9' >= utf16_str[pos]) { - value = value * 10 + static_cast(utf16_str[pos] - (char16)'0'); - pos++; + while ((char16)'0' <= utf16_str[pos] && (char16)'9' >= utf16_str[pos]) { + value = value * 10 + static_cast(utf16_str[pos] - (char16)'0'); + pos++; } - return value*sign; - } + return value * sign; +} - float utf16_atof(const char16 *utf16_str) { +float utf16_atof(const char16 *utf16_str) { // A temporary implemetation. char char8[256]; if (utf16_strlen(utf16_str) >= 256) return 0; utf16_strcpy_tochar(char8, utf16_str); return atof(char8); - } +} - size_t utf16_strlen(const char16 *utf16_str) { - if (NULL == utf16_str) - return 0; +size_t utf16_strlen(const char16 *utf16_str) { + if (NULL == utf16_str) return 0; size_t size = 0; - while ((char16)'\0' != utf16_str[size]) - size++; + while ((char16)'\0' != utf16_str[size]) size++; return size; - } +} - int utf16_strcmp(const char16* str1, const char16* str2) { +int utf16_strcmp(const char16 *str1, const char16 *str2) { size_t pos = 0; - while (str1[pos] == str2[pos] && (char16)'\0' != str1[pos]) - pos++; + while (str1[pos] == str2[pos] && (char16)'\0' != str1[pos]) pos++; return static_cast(str1[pos]) - static_cast(str2[pos]); - } +} - int utf16_strncmp(const char16 *str1, const char16 *str2, size_t size) { +int utf16_strncmp(const char16 *str1, const char16 *str2, size_t size) { size_t pos = 0; - while (pos < size && str1[pos] == str2[pos] && (char16)'\0' != 
str1[pos]) - pos++; + while (pos < size && str1[pos] == str2[pos] && (char16)'\0' != str1[pos]) pos++; - if (pos == size) - return 0; + if (pos == size) return 0; return static_cast(str1[pos]) - static_cast(str2[pos]); - } +} - // we do not consider overlapping - char16* utf16_strcpy(char16 *dst, const char16 *src) { - if (NULL == src || NULL == dst) - return NULL; +// we do not consider overlapping +char16 *utf16_strcpy(char16 *dst, const char16 *src) { + if (NULL == src || NULL == dst) return NULL; - char16* cp = dst; + char16 *cp = dst; while ((char16)'\0' != *src) { - *cp = *src; - cp++; - src++; + *cp = *src; + cp++; + src++; } *cp = *src; return dst; - } +} - char16* utf16_strncpy(char16 *dst, const char16 *src, size_t size) { - if (NULL == src || NULL == dst || 0 == size) - return NULL; +char16 *utf16_strncpy(char16 *dst, const char16 *src, size_t size) { + if (NULL == src || NULL == dst || 0 == size) return NULL; - if (src == dst) - return dst; + if (src == dst) return dst; - char16* cp = dst; + char16 *cp = dst; if (dst < src || (dst > src && dst >= src + size)) { - while (size-- && (*cp++ = *src++)) - ; + while (size-- && (*cp++ = *src++)); } else { - cp += size - 1; - src += size - 1; - while (size-- && (*cp-- == *src--)) - ; + cp += size - 1; + src += size - 1; + while (size-- && (*cp-- == *src--)); } return dst; - } +} - // We do not handle complicated cases like overlapping, because in this - // codebase, it is not necessary. - char* utf16_strcpy_tochar(char *dst, const char16 *src) { - if (NULL == src || NULL == dst) - return NULL; +// We do not handle complicated cases like overlapping, because in this +// codebase, it is not necessary. 
+char *utf16_strcpy_tochar(char *dst, const char16 *src) { + if (NULL == src || NULL == dst) return NULL; - char* cp = dst; + char *cp = dst; while ((char16)'\0' != *src) { - *cp = static_cast(*src); - cp++; - src++; + *cp = static_cast(*src); + cp++; + src++; } *cp = *src; return dst; - } +} #ifdef __cplusplus } diff --git a/utf16reader.cpp b/src/share/utf16reader.cpp similarity index 82% rename from utf16reader.cpp rename to src/share/utf16reader.cpp index 6cc0f96..b44fe39 100644 --- a/utf16reader.cpp +++ b/src/share/utf16reader.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "./utf16reader.h" +#include "../include/utf16reader.h" namespace ime_pinyin { @@ -30,16 +30,13 @@ Utf16Reader::Utf16Reader() { } Utf16Reader::~Utf16Reader() { - if (NULL != fp_) - fclose(fp_); + if (NULL != fp_) fclose(fp_); - if (NULL != buffer_) - delete[] buffer_; + if (NULL != buffer_) delete[] buffer_; } bool Utf16Reader::open(const char *filename, size_t buffer_len) { - if (filename == NULL) - return false; + if (filename == NULL) return false; if (buffer_len < MIN_BUF_LEN) buffer_len = MIN_BUF_LEN; @@ -48,14 +45,11 @@ bool Utf16Reader::open(const char *filename, size_t buffer_len) { buffer_total_len_ = buffer_len; - if (NULL != buffer_) - delete[] buffer_; + if (NULL != buffer_) delete[] buffer_; buffer_ = new char16[buffer_total_len_]; - if (NULL == buffer_) - return false; + if (NULL == buffer_) return false; - if ((fp_ = fopen(filename, "rb")) == NULL) - return false; + if ((fp_ = fopen(filename, "rb")) == NULL) return false; // the UTF16 file header, skip char16 header; @@ -69,8 +63,7 @@ bool Utf16Reader::open(const char *filename, size_t buffer_len) { } char16 *Utf16Reader::readline(char16 *read_buf, size_t max_len) { - if (NULL == fp_ || NULL == read_buf || 0 == max_len) - return NULL; + if (NULL == fp_ || NULL == read_buf || 0 == max_len) return NULL; size_t ret_len = 0; @@ -79,8 +72,7 @@ char16 *Utf16Reader::readline(char16 *read_buf, size_t max_len) { 
buffer_next_pos_ = 0; buffer_valid_len_ = fread(buffer_, sizeof(char16), buffer_total_len_, fp_); if (buffer_valid_len_ == 0) { - if (0 == ret_len) - return NULL; + if (0 == ret_len) return NULL; read_buf[ret_len] = (char16)'\0'; return read_buf; } @@ -116,13 +108,11 @@ char16 *Utf16Reader::readline(char16 *read_buf, size_t max_len) { } bool Utf16Reader::close() { - if (NULL != fp_) - fclose(fp_); + if (NULL != fp_) fclose(fp_); fp_ = NULL; - if (NULL != buffer_) - delete[] buffer_; + if (NULL != buffer_) delete[] buffer_; buffer_ = NULL; return true; } -} // namespace ime_pinyin +} // namespace ime_pinyin diff --git a/sync.cpp b/sync.cpp deleted file mode 100644 index f021a61..0000000 --- a/sync.cpp +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "./sync.h" -#include -#include - -#ifdef ___SYNC_ENABLED___ - -namespace ime_pinyin { - -Sync::Sync() - : userdict_(NULL), - dictfile_(NULL), - last_count_(0) { -}; - -Sync::~Sync() { -} - - -bool Sync::begin(const char * filename) { - if (userdict_) { - finish(); - } - - if (!filename) { - return false; - } - - dictfile_ = strdup(filename); - if (!dictfile_) { - return false; - } - - userdict_ = new UserDict(); - if (!userdict_) { - free(dictfile_); - dictfile_ = NULL; - return false; - } - - if (userdict_->load_dict((const char*)dictfile_, kUserDictIdStart, - kUserDictIdEnd) == false) { - delete userdict_; - userdict_ = NULL; - free(dictfile_); - dictfile_ = NULL; - return false; - } - - userdict_->set_limit(kUserDictMaxLemmaCount, kUserDictMaxLemmaSize, kUserDictRatio); - - return true; -} - -int Sync::put_lemmas(char16 * lemmas, int len) { - return userdict_->put_lemmas_no_sync_from_utf16le_string(lemmas, len); -} - -int Sync::get_lemmas(char16 * str, int size) { - return userdict_->get_sync_lemmas_in_utf16le_string_from_beginning(str, size, &last_count_); -} - -int Sync::get_last_got_count() { - return last_count_; -} - -int Sync::get_total_count() { - return userdict_->get_sync_count(); -} - -void Sync::clear_last_got() { - if (last_count_ < 0) { - return; - } - userdict_->clear_sync_lemmas(0, last_count_); - last_count_ = 0; -} - -void Sync::finish() { - if (userdict_) { - userdict_->close_dict(); - delete userdict_; - userdict_ = NULL; - free(dictfile_); - dictfile_ = NULL; - last_count_ = 0; - } -} - -int Sync::get_capacity() { - UserDict::UserDictStat stat; - userdict_->state(&stat); - return stat.limit_lemma_count - stat.lemma_count; -} - -} -#endif diff --git a/main.cpp b/tests/main.cpp similarity index 69% rename from main.cpp rename to tests/main.cpp index 73fd69c..5284f0f 100644 --- a/main.cpp +++ b/tests/main.cpp @@ -1,4 +1,4 @@ -#include "./pinyinime.h" +#include "../src/include/pinyinime.h" #include #include #include @@ -12,15 
+12,15 @@ std::string fromUtf16(const ime_pinyin::char16 *buf, size_t len) { } int main() { - if (!ime_pinyin::im_open_decoder("./dict_pinyin.dat", "./user_dict.dat")) { + if (!ime_pinyin::im_open_decoder("./data/dict_pinyin.dat", "./data/user_dict.dat")) { std::cout << "fany bug.\n"; return 0; } std::string pinyin = "ni'ma'si'le"; pinyin = "ni'ma'mei'si"; - pinyin = "huang'yi"; - size_t qty = ime_pinyin::im_search(pinyin.c_str(), pinyin.size()); + pinyin = "ni'shuo'ni'ma'ne"; + size_t cand_cnt = ime_pinyin::im_search(pinyin.c_str(), pinyin.size()); ime_pinyin::char16 buf[256] = {0}; std::string msg; for (size_t i = 0; i < 100; ++i) { @@ -29,6 +29,7 @@ int main() { while (buf[len] != 0 && len < 255) ++len; msg.append(fromUtf16(buf, len) + " "); } - std::cout << "候选项: " << msg << std::endl; + std::cout << "候选项数量: " << cand_cnt << std::endl; + std::cout << "候选项本体: " << msg << std::endl; return 0; } diff --git a/userdict.cpp b/userdict.cpp deleted file mode 100644 index 38ea144..0000000 --- a/userdict.cpp +++ /dev/null @@ -1,2253 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "./userdict.h" -#include "./splparser.h" -#include "./ngram.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace ime_pinyin { - -#ifdef ___DEBUG_PERF___ -static uint64 _ellapse_ = 0; -static struct timeval _tv_start_, _tv_end_; -#define DEBUG_PERF_BEGIN \ - do { \ - gettimeofday(&_tv_start_, NULL); \ - } while(0) -#define DEBUG_PERF_END \ - do { \ - gettimeofday(&_tv_end_, NULL); \ - _ellapse_ = (_tv_end_.tv_sec - _tv_start_.tv_sec) * 1000000 + \ - (_tv_end_.tv_usec - _tv_start_.tv_usec); \ - } while(0) -#define LOGD_PERF(message) \ - ALOGD("PERFORMANCE[%s] %llu usec.", message, _ellapse_); -#else -#define DEBUG_PERF_BEGIN -#define DEBUG_PERF_END -#define LOGD_PERF(message) -#endif - -// XXX File load and write are thread-safe by g_mutex_ -static pthread_mutex_t g_mutex_ = PTHREAD_MUTEX_INITIALIZER; -static struct timeval g_last_update_ = {0, 0}; - -inline uint32 UserDict::get_dict_file_size(UserDictInfo * info) { - return (4 + info->lemma_size + (info->lemma_count << 3) -#ifdef ___PREDICT_ENABLED___ - + (info->lemma_count << 2) -#endif -#ifdef ___SYNC_ENABLED___ - + (info->sync_count << 2) -#endif - + sizeof(*info)); -} - -inline LmaScoreType UserDict::translate_score(int raw_score) { - // 1) ori_freq: original user frequency - uint32 ori_freq = extract_score_freq(raw_score); - // 2) lmt_off: lmt index (week offset for example) - uint64 lmt_off = ((raw_score & 0xffff0000) >> 16); - if (kUserDictLMTBitWidth < 16) { - uint64 mask = ~(1 << kUserDictLMTBitWidth); - lmt_off &= mask; - } - // 3) now_off: current time index (current week offset for example) - // assuming load_time_ is around current time - uint64 now_off = load_time_.tv_sec; - now_off = (now_off - kUserDictLMTSince) / kUserDictLMTGranularity; - now_off = (now_off << (64 - kUserDictLMTBitWidth)); - now_off = (now_off >> (64 - kUserDictLMTBitWidth)); - // 4) factor: decide expand-factor - int delta 
= now_off - lmt_off; - if (delta > 4) - delta = 4; - int factor = 80 - (delta << 4); - - double tf = (double)(dict_info_.total_nfreq + total_other_nfreq_); - return (LmaScoreType)(log((double)factor * (double)ori_freq / tf) - * NGram::kLogValueAmplifier); -} - -inline int UserDict::extract_score_freq(int raw_score) { - // Frequence stored in lowest 16 bits - int freq = (raw_score & 0x0000ffff); - return freq; -} - -inline uint64 UserDict::extract_score_lmt(int raw_score) { - uint64 lmt = ((raw_score & 0xffff0000) >> 16); - if (kUserDictLMTBitWidth < 16) { - uint64 mask = ~(1 << kUserDictLMTBitWidth); - lmt &= mask; - } - lmt = lmt * kUserDictLMTGranularity + kUserDictLMTSince; - return lmt; -} - -inline int UserDict::build_score(uint64 lmt, int freq) { - lmt = (lmt - kUserDictLMTSince) / kUserDictLMTGranularity; - lmt = (lmt << (64 - kUserDictLMTBitWidth)); - lmt = (lmt >> (64 - kUserDictLMTBitWidth)); - uint16 lmt16 = (uint16)lmt; - int s = freq; - s &= 0x0000ffff; - s = (lmt16 << 16) | s; - return s; -} - -inline int64 UserDict::utf16le_atoll(uint16 *s, int len) { - int64 ret = 0; - if (len <= 0) - return ret; - - int flag = 1; - const uint16 * endp = s + len; - if (*s == '-') { - flag = -1; - s++; - } else if (*s == '+') { - s++; - } - - while (*s >= '0' && *s <= '9' && s < endp) { - ret += ret * 10 + (*s) - '0'; - s++; - } - return ret * flag; -} - -inline int UserDict::utf16le_lltoa(int64 v, uint16 *s, int size) { - if (!s || size <= 0) - return 0; - uint16 *endp = s + size; - int ret_len = 0; - if (v < 0) { - *(s++) = '-'; - ++ret_len; - v *= -1; - } - - uint16 *b = s; - while (s < endp && v != 0) { - *(s++) = '0' + (v % 10); - v = v / 10; - ++ret_len; - } - - if (v != 0) - return 0; - - --s; - - while (b < s) { - *b = *s; - ++b, --s; - } - - return ret_len; -} - -inline void UserDict::set_lemma_flag(uint32 offset, uint8 flag) { - offset &= kUserDictOffsetMask; - lemmas_[offset] |= flag; -} - -inline char UserDict::get_lemma_flag(uint32 offset) { - offset &= 
kUserDictOffsetMask; - return (char)(lemmas_[offset]); -} - -inline char UserDict::get_lemma_nchar(uint32 offset) { - offset &= kUserDictOffsetMask; - return (char)(lemmas_[offset + 1]); -} - -inline uint16 * UserDict::get_lemma_spell_ids(uint32 offset) { - offset &= kUserDictOffsetMask; - return (uint16 *)(lemmas_ + offset + 2); -} - -inline uint16 * UserDict::get_lemma_word(uint32 offset) { - offset &= kUserDictOffsetMask; - uint8 nchar = get_lemma_nchar(offset); - return (uint16 *)(lemmas_ + offset + 2 + (nchar << 1)); -} - -inline LemmaIdType UserDict::get_max_lemma_id() { - // When a lemma is deleted, we don't not claim its id back for - // simplicity and performance - return start_id_ + dict_info_.lemma_count - 1; -} - -inline bool UserDict::is_valid_lemma_id(LemmaIdType id) { - if (id >= start_id_ && id <= get_max_lemma_id()) - return true; - return false; -} - -inline bool UserDict::is_valid_state() { - if (state_ == USER_DICT_NONE) - return false; - return true; -} - -UserDict::UserDict() - : start_id_(0), - version_(0), - lemmas_(NULL), - offsets_(NULL), - scores_(NULL), - ids_(NULL), -#ifdef ___PREDICT_ENABLED___ - predicts_(NULL), -#endif -#ifdef ___SYNC_ENABLED___ - syncs_(NULL), - sync_count_size_(0), -#endif - offsets_by_id_(NULL), - lemma_count_left_(0), - lemma_size_left_(0), - dict_file_(NULL), - state_(USER_DICT_NONE) { - memset(&dict_info_, 0, sizeof(dict_info_)); - memset(&load_time_, 0, sizeof(load_time_)); -#ifdef ___CACHE_ENABLED___ - cache_init(); -#endif -} - -UserDict::~UserDict() { - close_dict(); -} - -bool UserDict::load_dict(const char *file_name, LemmaIdType start_id, - LemmaIdType end_id) { -#ifdef ___DEBUG_PERF___ - DEBUG_PERF_BEGIN; -#endif - dict_file_ = strdup(file_name); - if (!dict_file_) - return false; - - start_id_ = start_id; - - if (false == validate(file_name) && false == reset(file_name)) { - goto error; - } - if (false == load(file_name, start_id)) { - goto error; - } - - state_ = USER_DICT_SYNC; - - 
gettimeofday(&load_time_, NULL); - -#ifdef ___DEBUG_PERF___ - DEBUG_PERF_END; - LOGD_PERF("load_dict"); -#endif - return true; - error: - free((void*)dict_file_); - start_id_ = 0; - return false; -} - -bool UserDict::close_dict() { - if (state_ == USER_DICT_NONE) - return true; - if (state_ == USER_DICT_SYNC) - goto out; - - // If dictionary is written back by others, - // we can not simply write back here - // To do a safe flush, we have to discard all newly added - // lemmas and try to reload dict file. - pthread_mutex_lock(&g_mutex_); - if (load_time_.tv_sec > g_last_update_.tv_sec || - (load_time_.tv_sec == g_last_update_.tv_sec && - load_time_.tv_usec > g_last_update_.tv_usec)) { - write_back(); - gettimeofday(&g_last_update_, NULL); - } - pthread_mutex_unlock(&g_mutex_); - - out: - free((void*)dict_file_); - free(lemmas_); - free(offsets_); - free(offsets_by_id_); - free(scores_); - free(ids_); -#ifdef ___PREDICT_ENABLED___ - free(predicts_); -#endif - - version_ = 0; - dict_file_ = NULL; - lemmas_ = NULL; -#ifdef ___SYNC_ENABLED___ - syncs_ = NULL; - sync_count_size_ = 0; -#endif - offsets_ = NULL; - offsets_by_id_ = NULL; - scores_ = NULL; - ids_ = NULL; -#ifdef ___PREDICT_ENABLED___ - predicts_ = NULL; -#endif - - memset(&dict_info_, 0, sizeof(dict_info_)); - lemma_count_left_ = 0; - lemma_size_left_ = 0; - state_ = USER_DICT_NONE; - - return true; -} - -size_t UserDict::number_of_lemmas() { - return dict_info_.lemma_count; -} - -void UserDict::reset_milestones(uint16 from_step, MileStoneHandle from_handle) { - return; -} - -MileStoneHandle UserDict::extend_dict(MileStoneHandle from_handle, - const DictExtPara *dep, - LmaPsbItem *lpi_items, - size_t lpi_max, size_t *lpi_num) { - if (is_valid_state() == false) - return 0; - - bool need_extend = false; - -#ifdef ___DEBUG_PERF___ - DEBUG_PERF_BEGIN; -#endif - *lpi_num = _get_lpis(dep->splids, dep->splids_extended + 1, - lpi_items, lpi_max, &need_extend); -#ifdef ___DEBUG_PERF___ - DEBUG_PERF_END; - 
LOGD_PERF("extend_dict"); -#endif - return ((*lpi_num > 0 || need_extend) ? 1 : 0); -} - -int UserDict::is_fuzzy_prefix_spell_id( - const uint16 * id1, uint16 len1, const UserDictSearchable *searchable) { - if (len1 < searchable->splids_len) - return 0; - - SpellingTrie &spl_trie = SpellingTrie::get_instance(); - uint32 i = 0; - for (i = 0; i < searchable->splids_len; i++) { - const char py1 = *spl_trie.get_spelling_str(id1[i]); - uint16 off = 8 * (i % 4); - const char py2 = ((searchable->signature[i/4] & (0xff << off)) >> off); - if (py1 == py2) - continue; - return 0; - } - return 1; -} - -int UserDict::fuzzy_compare_spell_id( - const uint16 * id1, uint16 len1, const UserDictSearchable *searchable) { - if (len1 < searchable->splids_len) - return -1; - if (len1 > searchable->splids_len) - return 1; - - SpellingTrie &spl_trie = SpellingTrie::get_instance(); - uint32 i = 0; - for (i = 0; i < len1; i++) { - const char py1 = *spl_trie.get_spelling_str(id1[i]); - uint16 off = 8 * (i % 4); - const char py2 = ((searchable->signature[i/4] & (0xff << off)) >> off); - if (py1 == py2) - continue; - if (py1 > py2) - return 1; - return -1; - } - return 0; -} - -bool UserDict::is_prefix_spell_id( - const uint16 * fullids, uint16 fulllen, - const UserDictSearchable *searchable) { - if (fulllen < searchable->splids_len) - return false; - - uint32 i = 0; - for (; i < searchable->splids_len; i++) { - uint16 start_id = searchable->splid_start[i]; - uint16 count = searchable->splid_count[i]; - if (fullids[i] >= start_id && fullids[i] < start_id + count) - continue; - else - return false; - } - return true; -} - -bool UserDict::equal_spell_id( - const uint16 * fullids, uint16 fulllen, - const UserDictSearchable *searchable) { - if (fulllen != searchable->splids_len) - return false; - - uint32 i = 0; - for (; i < fulllen; i++) { - uint16 start_id = searchable->splid_start[i]; - uint16 count = searchable->splid_count[i]; - if (fullids[i] >= start_id && fullids[i] < start_id + count) - 
continue; - else - return false; - } - return true; -} - -int32 UserDict::locate_first_in_offsets(const UserDictSearchable * searchable) { - int32 begin = 0; - int32 end = dict_info_.lemma_count - 1; - int32 middle = -1; - - int32 first_prefix = middle; - int32 last_matched = middle; - - while (begin <= end) { - middle = (begin + end) >> 1; - uint32 offset = offsets_[middle]; - uint8 nchar = get_lemma_nchar(offset); - const uint16 * splids = get_lemma_spell_ids(offset); - int cmp = fuzzy_compare_spell_id(splids, nchar, searchable); - int pre = is_fuzzy_prefix_spell_id(splids, nchar, searchable); - - if (pre) - first_prefix = middle; - - if (cmp < 0) { - begin = middle + 1; - } else if (cmp > 0) { - end = middle - 1; - } else { - end = middle - 1; - last_matched = middle; - } - } - - return first_prefix; -} - -void UserDict::prepare_locate(UserDictSearchable *searchable, - const uint16 *splid_str, - uint16 splid_str_len) { - searchable->splids_len = splid_str_len; - memset(searchable->signature, 0, sizeof(searchable->signature)); - - SpellingTrie &spl_trie = SpellingTrie::get_instance(); - uint32 i = 0; - for (; i < splid_str_len; i++) { - if (spl_trie.is_half_id(splid_str[i])) { - searchable->splid_count[i] = - spl_trie.half_to_full(splid_str[i], - &(searchable->splid_start[i])); - } else { - searchable->splid_count[i] = 1; - searchable->splid_start[i] = splid_str[i]; - } - const unsigned char py = *spl_trie.get_spelling_str(splid_str[i]); - searchable->signature[i>>2] |= (py << (8 * (i % 4))); - } -} - -size_t UserDict::get_lpis(const uint16 *splid_str, uint16 splid_str_len, - LmaPsbItem *lpi_items, size_t lpi_max) { - return _get_lpis(splid_str, splid_str_len, lpi_items, lpi_max, NULL); -} - -size_t UserDict::_get_lpis(const uint16 *splid_str, - uint16 splid_str_len, LmaPsbItem *lpi_items, - size_t lpi_max, bool * need_extend) { - bool tmp_extend; - if (!need_extend) - need_extend = &tmp_extend; - - *need_extend = false; - - if (is_valid_state() == false) - 
return 0; - if (lpi_max <= 0) - return 0; - - if (0 == pthread_mutex_trylock(&g_mutex_)) { - if (load_time_.tv_sec < g_last_update_.tv_sec || - (load_time_.tv_sec == g_last_update_.tv_sec && - load_time_.tv_usec < g_last_update_.tv_usec)) { - // Others updated disk file, have to reload - pthread_mutex_unlock(&g_mutex_); - flush_cache(); - } else { - pthread_mutex_unlock(&g_mutex_); - } - } else { - } - - UserDictSearchable searchable; - prepare_locate(&searchable, splid_str, splid_str_len); - - uint32 max_off = dict_info_.lemma_count; -#ifdef ___CACHE_ENABLED___ - int32 middle; - uint32 start, count; - bool cached = cache_hit(&searchable, &start, &count); - if (cached) { - middle = start; - max_off = start + count; - } else { - middle = locate_first_in_offsets(&searchable); - start = middle; - } -#else - int32 middle = locate_first_in_offsets(&searchable); -#endif - - if (middle == -1) { -#ifdef ___CACHE_ENABLED___ - if (!cached) - cache_push(USER_DICT_MISS_CACHE, &searchable, 0, 0); -#endif - return 0; - } - - size_t lpi_current = 0; - - bool fuzzy_break = false; - bool prefix_break = false; - while ((size_t)middle < max_off && !fuzzy_break && !prefix_break) { - if (lpi_current >= lpi_max) - break; - uint32 offset = offsets_[middle]; - // Ignore deleted lemmas - if (offset & kUserDictOffsetFlagRemove) { - middle++; - continue; - } - uint8 nchar = get_lemma_nchar(offset); - uint16 * splids = get_lemma_spell_ids(offset); -#ifdef ___CACHE_ENABLED___ - if (!cached && 0 != fuzzy_compare_spell_id(splids, nchar, &searchable)) { -#else - if (0 != fuzzy_compare_spell_id(splids, nchar, &searchable)) { -#endif - fuzzy_break = true; - } - - if (prefix_break == false) { - if (is_fuzzy_prefix_spell_id(splids, nchar, &searchable)) { - if (*need_extend == false && - is_prefix_spell_id(splids, nchar, &searchable)) { - *need_extend = true; - } - } else { - prefix_break = true; - } - } - - if (equal_spell_id(splids, nchar, &searchable) == true) { - lpi_items[lpi_current].psb = 
translate_score(scores_[middle]); - lpi_items[lpi_current].id = ids_[middle]; - lpi_items[lpi_current].lma_len = nchar; - lpi_current++; - } - middle++; - } - -#ifdef ___CACHE_ENABLED___ - if (!cached) { - count = middle - start; - cache_push(USER_DICT_CACHE, &searchable, start, count); - } -#endif - - return lpi_current; -} - -uint16 UserDict::get_lemma_str(LemmaIdType id_lemma, char16* str_buf, - uint16 str_max) { - if (is_valid_state() == false) - return 0; - if (is_valid_lemma_id(id_lemma) == false) - return 0; - uint32 offset = offsets_by_id_[id_lemma - start_id_]; - uint8 nchar = get_lemma_nchar(offset); - char16 * str = get_lemma_word(offset); - uint16 m = nchar < str_max -1 ? nchar : str_max - 1; - int i = 0; - for (; i < m; i++) { - str_buf[i] = str[i]; - } - str_buf[i] = 0; - return m; -} - -uint16 UserDict::get_lemma_splids(LemmaIdType id_lemma, uint16 *splids, - uint16 splids_max, bool arg_valid) { - if (is_valid_lemma_id(id_lemma) == false) - return 0; - uint32 offset = offsets_by_id_[id_lemma - start_id_]; - uint8 nchar = get_lemma_nchar(offset); - const uint16 * ids = get_lemma_spell_ids(offset); - int i = 0; - for (; i < nchar && i < splids_max; i++) - splids[i] = ids[i]; - return i; -} - -size_t UserDict::predict(const char16 last_hzs[], uint16 hzs_len, - NPredictItem *npre_items, size_t npre_max, - size_t b4_used) { - uint32 new_added = 0; -#ifdef ___PREDICT_ENABLED___ - int32 end = dict_info_.lemma_count - 1; - int j = locate_first_in_predicts((const uint16*)last_hzs, hzs_len); - if (j == -1) - return 0; - - while (j <= end) { - uint32 offset = predicts_[j]; - // Ignore deleted lemmas - if (offset & kUserDictOffsetFlagRemove) { - j++; - continue; - } - uint32 nchar = get_lemma_nchar(offset); - uint16 * words = get_lemma_word(offset); - uint16 * splids = get_lemma_spell_ids(offset); - - if (nchar <= hzs_len) { - j++; - continue; - } - - if (memcmp(words, last_hzs, hzs_len << 1) == 0) { - if (new_added >= npre_max) { - return new_added; - } - 
uint32 cpy_len = - (nchar < kMaxPredictSize ? (nchar << 1) : (kMaxPredictSize << 1)) - - (hzs_len << 1); - npre_items[new_added].his_len = hzs_len; - npre_items[new_added].psb = get_lemma_score(words, splids, nchar); - memcpy(npre_items[new_added].pre_hzs, words + hzs_len, cpy_len); - if ((cpy_len >> 1) < kMaxPredictSize) { - npre_items[new_added].pre_hzs[cpy_len >> 1] = 0; - } - new_added++; - } else { - break; - } - - j++; - } -#endif - return new_added; -} - -int32 UserDict::locate_in_offsets(char16 lemma_str[], uint16 splid_str[], - uint16 lemma_len) { - int32 max_off = dict_info_.lemma_count; - - UserDictSearchable searchable; - prepare_locate(&searchable, splid_str, lemma_len); -#ifdef ___CACHE_ENABLED___ - int32 off; - uint32 start, count; - bool cached = load_cache(&searchable, &start, &count); - if (cached) { - off = start; - max_off = start + count; - } else { - off = locate_first_in_offsets(&searchable); - start = off; - } -#else - int32 off = locate_first_in_offsets(&searchable); -#endif - - if (off == -1) { - return off; - } - - while (off < max_off) { - uint32 offset = offsets_[off]; - if (offset & kUserDictOffsetFlagRemove) { - off++; - continue; - } - uint16 * splids = get_lemma_spell_ids(offset); -#ifdef ___CACHE_ENABLED___ - if (!cached && 0 != fuzzy_compare_spell_id(splids, lemma_len, &searchable)) - break; -#else - if (0 != fuzzy_compare_spell_id(splids, lemma_len, &searchable)) - break; -#endif - if (equal_spell_id(splids, lemma_len, &searchable) == true) { - uint16 * str = get_lemma_word(offset); - uint32 i = 0; - for (i = 0; i < lemma_len; i++) { - if (str[i] == lemma_str[i]) - continue; - break; - } - if (i < lemma_len) { - off++; - continue; - } -#ifdef ___CACHE_ENABLED___ - // No need to save_cache here, since current function is invoked by - // put_lemma. It's rarely possible for a user input same lemma twice. 
- // That means first time user type a new lemma, it is newly added into - // user dictionary, then it's possible that user type the same lemma - // again. - // Another reason save_cache can not be invoked here is this function - // aborts when lemma is found, and it never knows the count. -#endif - return off; - } - off++; - } - - return -1; -} - -#ifdef ___PREDICT_ENABLED___ -uint32 UserDict::locate_where_to_insert_in_predicts( - const uint16 * words, int lemma_len) { - int32 begin = 0; - int32 end = dict_info_.lemma_count - 1; - int32 middle = end; - - uint32 last_matched = middle; - - while (begin <= end) { - middle = (begin + end) >> 1; - uint32 offset = offsets_[middle]; - uint8 nchar = get_lemma_nchar(offset); - const uint16 * ws = get_lemma_word(offset); - - uint32 minl = nchar < lemma_len ? nchar : lemma_len; - uint32 k = 0; - int cmp = 0; - - for (; k < minl; k++) { - if (ws[k] < words[k]) { - cmp = -1; - break; - } else if (ws[k] > words[k]) { - cmp = 1; - break; - } - } - if (cmp == 0) { - if (nchar < lemma_len) - cmp = -1; - else if (nchar > lemma_len) - cmp = 1; - } - - if (cmp < 0) { - begin = middle + 1; - last_matched = middle; - } else if (cmp > 0) { - end = middle - 1; - } else { - end = middle - 1; - last_matched = middle; - } - } - - return last_matched; -} - -int32 UserDict::locate_first_in_predicts(const uint16 * words, int lemma_len) { - int32 begin = 0; - int32 end = dict_info_.lemma_count - 1; - int32 middle = -1; - - int32 last_matched = middle; - - while (begin <= end) { - middle = (begin + end) >> 1; - uint32 offset = offsets_[middle]; - uint8 nchar = get_lemma_nchar(offset); - const uint16 * ws = get_lemma_word(offset); - - uint32 minl = nchar < lemma_len ? 
nchar : lemma_len; - uint32 k = 0; - int cmp = 0; - - for (; k < minl; k++) { - if (ws[k] < words[k]) { - cmp = -1; - break; - } else if (ws[k] > words[k]) { - cmp = 1; - break; - } - } - if (cmp == 0) { - if (nchar >= lemma_len) - last_matched = middle; - if (nchar < lemma_len) - cmp = -1; - else if (nchar > lemma_len) - cmp = 1; - } - - if (cmp < 0) { - begin = middle + 1; - } else if (cmp > 0) { - end = middle - 1; - } else { - end = middle - 1; - } - } - - return last_matched; -} - -#endif - -LemmaIdType UserDict::get_lemma_id(char16 lemma_str[], uint16 splids[], - uint16 lemma_len) { - int32 off = locate_in_offsets(lemma_str, splids, lemma_len); - if (off == -1) { - return 0; - } - - return ids_[off]; -} - -LmaScoreType UserDict::get_lemma_score(LemmaIdType lemma_id) { - if (is_valid_state() == false) - return 0; - if (is_valid_lemma_id(lemma_id) == false) - return 0; - - return translate_score(_get_lemma_score(lemma_id)); -} - -LmaScoreType UserDict::get_lemma_score(char16 lemma_str[], uint16 splids[], - uint16 lemma_len) { - if (is_valid_state() == false) - return 0; - return translate_score(_get_lemma_score(lemma_str, splids, lemma_len)); -} - -int UserDict::_get_lemma_score(LemmaIdType lemma_id) { - if (is_valid_state() == false) - return 0; - if (is_valid_lemma_id(lemma_id) == false) - return 0; - - uint32 offset = offsets_by_id_[lemma_id - start_id_]; - - uint32 nchar = get_lemma_nchar(offset); - uint16 * spl = get_lemma_spell_ids(offset); - uint16 * wrd = get_lemma_word(offset); - - int32 off = locate_in_offsets(wrd, spl, nchar); - if (off == -1) { - return 0; - } - - return scores_[off]; -} - -int UserDict::_get_lemma_score(char16 lemma_str[], uint16 splids[], - uint16 lemma_len) { - if (is_valid_state() == false) - return 0; - - int32 off = locate_in_offsets(lemma_str, splids, lemma_len); - if (off == -1) { - return 0; - } - - return scores_[off]; -} - -#ifdef ___SYNC_ENABLED___ -void UserDict::remove_lemma_from_sync_list(uint32 offset) { - offset &= 
kUserDictOffsetMask; - uint32 i = 0; - for (; i < dict_info_.sync_count; i++) { - unsigned int off = (syncs_[i] & kUserDictOffsetMask); - if (off == offset) - break; - } - if (i < dict_info_.sync_count) { - syncs_[i] = syncs_[dict_info_.sync_count - 1]; - dict_info_.sync_count--; - } -} -#endif - -#ifdef ___PREDICT_ENABLED___ -void UserDict::remove_lemma_from_predict_list(uint32 offset) { - offset &= kUserDictOffsetMask; - uint32 i = 0; - for (; i < dict_info_.lemma_count; i++) { - unsigned int off = (predicts_[i] & kUserDictOffsetMask); - if (off == offset) { - predicts_[i] |= kUserDictOffsetFlagRemove; - break; - } - } -} -#endif - -bool UserDict::remove_lemma_by_offset_index(int offset_index) { - if (is_valid_state() == false) - return 0; - - int32 off = offset_index; - if (off == -1) { - return false; - } - - uint32 offset = offsets_[off]; - uint32 nchar = get_lemma_nchar(offset); - - offsets_[off] |= kUserDictOffsetFlagRemove; - -#ifdef ___SYNC_ENABLED___ - // Remove corresponding sync item - remove_lemma_from_sync_list(offset); -#endif - -#ifdef ___PREDICT_ENABLED___ - remove_lemma_from_predict_list(offset); -#endif - dict_info_.free_count++; - dict_info_.free_size += (2 + (nchar << 2)); - - if (state_ < USER_DICT_OFFSET_DIRTY) - state_ = USER_DICT_OFFSET_DIRTY; - return true; -} - -bool UserDict::remove_lemma(LemmaIdType lemma_id) { - if (is_valid_state() == false) - return 0; - if (is_valid_lemma_id(lemma_id) == false) - return false; - uint32 offset = offsets_by_id_[lemma_id - start_id_]; - - uint32 nchar = get_lemma_nchar(offset); - uint16 * spl = get_lemma_spell_ids(offset); - uint16 * wrd = get_lemma_word(offset); - - int32 off = locate_in_offsets(wrd, spl, nchar); - - return remove_lemma_by_offset_index(off); -} - -void UserDict::flush_cache() { - LemmaIdType start_id = start_id_; - const char * file = strdup(dict_file_); - if (!file) - return; - close_dict(); - load_dict(file, start_id, kUserDictIdEnd); - free((void*)file); -#ifdef ___CACHE_ENABLED___ 
- cache_init(); -#endif - return; -} - -bool UserDict::reset(const char *file) { - FILE *fp = fopen(file, "w+"); - if (!fp) { - return false; - } - uint32 version = kUserDictVersion; - size_t wred = fwrite(&version, 1, 4, fp); - UserDictInfo info; - memset(&info, 0, sizeof(info)); - // By default, no limitation for lemma count and size - // thereby, reclaim_ratio is never used - wred += fwrite(&info, 1, sizeof(info), fp); - if (wred != sizeof(info) + sizeof(version)) { - fclose(fp); - unlink(file); - return false; - } - fclose(fp); - return true; -} - -bool UserDict::validate(const char *file) { - // b is ignored in POSIX compatible os including Linux - // while b is important flag for Windows to specify binary mode - FILE *fp = fopen(file, "rb"); - if (!fp) { - return false; - } - - size_t size; - size_t readed; - uint32 version; - UserDictInfo dict_info; - - // validate - int err = fseek(fp, 0, SEEK_END); - if (err) { - goto error; - } - - size = ftell(fp); - if (size < 4 + sizeof(dict_info)) { - goto error; - } - - err = fseek(fp, 0, SEEK_SET); - if (err) { - goto error; - } - - readed = fread(&version, 1, sizeof(version), fp); - if (readed < sizeof(version)) { - goto error; - } - if (version != kUserDictVersion) { - goto error; - } - - err = fseek(fp, -1 * sizeof(dict_info), SEEK_END); - if (err) { - goto error; - } - - readed = fread(&dict_info, 1, sizeof(dict_info), fp); - if (readed != sizeof(dict_info)) { - goto error; - } - - if (size != get_dict_file_size(&dict_info)) { - goto error; - } - - fclose(fp); - return true; - - error: - fclose(fp); - return false; -} - -bool UserDict::load(const char *file, LemmaIdType start_id) { - if (0 != pthread_mutex_trylock(&g_mutex_)) { - return false; - } - // b is ignored in POSIX compatible os including Linux - // while b is important flag for Windows to specify binary mode - FILE *fp = fopen(file, "rb"); - if (!fp) { - pthread_mutex_unlock(&g_mutex_); - return false; - } - - size_t readed, toread; - UserDictInfo 
dict_info; - uint8 *lemmas = NULL; - uint32 *offsets = NULL; -#ifdef ___SYNC_ENABLED___ - uint32 *syncs = NULL; -#endif - uint32 *scores = NULL; - uint32 *ids = NULL; - uint32 *offsets_by_id = NULL; -#ifdef ___PREDICT_ENABLED___ - uint32 *predicts = NULL; -#endif - size_t i; - int err; - - err = fseek(fp, -1 * sizeof(dict_info), SEEK_END); - if (err) goto error; - - readed = fread(&dict_info, 1, sizeof(dict_info), fp); - if (readed != sizeof(dict_info)) goto error; - - lemmas = (uint8 *)malloc( - dict_info.lemma_size + - (kUserDictPreAlloc * (2 + (kUserDictAverageNchar << 2)))); - - if (!lemmas) goto error; - - offsets = (uint32 *)malloc((dict_info.lemma_count + kUserDictPreAlloc) << 2); - if (!offsets) goto error; - -#ifdef ___PREDICT_ENABLED___ - predicts = (uint32 *)malloc((dict_info.lemma_count + kUserDictPreAlloc) << 2); - if (!predicts) goto error; -#endif - -#ifdef ___SYNC_ENABLED___ - syncs = (uint32 *)malloc((dict_info.sync_count + kUserDictPreAlloc) << 2); - if (!syncs) goto error; -#endif - - scores = (uint32 *)malloc((dict_info.lemma_count + kUserDictPreAlloc) << 2); - if (!scores) goto error; - - ids = (uint32 *)malloc((dict_info.lemma_count + kUserDictPreAlloc) << 2); - if (!ids) goto error; - - offsets_by_id = (uint32 *)malloc( - (dict_info.lemma_count + kUserDictPreAlloc) << 2); - if (!offsets_by_id) goto error; - - err = fseek(fp, 4, SEEK_SET); - if (err) goto error; - - readed = 0; - while (readed < dict_info.lemma_size && !ferror(fp) && !feof(fp)) { - readed += fread(lemmas + readed, 1, dict_info.lemma_size - readed, fp); - } - if (readed < dict_info.lemma_size) - goto error; - - toread = (dict_info.lemma_count << 2); - readed = 0; - while (readed < toread && !ferror(fp) && !feof(fp)) { - readed += fread((((uint8*)offsets) + readed), 1, toread - readed, fp); - } - if (readed < toread) - goto error; - -#ifdef ___PREDICT_ENABLED___ - toread = (dict_info.lemma_count << 2); - readed = 0; - while (readed < toread && !ferror(fp) && !feof(fp)) { - 
readed += fread((((uint8*)predicts) + readed), 1, toread - readed, fp); - } - if (readed < toread) - goto error; -#endif - - readed = 0; - while (readed < toread && !ferror(fp) && !feof(fp)) { - readed += fread((((uint8*)scores) + readed), 1, toread - readed, fp); - } - if (readed < toread) - goto error; - -#ifdef ___SYNC_ENABLED___ - toread = (dict_info.sync_count << 2); - readed = 0; - while (readed < toread && !ferror(fp) && !feof(fp)) { - readed += fread((((uint8*)syncs) + readed), 1, toread - readed, fp); - } - if (readed < toread) - goto error; -#endif - - for (i = 0; i < dict_info.lemma_count; i++) { - ids[i] = start_id + i; - offsets_by_id[i] = offsets[i]; - } - - lemmas_ = lemmas; - offsets_ = offsets; -#ifdef ___SYNC_ENABLED___ - syncs_ = syncs; - sync_count_size_ = dict_info.sync_count + kUserDictPreAlloc; -#endif - offsets_by_id_ = offsets_by_id; - scores_ = scores; - ids_ = ids; -#ifdef ___PREDICT_ENABLED___ - predicts_ = predicts; -#endif - lemma_count_left_ = kUserDictPreAlloc; - lemma_size_left_ = kUserDictPreAlloc * (2 + (kUserDictAverageNchar << 2)); - memcpy(&dict_info_, &dict_info, sizeof(dict_info)); - state_ = USER_DICT_SYNC; - - fclose(fp); - - pthread_mutex_unlock(&g_mutex_); - return true; - - error: - if (lemmas) free(lemmas); - if (offsets) free(offsets); -#ifdef ___SYNC_ENABLED___ - if (syncs) free(syncs); -#endif - if (scores) free(scores); - if (ids) free(ids); - if (offsets_by_id) free(offsets_by_id); -#ifdef ___PREDICT_ENABLED___ - if (predicts) free(predicts); -#endif - fclose(fp); - pthread_mutex_unlock(&g_mutex_); - return false; -} - -void UserDict::write_back() { - // XXX write back is only allowed from close_dict due to thread-safe sake - if (state_ == USER_DICT_NONE || state_ == USER_DICT_SYNC) - return; - int fd = open(dict_file_, O_WRONLY); - if (fd == -1) - return; - switch (state_) { - case USER_DICT_DEFRAGMENTED: - write_back_all(fd); - break; - case USER_DICT_LEMMA_DIRTY: - write_back_lemma(fd); - break; - case 
USER_DICT_OFFSET_DIRTY: - write_back_offset(fd); - break; - case USER_DICT_SCORE_DIRTY: - write_back_score(fd); - break; -#ifdef ___SYNC_ENABLED___ - case USER_DICT_SYNC_DIRTY: - write_back_sync(fd); - break; -#endif - default: - break; - } - // It seems truncate is not need on Linux, Windows except Mac - // I am doing it here anyway for safety. - off_t cur = lseek(fd, 0, SEEK_CUR); - ftruncate(fd, cur); - close(fd); - state_ = USER_DICT_SYNC; -} - -#ifdef ___SYNC_ENABLED___ -void UserDict::write_back_sync(int fd) { - int err = lseek(fd, 4 + dict_info_.lemma_size - + (dict_info_.lemma_count << 3) -#ifdef ___PREDICT_ENABLED___ - + (dict_info_.lemma_count << 2) -#endif - , SEEK_SET); - if (err == -1) - return; - write(fd, syncs_, dict_info_.sync_count << 2); - write(fd, &dict_info_, sizeof(dict_info_)); -} -#endif - -void UserDict::write_back_offset(int fd) { - int err = lseek(fd, 4 + dict_info_.lemma_size, SEEK_SET); - if (err == -1) - return; - write(fd, offsets_, dict_info_.lemma_count << 2); -#ifdef ___PREDICT_ENABLED___ - write(fd, predicts_, dict_info_.lemma_count << 2); -#endif - write(fd, scores_, dict_info_.lemma_count << 2); -#ifdef ___SYNC_ENABLED___ - write(fd, syncs_, dict_info_.sync_count << 2); -#endif - write(fd, &dict_info_, sizeof(dict_info_)); -} - -void UserDict::write_back_score(int fd) { - int err = lseek(fd, 4 + dict_info_.lemma_size - + (dict_info_.lemma_count << 2) -#ifdef ___PREDICT_ENABLED___ - + (dict_info_.lemma_count << 2) -#endif - , SEEK_SET); - if (err == -1) - return; - write(fd, scores_, dict_info_.lemma_count << 2); -#ifdef ___SYNC_ENABLED___ - write(fd, syncs_, dict_info_.sync_count << 2); -#endif - write(fd, &dict_info_, sizeof(dict_info_)); -} - -void UserDict::write_back_lemma(int fd) { - int err = lseek(fd, 4, SEEK_SET); - if (err == -1) - return; - // New lemmas are always appended, no need to write whole lemma block - size_t need_write = kUserDictPreAlloc * - (2 + (kUserDictAverageNchar << 2)) - lemma_size_left_; - err = 
lseek(fd, dict_info_.lemma_size - need_write, SEEK_CUR); - if (err == -1) - return; - write(fd, lemmas_ + dict_info_.lemma_size - need_write, need_write); - - write(fd, offsets_, dict_info_.lemma_count << 2); -#ifdef ___PREDICT_ENABLED___ - write(fd, predicts_, dict_info_.lemma_count << 2); -#endif - write(fd, scores_, dict_info_.lemma_count << 2); -#ifdef ___SYNC_ENABLED___ - write(fd, syncs_, dict_info_.sync_count << 2); -#endif - write(fd, &dict_info_, sizeof(dict_info_)); -} - -void UserDict::write_back_all(int fd) { - // XXX lemma_size is handled differently in writeall - // and writelemma. I update lemma_size and lemma_count in different - // places for these two cases. Should fix it to make it consistent. - int err = lseek(fd, 4, SEEK_SET); - if (err == -1) - return; - write(fd, lemmas_, dict_info_.lemma_size); - write(fd, offsets_, dict_info_.lemma_count << 2); -#ifdef ___PREDICT_ENABLED___ - write(fd, predicts_, dict_info_.lemma_count << 2); -#endif - write(fd, scores_, dict_info_.lemma_count << 2); -#ifdef ___SYNC_ENABLED___ - write(fd, syncs_, dict_info_.sync_count << 2); -#endif - write(fd, &dict_info_, sizeof(dict_info_)); -} - -#ifdef ___CACHE_ENABLED___ -bool UserDict::load_cache(UserDictSearchable *searchable, - uint32 *offset, uint32 *length) { - UserDictCache *cache = &caches_[searchable->splids_len - 1]; - if (cache->head == cache->tail) - return false; - - uint16 j, sig_len = kMaxLemmaSize / 4; - uint16 i = cache->head; - while (1) { - j = 0; - for (; j < sig_len; j++) { - if (cache->signatures[i][j] != searchable->signature[j]) - break; - } - if (j < sig_len) { - i++; - if (i >= kUserDictCacheSize) - i -= kUserDictCacheSize; - if (i == cache->tail) - break; - continue; - } - *offset = cache->offsets[i]; - *length = cache->lengths[i]; - return true; - } - return false; -} - -void UserDict::save_cache(UserDictSearchable *searchable, - uint32 offset, uint32 length) { - UserDictCache *cache = &caches_[searchable->splids_len - 1]; - uint16 next = 
cache->tail; - - cache->offsets[next] = offset; - cache->lengths[next] = length; - uint16 sig_len = kMaxLemmaSize / 4; - uint16 j = 0; - for (; j < sig_len; j++) { - cache->signatures[next][j] = searchable->signature[j]; - } - - if (++next >= kUserDictCacheSize) { - next -= kUserDictCacheSize; - } - if (next == cache->head) { - cache->head++; - if (cache->head >= kUserDictCacheSize) { - cache->head -= kUserDictCacheSize; - } - } - cache->tail = next; -} - -void UserDict::reset_cache() { - memset(caches_, 0, sizeof(caches_)); -} - -bool UserDict::load_miss_cache(UserDictSearchable *searchable) { - UserDictMissCache *cache = &miss_caches_[searchable->splids_len - 1]; - if (cache->head == cache->tail) - return false; - - uint16 j, sig_len = kMaxLemmaSize / 4; - uint16 i = cache->head; - while (1) { - j = 0; - for (; j < sig_len; j++) { - if (cache->signatures[i][j] != searchable->signature[j]) - break; - } - if (j < sig_len) { - i++; - if (i >= kUserDictMissCacheSize) - i -= kUserDictMissCacheSize; - if (i == cache->tail) - break; - continue; - } - return true; - } - return false; -} - -void UserDict::save_miss_cache(UserDictSearchable *searchable) { - UserDictMissCache *cache = &miss_caches_[searchable->splids_len - 1]; - uint16 next = cache->tail; - - uint16 sig_len = kMaxLemmaSize / 4; - uint16 j = 0; - for (; j < sig_len; j++) { - cache->signatures[next][j] = searchable->signature[j]; - } - - if (++next >= kUserDictMissCacheSize) { - next -= kUserDictMissCacheSize; - } - if (next == cache->head) { - cache->head++; - if (cache->head >= kUserDictMissCacheSize) { - cache->head -= kUserDictMissCacheSize; - } - } - cache->tail = next; -} - -void UserDict::reset_miss_cache() { - memset(miss_caches_, 0, sizeof(miss_caches_)); -} - -void UserDict::cache_init() { - reset_cache(); - reset_miss_cache(); -} - -bool UserDict::cache_hit(UserDictSearchable *searchable, - uint32 *offset, uint32 *length) { - bool hit = load_miss_cache(searchable); - if (hit) { - *offset = 0; - 
*length = 0; - return true; - } - hit = load_cache(searchable, offset, length); - if (hit) { - return true; - } - return false; -} - -void UserDict::cache_push(UserDictCacheType type, - UserDictSearchable *searchable, - uint32 offset, uint32 length) { - switch (type) { - case USER_DICT_MISS_CACHE: - save_miss_cache(searchable); - break; - case USER_DICT_CACHE: - save_cache(searchable, offset, length); - break; - default: - break; - } -} - -#endif - -void UserDict::defragment(void) { -#ifdef ___DEBUG_PERF___ - DEBUG_PERF_BEGIN; -#endif - if (is_valid_state() == false) - return; - // Fixup offsets_, set REMOVE flag to lemma's flag if needed - size_t first_freed = 0; - size_t first_inuse = 0; - while (first_freed < dict_info_.lemma_count) { - // Find first freed offset - while ((offsets_[first_freed] & kUserDictOffsetFlagRemove) == 0 && - first_freed < dict_info_.lemma_count) { - first_freed++; - } - if (first_freed < dict_info_.lemma_count) { - // Save REMOVE flag to lemma flag - int off = offsets_[first_freed]; - set_lemma_flag(off, kUserDictLemmaFlagRemove); - } else { - break; - } - // Find first inuse offse after first_freed - first_inuse = first_freed + 1; - while ((offsets_[first_inuse] & kUserDictOffsetFlagRemove) && - (first_inuse < dict_info_.lemma_count)) { - // Save REMOVE flag to lemma flag - int off = offsets_[first_inuse]; - set_lemma_flag(off, kUserDictLemmaFlagRemove); - first_inuse++; - } - if (first_inuse >= dict_info_.lemma_count) { - break; - } - // Swap offsets_ - int tmp = offsets_[first_inuse]; - offsets_[first_inuse] = offsets_[first_freed]; - offsets_[first_freed] = tmp; - // Move scores_, no need to swap - tmp = scores_[first_inuse]; - scores_[first_inuse] = scores_[first_freed]; - scores_[first_freed] = tmp; - // Swap ids_ - LemmaIdType tmpid = ids_[first_inuse]; - ids_[first_inuse] = ids_[first_freed]; - ids_[first_freed] = tmpid; - // Go on - first_freed++; - } -#ifdef ___PREDICT_ENABLED___ - // Fixup predicts_ - first_freed = 0; - 
first_inuse = 0; - while (first_freed < dict_info_.lemma_count) { - // Find first freed offset - while ((predicts_[first_freed] & kUserDictOffsetFlagRemove) == 0 && - first_freed < dict_info_.lemma_count) { - first_freed++; - } - if (first_freed >= dict_info_.lemma_count) - break; - // Find first inuse offse after first_freed - first_inuse = first_freed + 1; - while ((predicts_[first_inuse] & kUserDictOffsetFlagRemove) - && (first_inuse < dict_info_.lemma_count)) { - first_inuse++; - } - if (first_inuse >= dict_info_.lemma_count) { - break; - } - // Swap offsets_ - int tmp = predicts_[first_inuse]; - predicts_[first_inuse] = predicts_[first_freed]; - predicts_[first_freed] = tmp; - // Go on - first_freed++; - } -#endif - dict_info_.lemma_count = first_freed; - // Fixup lemmas_ - size_t begin = 0; - size_t end = 0; - size_t dst = 0; - int total_size = dict_info_.lemma_size + lemma_size_left_; - int total_count = dict_info_.lemma_count + lemma_count_left_; - size_t real_size = total_size - lemma_size_left_; - while (dst < real_size) { - unsigned char flag = get_lemma_flag(dst); - unsigned char nchr = get_lemma_nchar(dst); - if ((flag & kUserDictLemmaFlagRemove) == 0) { - dst += nchr * 4 + 2; - continue; - } - break; - } - if (dst >= real_size) - return; - - end = dst; - while (end < real_size) { - begin = end + get_lemma_nchar(end) * 4 + 2; - repeat: - // not used any more - if (begin >= real_size) - break; - unsigned char flag = get_lemma_flag(begin); - unsigned char nchr = get_lemma_nchar(begin); - if (flag & kUserDictLemmaFlagRemove) { - begin += nchr * 4 + 2; - goto repeat; - } - end = begin + nchr * 4 + 2; - while (end < real_size) { - unsigned char eflag = get_lemma_flag(end); - unsigned char enchr = get_lemma_nchar(end); - if ((eflag & kUserDictLemmaFlagRemove) == 0) { - end += enchr * 4 + 2; - continue; - } - break; - } - memmove(lemmas_ + dst, lemmas_ + begin, end - begin); - for (size_t j = 0; j < dict_info_.lemma_count; j++) { - if (offsets_[j] >= begin && 
offsets_[j] < end) { - offsets_[j] -= (begin - dst); - offsets_by_id_[ids_[j] - start_id_] = offsets_[j]; - } -#ifdef ___PREDICT_ENABLED___ - if (predicts_[j] >= begin && predicts_[j] < end) { - predicts_[j] -= (begin - dst); - } -#endif - } -#ifdef ___SYNC_ENABLED___ - for (size_t j = 0; j < dict_info_.sync_count; j++) { - if (syncs_[j] >= begin && syncs_[j] < end) { - syncs_[j] -= (begin - dst); - } - } -#endif - dst += (end - begin); - } - - dict_info_.free_count = 0; - dict_info_.free_size = 0; - dict_info_.lemma_size = dst; - lemma_size_left_ = total_size - dict_info_.lemma_size; - lemma_count_left_ = total_count - dict_info_.lemma_count; - - // XXX Without following code, - // offsets_by_id_ is not reordered. - // That's to say, all removed lemmas' ids are not collected back. - // There may not be room for addition of new lemmas due to - // offsests_by_id_ reason, although lemma_size_left_ is fixed. - // By default, we do want defrag as fast as possible, because - // during defrag procedure, other peers can not write new lemmas - // to user dictionary file. - // XXX If write-back is invoked immediately after - // this defragment, no need to fix up following in-mem data. 
- for (uint32 i = 0; i < dict_info_.lemma_count; i++) { - ids_[i] = start_id_ + i; - offsets_by_id_[i] = offsets_[i]; - } - - state_ = USER_DICT_DEFRAGMENTED; - -#ifdef ___DEBUG_PERF___ - DEBUG_PERF_END; - LOGD_PERF("defragment"); -#endif -} - -#ifdef ___SYNC_ENABLED___ -void UserDict::clear_sync_lemmas(unsigned int start, unsigned int end) { - if (is_valid_state() == false) - return; - if (end > dict_info_.sync_count) - end = dict_info_.sync_count; - memmove(syncs_ + start, syncs_ + end, (dict_info_.sync_count - end) << 2); - dict_info_.sync_count -= (end - start); - if (state_ < USER_DICT_SYNC_DIRTY) - state_ = USER_DICT_SYNC_DIRTY; -} - -int UserDict::get_sync_count() { - if (is_valid_state() == false) - return 0; - return dict_info_.sync_count; -} - -LemmaIdType UserDict::put_lemma_no_sync(char16 lemma_str[], uint16 splids[], - uint16 lemma_len, uint16 count, uint64 lmt) { - int again = 0; - begin: - LemmaIdType id; - uint32 * syncs_bak = syncs_; - syncs_ = NULL; - id = _put_lemma(lemma_str, splids, lemma_len, count, lmt); - syncs_ = syncs_bak; - if (id == 0 && again == 0) { - if ((dict_info_.limit_lemma_count > 0 && - dict_info_.lemma_count >= dict_info_.limit_lemma_count) - || (dict_info_.limit_lemma_size > 0 && - dict_info_.lemma_size + (2 + (lemma_len << 2)) - > dict_info_.limit_lemma_size)) { - // XXX Always reclaim and defrag in sync code path - // sync thread is background thread and ok with heavy work - reclaim(); - defragment(); - flush_cache(); - again = 1; - goto begin; - } - } - return id; -} - -int UserDict::put_lemmas_no_sync_from_utf16le_string(char16 * lemmas, int len) { - int newly_added = 0; - - SpellingParser * spl_parser = new SpellingParser(); - if (!spl_parser) { - return 0; - } -#ifdef ___DEBUG_PERF___ - DEBUG_PERF_BEGIN; -#endif - char16 *ptr = lemmas; - - // Extract pinyin,words,frequence,last_mod_time - char16 * p = ptr, * py16 = ptr; - char16 * hz16 = NULL; - int py16_len = 0; - uint16 splid[kMaxLemmaSize]; - int splid_len = 0; - int 
hz16_len = 0; - char16 * fr16 = NULL; - int fr16_len = 0; - - while (p - ptr < len) { - // Pinyin - py16 = p; - splid_len = 0; - while (*p != 0x2c && (p - ptr) < len) { - if (*p == 0x20) - splid_len++; - p++; - } - splid_len++; - if (p - ptr == len) - break; - py16_len = p - py16; - if (kMaxLemmaSize < splid_len) { - break; - } - bool is_pre; - int splidl = spl_parser->splstr16_to_idxs_f( - py16, py16_len, splid, NULL, kMaxLemmaSize, is_pre); - if (splidl != splid_len) - break; - // Phrase - hz16 = ++p; - while (*p != 0x2c && (p - ptr) < len) { - p++; - } - hz16_len = p - hz16; - if (hz16_len != splid_len) - break; - // Frequency - fr16 = ++p; - fr16_len = 0; - while (*p != 0x2c && (p - ptr) < len) { - p++; - } - fr16_len = p - fr16; - uint32 intf = (uint32)utf16le_atoll(fr16, fr16_len); - // Last modified time - fr16 = ++p; - fr16_len = 0; - while (*p != 0x3b && (p - ptr) < len) { - p++; - } - fr16_len = p - fr16; - uint64 last_mod = utf16le_atoll(fr16, fr16_len); - - put_lemma_no_sync(hz16, splid, splid_len, intf, last_mod); - newly_added++; - - p++; - } - -#ifdef ___DEBUG_PERF___ - DEBUG_PERF_END; - LOGD_PERF("put_lemmas_no_sync_from_utf16le_string"); -#endif - return newly_added; -} - -int UserDict::get_sync_lemmas_in_utf16le_string_from_beginning( - char16 * str, int size, int * count) { - int len = 0; - *count = 0; - - int left_len = size; - - if (is_valid_state() == false) - return len; - - SpellingTrie * spl_trie = &SpellingTrie::get_instance(); - if (!spl_trie) { - return 0; - } - - uint32 i; - for (i = 0; i < dict_info_.sync_count; i++) { - int offset = syncs_[i]; - uint32 nchar = get_lemma_nchar(offset); - uint16 *spl = get_lemma_spell_ids(offset); - uint16 *wrd = get_lemma_word(offset); - int score = _get_lemma_score(wrd, spl, nchar); - - static char score_temp[32], *pscore_temp = score_temp; - static char16 temp[256], *ptemp = temp; - - pscore_temp = score_temp; - ptemp = temp; - - uint32 j; - // Add pinyin - for (j = 0; j < nchar; j++) { - int ret_len 
= spl_trie->get_spelling_str16( - spl[j], ptemp, temp + sizeof(temp) - ptemp); - if (ret_len <= 0) - break; - ptemp += ret_len; - if (ptemp < temp + sizeof(temp) - 1) { - *(ptemp++) = ' '; - } else { - j = 0; - break; - } - } - if (j < nchar) { - continue; - } - ptemp--; - if (ptemp < temp + sizeof(temp) - 1) { - *(ptemp++) = ','; - } else { - continue; - } - // Add phrase - for (j = 0; j < nchar; j++) { - if (ptemp < temp + sizeof(temp) - 1) { - *(ptemp++) = wrd[j]; - } else { - break; - } - } - if (j < nchar) { - continue; - } - if (ptemp < temp + sizeof(temp) - 1) { - *(ptemp++) = ','; - } else { - continue; - } - // Add frequency - uint32 intf = extract_score_freq(score); - int ret_len = utf16le_lltoa(intf, ptemp, temp + sizeof(temp) - ptemp); - if (ret_len <= 0) - continue; - ptemp += ret_len; - if (ptemp < temp + sizeof(temp) - 1) { - *(ptemp++) = ','; - } else { - continue; - } - // Add last modified time - uint64 last_mod = extract_score_lmt(score); - ret_len = utf16le_lltoa(last_mod, ptemp, temp + sizeof(temp) - ptemp); - if (ret_len <= 0) - continue; - ptemp += ret_len; - if (ptemp < temp + sizeof(temp) - 1) { - *(ptemp++) = ';'; - } else { - continue; - } - - // Write to string - int need_len = ptemp - temp; - if (need_len > left_len) - break; - memcpy(str + len, temp, need_len * 2); - left_len -= need_len; - - len += need_len; - (*count)++; - } - - if (len > 0) { - if (state_ < USER_DICT_SYNC_DIRTY) - state_ = USER_DICT_SYNC_DIRTY; - } - return len; -} - -#endif - -bool UserDict::state(UserDictStat * stat) { - if (is_valid_state() == false) - return false; - if (!stat) - return false; - stat->version = version_; - stat->file_name = dict_file_; - stat->load_time.tv_sec = load_time_.tv_sec; - stat->load_time.tv_usec = load_time_.tv_usec; - pthread_mutex_lock(&g_mutex_); - stat->last_update.tv_sec = g_last_update_.tv_sec; - stat->last_update.tv_usec = g_last_update_.tv_usec; - pthread_mutex_unlock(&g_mutex_); - stat->disk_size = 
get_dict_file_size(&dict_info_); - stat->lemma_count = dict_info_.lemma_count; - stat->lemma_size = dict_info_.lemma_size; - stat->delete_count = dict_info_.free_count; - stat->delete_size = dict_info_.free_size; -#ifdef ___SYNC_ENABLED___ - stat->sync_count = dict_info_.sync_count; -#endif - stat->limit_lemma_count = dict_info_.limit_lemma_count; - stat->limit_lemma_size = dict_info_.limit_lemma_size; - stat->reclaim_ratio = dict_info_.reclaim_ratio; - return true; -} - -void UserDict::set_limit(uint32 max_lemma_count, - uint32 max_lemma_size, uint32 reclaim_ratio) { - dict_info_.limit_lemma_count = max_lemma_count; - dict_info_.limit_lemma_size = max_lemma_size; - if (reclaim_ratio > 100) - reclaim_ratio = 100; - dict_info_.reclaim_ratio = reclaim_ratio; -} - -void UserDict::reclaim() { - if (is_valid_state() == false) - return; - - switch (dict_info_.reclaim_ratio) { - case 0: - return; - case 100: - // TODO: CLEAR to be implemented - assert(false); - return; - default: - break; - } - - // XXX Reclaim is only based on count, not size - uint32 count = dict_info_.lemma_count; - int rc = count * dict_info_.reclaim_ratio / 100; - - UserDictScoreOffsetPair * score_offset_pairs = NULL; - score_offset_pairs = (UserDictScoreOffsetPair *)malloc( - sizeof(UserDictScoreOffsetPair) * rc); - if (score_offset_pairs == NULL) { - return; - } - - for (int i = 0; i < rc; i++) { - int s = scores_[i]; - score_offset_pairs[i].score = s; - score_offset_pairs[i].offset_index = i; - } - - for (int i = (rc + 1) / 2; i >= 0; i--) - shift_down(score_offset_pairs, i, rc); - - for (uint32 i = rc; i < dict_info_.lemma_count; i++) { - int s = scores_[i]; - if (s < score_offset_pairs[0].score) { - score_offset_pairs[0].score = s; - score_offset_pairs[0].offset_index = i; - shift_down(score_offset_pairs, 0, rc); - } - } - - for (int i = 0; i < rc; i++) { - int off = score_offset_pairs[i].offset_index; - remove_lemma_by_offset_index(off); - } - if (rc > 0) { - if (state_ < 
USER_DICT_OFFSET_DIRTY) - state_ = USER_DICT_OFFSET_DIRTY; - } - - free(score_offset_pairs); -} - -inline void UserDict::swap(UserDictScoreOffsetPair * sop, int i, int j) { - int s = sop[i].score; - int p = sop[i].offset_index; - sop[i].score = sop[j].score; - sop[i].offset_index = sop[j].offset_index; - sop[j].score = s; - sop[j].offset_index = p; -} - -void UserDict::shift_down(UserDictScoreOffsetPair * sop, int i, int n) { - int par = i; - while (par < n) { - int left = par * 2 + 1; - int right = left + 1; - if (left >= n && right >= n) - break; - if (right >= n) { - if (sop[left].score > sop[par].score) { - swap(sop, left, par); - par = left; - continue; - } - } else if (sop[left].score > sop[right].score && - sop[left].score > sop[par].score) { - swap(sop, left, par); - par = left; - continue; - } else if (sop[right].score > sop[left].score && - sop[right].score > sop[par].score) { - swap(sop, right, par); - par = right; - continue; - } - break; - } -} - -LemmaIdType UserDict::put_lemma(char16 lemma_str[], uint16 splids[], - uint16 lemma_len, uint16 count) { - return _put_lemma(lemma_str, splids, lemma_len, count, time(NULL)); -} - -LemmaIdType UserDict::_put_lemma(char16 lemma_str[], uint16 splids[], - uint16 lemma_len, uint16 count, uint64 lmt) { -#ifdef ___DEBUG_PERF___ - DEBUG_PERF_BEGIN; -#endif - if (is_valid_state() == false) - return 0; - int32 off = locate_in_offsets(lemma_str, splids, lemma_len); - if (off != -1) { - int delta_score = count - scores_[off]; - dict_info_.total_nfreq += delta_score; - scores_[off] = build_score(lmt, count); - if (state_ < USER_DICT_SCORE_DIRTY) - state_ = USER_DICT_SCORE_DIRTY; -#ifdef ___DEBUG_PERF___ - DEBUG_PERF_END; - LOGD_PERF("_put_lemma(update)"); -#endif - return ids_[off]; - } else { - if ((dict_info_.limit_lemma_count > 0 && - dict_info_.lemma_count >= dict_info_.limit_lemma_count) - || (dict_info_.limit_lemma_size > 0 && - dict_info_.lemma_size + (2 + (lemma_len << 2)) - > dict_info_.limit_lemma_size)) { - // 
XXX Don't defragment here, it's too time-consuming. - return 0; - } - int flushed = 0; - if (lemma_count_left_ == 0 || - lemma_size_left_ < (size_t)(2 + (lemma_len << 2))) { - - // XXX When there is no space for new lemma, we flush to disk - // flush_cache() may be called by upper user - // and better place shoule be found instead of here - flush_cache(); - flushed = 1; - // Or simply return and do nothing - // return 0; - } -#ifdef ___DEBUG_PERF___ - DEBUG_PERF_END; - LOGD_PERF(flushed ? "_put_lemma(flush+add)" : "_put_lemma(add)"); -#endif - LemmaIdType id = append_a_lemma(lemma_str, splids, lemma_len, count, lmt); -#ifdef ___SYNC_ENABLED___ - if (syncs_ && id != 0) { - queue_lemma_for_sync(id); - } -#endif - return id; - } - return 0; -} - -#ifdef ___SYNC_ENABLED___ -void UserDict::queue_lemma_for_sync(LemmaIdType id) { - if (dict_info_.sync_count < sync_count_size_) { - syncs_[dict_info_.sync_count++] = offsets_by_id_[id - start_id_]; - } else { - uint32 * syncs = (uint32*)realloc( - syncs_, (sync_count_size_ + kUserDictPreAlloc) << 2); - if (syncs) { - sync_count_size_ += kUserDictPreAlloc; - syncs_ = syncs; - syncs_[dict_info_.sync_count++] = offsets_by_id_[id - start_id_]; - } - } -} -#endif - -LemmaIdType UserDict::update_lemma(LemmaIdType lemma_id, int16 delta_count, - bool selected) { -#ifdef ___DEBUG_PERF___ - DEBUG_PERF_BEGIN; -#endif - if (is_valid_state() == false) - return 0; - if (is_valid_lemma_id(lemma_id) == false) - return 0; - uint32 offset = offsets_by_id_[lemma_id - start_id_]; - uint8 lemma_len = get_lemma_nchar(offset); - char16 * lemma_str = get_lemma_word(offset); - uint16 * splids = get_lemma_spell_ids(offset); - - int32 off = locate_in_offsets(lemma_str, splids, lemma_len); - if (off != -1) { - int score = scores_[off]; - int count = extract_score_freq(score); - uint64 lmt = extract_score_lmt(score); - if (count + delta_count > kUserDictMaxFrequency || - count + delta_count < count) { - delta_count = kUserDictMaxFrequency - count; - } - 
count += delta_count; - dict_info_.total_nfreq += delta_count; - if (selected) { - lmt = time(NULL); - } - scores_[off] = build_score(lmt, count); - if (state_ < USER_DICT_SCORE_DIRTY) - state_ = USER_DICT_SCORE_DIRTY; -#ifdef ___DEBUG_PERF___ - DEBUG_PERF_END; - LOGD_PERF("update_lemma"); -#endif -#ifdef ___SYNC_ENABLED___ - queue_lemma_for_sync(ids_[off]); -#endif - return ids_[off]; - } - return 0; -} - -size_t UserDict::get_total_lemma_count() { - return dict_info_.total_nfreq; -} - -void UserDict::set_total_lemma_count_of_others(size_t count) { - total_other_nfreq_ = count; -} - -LemmaIdType UserDict::append_a_lemma(char16 lemma_str[], uint16 splids[], - uint16 lemma_len, uint16 count, uint64 lmt) { - LemmaIdType id = get_max_lemma_id() + 1; - size_t offset = dict_info_.lemma_size; - if (offset > kUserDictOffsetMask) - return 0; - - lemmas_[offset] = 0; - lemmas_[offset + 1] = (uint8)lemma_len; - for (size_t i = 0; i < lemma_len; i++) { - *((uint16*)&lemmas_[offset + 2 + (i << 1)]) = splids[i]; - *((char16*)&lemmas_[offset + 2 + (lemma_len << 1) + (i << 1)]) - = lemma_str[i]; - } - uint32 off = dict_info_.lemma_count; - offsets_[off] = offset; - scores_[off] = build_score(lmt, count); - ids_[off] = id; -#ifdef ___PREDICT_ENABLED___ - predicts_[off] = offset; -#endif - - offsets_by_id_[id - start_id_] = offset; - - dict_info_.lemma_count++; - dict_info_.lemma_size += (2 + (lemma_len << 2)); - lemma_count_left_--; - lemma_size_left_ -= (2 + (lemma_len << 2)); - - // Sort - - UserDictSearchable searchable; - prepare_locate(&searchable, splids, lemma_len); - - size_t i = 0; - while (i < off) { - offset = offsets_[i]; - uint32 nchar = get_lemma_nchar(offset); - uint16 * spl = get_lemma_spell_ids(offset); - - if (0 <= fuzzy_compare_spell_id(spl, nchar, &searchable)) - break; - i++; - } - if (i != off) { - uint32 temp = offsets_[off]; - memmove(offsets_ + i + 1, offsets_ + i, (off - i) << 2); - offsets_[i] = temp; - - temp = scores_[off]; - memmove(scores_ + i + 1, 
scores_ + i, (off - i) << 2); - scores_[i] = temp; - - temp = ids_[off]; - memmove(ids_ + i + 1, ids_ + i, (off - i) << 2); - ids_[i] = temp; - } - -#ifdef ___PREDICT_ENABLED___ - uint32 j = 0; - uint16 * words_new = get_lemma_word(predicts_[off]); - j = locate_where_to_insert_in_predicts(words_new, lemma_len); - if (j != off) { - uint32 temp = predicts_[off]; - memmove(predicts_ + j + 1, predicts_ + j, (off - j) << 2); - predicts_[j] = temp; - } -#endif - - if (state_ < USER_DICT_LEMMA_DIRTY) - state_ = USER_DICT_LEMMA_DIRTY; - -#ifdef ___CACHE_ENABLED___ - cache_init(); -#endif - - dict_info_.total_nfreq += count; - return id; -} -} diff --git a/userdict.h b/userdict.h deleted file mode 100644 index 02da218..0000000 --- a/userdict.h +++ /dev/null @@ -1,428 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef PINYINIME_INCLUDE_USERDICT_H__ -#define PINYINIME_INCLUDE_USERDICT_H__ - -#define ___CACHE_ENABLED___ -#define ___SYNC_ENABLED___ -#define ___PREDICT_ENABLED___ - -// Debug performance for operations -// #define ___DEBUG_PERF___ - -#include -#include "atomdictbase.h" - -namespace ime_pinyin { - -class UserDict : public AtomDictBase { - public: - UserDict(); - ~UserDict(); - - bool load_dict(const char *file_name, LemmaIdType start_id, - LemmaIdType end_id); - - bool close_dict(); - - size_t number_of_lemmas(); - - void reset_milestones(uint16 from_step, MileStoneHandle from_handle); - - MileStoneHandle extend_dict(MileStoneHandle from_handle, - const DictExtPara *dep, LmaPsbItem *lpi_items, - size_t lpi_max, size_t *lpi_num); - - size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len, - LmaPsbItem *lpi_items, size_t lpi_max); - - uint16 get_lemma_str(LemmaIdType id_lemma, char16* str_buf, - uint16 str_max); - - uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids, - uint16 splids_max, bool arg_valid); - - size_t predict(const char16 last_hzs[], uint16 hzs_len, - NPredictItem *npre_items, size_t npre_max, - size_t b4_used); - - // Full spelling ids are required - LemmaIdType put_lemma(char16 lemma_str[], uint16 splids[], - uint16 lemma_len, uint16 count); - - LemmaIdType update_lemma(LemmaIdType lemma_id, int16 delta_count, - bool selected); - - LemmaIdType get_lemma_id(char16 lemma_str[], uint16 splids[], - uint16 lemma_len); - - LmaScoreType get_lemma_score(LemmaIdType lemma_id); - - LmaScoreType get_lemma_score(char16 lemma_str[], uint16 splids[], - uint16 lemma_len); - - bool remove_lemma(LemmaIdType lemma_id); - - size_t get_total_lemma_count(); - void set_total_lemma_count_of_others(size_t count); - - void flush_cache(); - - void set_limit(uint32 max_lemma_count, uint32 max_lemma_size, - uint32 reclaim_ratio); - - void reclaim(); - - void defragment(); - -#ifdef ___SYNC_ENABLED___ - void clear_sync_lemmas(unsigned int start, 
unsigned int end); - - int get_sync_count(); - - LemmaIdType put_lemma_no_sync(char16 lemma_str[], uint16 splids[], - uint16 lemma_len, uint16 count, uint64 lmt); - /** - * Add lemmas encoded in UTF-16LE into dictionary without adding sync flag. - * - * @param lemmas in format of 'wo men,WM,0.32;da jia,DJ,0.12' - * @param len length of lemmas string in UTF-16LE - * @return newly added lemma count - */ - int put_lemmas_no_sync_from_utf16le_string(char16 * lemmas, int len); - - /** - * Get lemmas need sync to a UTF-16LE string of above format. - * Note: input buffer (str) must not be too small. If str is too small to - * contain single one lemma, there might be a dead loop. - * - * @param str buffer to write lemmas - * @param size buffer size in UTF-16LE - * @param count output value of lemma returned - * @return UTF-16LE string length - */ - int get_sync_lemmas_in_utf16le_string_from_beginning( - char16 * str, int size, int * count); - -#endif - - struct UserDictStat { - uint32 version; - const char * file_name; - struct timeval load_time; - struct timeval last_update; - uint32 disk_size; - uint32 lemma_count; - uint32 lemma_size; - uint32 delete_count; - uint32 delete_size; -#ifdef ___SYNC_ENABLED___ - uint32 sync_count; -#endif - uint32 reclaim_ratio; - uint32 limit_lemma_count; - uint32 limit_lemma_size; - }; - - bool state(UserDictStat * stat); - - private: - uint32 total_other_nfreq_; - struct timeval load_time_; - LemmaIdType start_id_; - uint32 version_; - uint8 * lemmas_; - - // In-Memory-Only flag for each lemma - static const uint8 kUserDictLemmaFlagRemove = 1; - // Inuse lemmas' offset - uint32 * offsets_; - // Highest bit in offset tells whether corresponding lemma is removed - static const uint32 kUserDictOffsetFlagRemove = (1 << 31); - // Maximum possible for the offset - static const uint32 kUserDictOffsetMask = ~(kUserDictOffsetFlagRemove); - // Bit width for last modified time, from 1 to 16 - static const uint32 kUserDictLMTBitWidth = 16; - // 
Granularity for last modified time in second - static const uint32 kUserDictLMTGranularity = 60 * 60 * 24 * 7; - // Maximum frequency count - static const uint16 kUserDictMaxFrequency = 0xFFFF; - -#define COARSE_UTC(year, month, day, hour, minute, second) \ - ( \ - (year - 1970) * 365 * 24 * 60 * 60 + \ - (month - 1) * 30 * 24 * 60 * 60 + \ - (day - 1) * 24 * 60 * 60 + \ - (hour - 0) * 60 * 60 + \ - (minute - 0) * 60 + \ - (second - 0) \ - ) - static const uint64 kUserDictLMTSince = COARSE_UTC(2009, 1, 1, 0, 0, 0); - - // Correspond to offsets_ - uint32 * scores_; - // Following two fields are only valid in memory - uint32 * ids_; -#ifdef ___PREDICT_ENABLED___ - uint32 * predicts_; -#endif -#ifdef ___SYNC_ENABLED___ - uint32 * syncs_; - size_t sync_count_size_; -#endif - uint32 * offsets_by_id_; - - size_t lemma_count_left_; - size_t lemma_size_left_; - - const char * dict_file_; - - // Be sure size is 4xN - struct UserDictInfo { - // When limitation reached, how much percentage will be reclaimed (1 ~ 100) - uint32 reclaim_ratio; - // maximum lemma count, 0 means no limitation - uint32 limit_lemma_count; - // Maximum lemma size, it's different from - // whole disk file size or in-mem dict size - // 0 means no limitation - uint32 limit_lemma_size; - // Total lemma count including deleted and inuse - // Also indicate offsets_ size - uint32 lemma_count; - // Total size of lemmas including used and freed - uint32 lemma_size; - // Freed lemma count - uint32 free_count; - // Freed lemma size in byte - uint32 free_size; -#ifdef ___SYNC_ENABLED___ - uint32 sync_count; -#endif - int32 total_nfreq; - } dict_info_; - - static const uint32 kUserDictVersion = 0x0ABCDEF0; - - static const uint32 kUserDictPreAlloc = 32; - static const uint32 kUserDictAverageNchar = 8; - - enum UserDictState { - // Keep in order - USER_DICT_NONE = 0, - USER_DICT_SYNC, -#ifdef ___SYNC_ENABLED___ - USER_DICT_SYNC_DIRTY, -#endif - USER_DICT_SCORE_DIRTY, - USER_DICT_OFFSET_DIRTY, - 
USER_DICT_LEMMA_DIRTY, - - USER_DICT_DEFRAGMENTED, - } state_; - - struct UserDictSearchable { - uint16 splids_len; - uint16 splid_start[kMaxLemmaSize]; - uint16 splid_count[kMaxLemmaSize]; - // Compact inital letters for both FuzzyCompareSpellId and cache system - uint32 signature[kMaxLemmaSize / 4]; - }; - -#ifdef ___CACHE_ENABLED___ - enum UserDictCacheType { - USER_DICT_CACHE, - USER_DICT_MISS_CACHE, - }; - - static const int kUserDictCacheSize = 4; - static const int kUserDictMissCacheSize = kMaxLemmaSize - 1; - - struct UserDictMissCache { - uint32 signatures[kUserDictMissCacheSize][kMaxLemmaSize / 4]; - uint16 head, tail; - } miss_caches_[kMaxLemmaSize]; - - struct UserDictCache { - uint32 signatures[kUserDictCacheSize][kMaxLemmaSize / 4]; - uint32 offsets[kUserDictCacheSize]; - uint32 lengths[kUserDictCacheSize]; - // Ring buffer - uint16 head, tail; - } caches_[kMaxLemmaSize]; - - void cache_init(); - - void cache_push(UserDictCacheType type, - UserDictSearchable *searchable, - uint32 offset, uint32 length); - - bool cache_hit(UserDictSearchable *searchable, - uint32 *offset, uint32 *length); - - bool load_cache(UserDictSearchable *searchable, - uint32 *offset, uint32 *length); - - void save_cache(UserDictSearchable *searchable, - uint32 offset, uint32 length); - - void reset_cache(); - - bool load_miss_cache(UserDictSearchable *searchable); - - void save_miss_cache(UserDictSearchable *searchable); - - void reset_miss_cache(); -#endif - - LmaScoreType translate_score(int f); - - int extract_score_freq(int raw_score); - - uint64 extract_score_lmt(int raw_score); - - inline int build_score(uint64 lmt, int freq); - - inline int64 utf16le_atoll(uint16 *s, int len); - - inline int utf16le_lltoa(int64 v, uint16 *s, int size); - - LemmaIdType _put_lemma(char16 lemma_str[], uint16 splids[], - uint16 lemma_len, uint16 count, uint64 lmt); - - size_t _get_lpis(const uint16 *splid_str, uint16 splid_str_len, - LmaPsbItem *lpi_items, size_t lpi_max, bool * need_extend); 
- - int _get_lemma_score(char16 lemma_str[], uint16 splids[], uint16 lemma_len); - - int _get_lemma_score(LemmaIdType lemma_id); - - int is_fuzzy_prefix_spell_id(const uint16 * id1, uint16 len1, - const UserDictSearchable *searchable); - - bool is_prefix_spell_id(const uint16 * fullids, - uint16 fulllen, const UserDictSearchable *searchable); - - uint32 get_dict_file_size(UserDictInfo * info); - - bool reset(const char *file); - - bool validate(const char *file); - - bool load(const char *file, LemmaIdType start_id); - - bool is_valid_state(); - - bool is_valid_lemma_id(LemmaIdType id); - - LemmaIdType get_max_lemma_id(); - - void set_lemma_flag(uint32 offset, uint8 flag); - - char get_lemma_flag(uint32 offset); - - char get_lemma_nchar(uint32 offset); - - uint16 * get_lemma_spell_ids(uint32 offset); - - uint16 * get_lemma_word(uint32 offset); - - // Prepare searchable to fasten locate process - void prepare_locate(UserDictSearchable *searchable, - const uint16 * splids, uint16 len); - - // Compare initial letters only - int32 fuzzy_compare_spell_id(const uint16 * id1, uint16 len1, - const UserDictSearchable *searchable); - - // Compare exactly two spell ids - // First argument must be a full id spell id - bool equal_spell_id(const uint16 * fullids, - uint16 fulllen, const UserDictSearchable *searchable); - - // Find first item by initial letters - int32 locate_first_in_offsets(const UserDictSearchable *searchable); - - LemmaIdType append_a_lemma(char16 lemma_str[], uint16 splids[], - uint16 lemma_len, uint16 count, uint64 lmt); - - // Check if a lemma is in dictionary - int32 locate_in_offsets(char16 lemma_str[], - uint16 splid_str[], uint16 lemma_len); - - bool remove_lemma_by_offset_index(int offset_index); -#ifdef ___PREDICT_ENABLED___ - uint32 locate_where_to_insert_in_predicts(const uint16 * words, - int lemma_len); - - int32 locate_first_in_predicts(const uint16 * words, int lemma_len); - - void remove_lemma_from_predict_list(uint32 offset); -#endif -#ifdef 
___SYNC_ENABLED___ - void queue_lemma_for_sync(LemmaIdType id); - - void remove_lemma_from_sync_list(uint32 offset); - - void write_back_sync(int fd); -#endif - void write_back_score(int fd); - void write_back_offset(int fd); - void write_back_lemma(int fd); - void write_back_all(int fd); - void write_back(); - - struct UserDictScoreOffsetPair { - int score; - uint32 offset_index; - }; - - inline void swap(UserDictScoreOffsetPair * sop, int i, int j); - - void shift_down(UserDictScoreOffsetPair * sop, int i, int n); - - // On-disk format for each lemma - // +-------------+ - // | Version (4) | - // +-------------+ - // +-----------+-----------+--------------------+-------------------+ - // | Spare (1) | Nchar (1) | Splids (2 x Nchar) | Lemma (2 x Nchar) | - // +-----------+-----------+--------------------+-------------------+ - // ... - // +-----------------------+ +-------------+ <---Offset of offset - // | Offset1 by_splids (4) | ... | OffsetN (4) | - // +-----------------------+ +-------------+ -#ifdef ___PREDICT_ENABLED___ - // +----------------------+ +-------------+ - // | Offset1 by_lemma (4) | ... | OffsetN (4) | - // +----------------------+ +-------------+ -#endif - // +------------+ +------------+ - // | Score1 (4) | ... | ScoreN (4) | - // +------------+ +------------+ -#ifdef ___SYNC_ENABLED___ - // +-------------+ +-------------+ - // | NewAdd1 (4) | ... | NewAddN (4) | - // +-------------+ +-------------+ -#endif - // +----------------+ - // | Dict Info (4x) | - // +----------------+ -}; -} - -#endif