From 1be16c52e604c426638431e8a532d443324e87c9 Mon Sep 17 00:00:00 2001
From: fanlumaster <1730976608@qq.com>
Date: Thu, 2 Jan 2025 02:11:24 +0800
Subject: [PATCH] supplement info about building dicts

---
Note: this patch was amended by hand after export; the blob ids on the
"index" lines below still refer to the originally exported file contents.

 .gitignore                        |  1 +
 command/CMakeLists.txt            | 59 ++++++++++++++++++++++++++++
 command/dict/.gitkeep             |  0
 command/lcompile.sh               | 22 +++++++++++
 command/llaunch.sh                | 47 +++++++++++++++++++++++
 command/lrun.sh                   | 21 ++++++++++
 command/pinyinime_dictbuilder.cpp | 64 +++++++++++++++++++++++++++++++
 7 files changed, 214 insertions(+)
 create mode 100644 command/CMakeLists.txt
 create mode 100644 command/dict/.gitkeep
 create mode 100755 command/lcompile.sh
 create mode 100755 command/llaunch.sh
 create mode 100755 command/lrun.sh
 create mode 100644 command/pinyinime_dictbuilder.cpp

diff --git a/.gitignore b/.gitignore
index 880ac59..2e16dfc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 build/
 .DS_Store
+command/dict/*.dat
diff --git a/command/CMakeLists.txt b/command/CMakeLists.txt
new file mode 100644
index 0000000..ecb9aa6
--- /dev/null
+++ b/command/CMakeLists.txt
@@ -0,0 +1,59 @@
+cmake_minimum_required(VERSION 3.15)
+
+project(build_dict VERSION 1.0 LANGUAGES CXX)
+
+include_directories(../src/include)
+
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD_REQUIRED True)
+set(CMAKE_C_STANDARD 99)
+set(CMAKE_C_STANDARD_REQUIRED True)
+
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
+
+set(HEADERS
+    ../src/include/atomdictbase.h
+    ../src/include/dictbuilder.h
+    ../src/include/dictdef.h
+    ../src/include/dictlist.h
+    ../src/include/dicttrie.h
+    ../src/include/lpicache.h
+    ../src/include/matrixsearch.h
+    ../src/include/mystdlib.h
+    ../src/include/ngram.h
+    ../src/include/pinyinime.h
+    ../src/include/searchutility.h
+    ../src/include/spellingtable.h
+    ../src/include/spellingtrie.h
+    ../src/include/splparser.h
+    ../src/include/sync.h
+    ../src/include/userdict.h
+    ../src/include/utf16char.h
+    ../src/include/utf16reader.h
+)
+
+set(SOURCES
+    ../src/share/dictbuilder.cpp
+    ../src/share/dictlist.cpp
+    ../src/share/dicttrie.cpp
+    ../src/share/lpicache.cpp
+    ../src/share/matrixsearch.cpp
+    ../src/share/mystdlib.cpp
+    ../src/share/ngram.cpp
+    ../src/share/pinyinime.cpp
+    ../src/share/searchutility.cpp
+    ../src/share/spellingtable.cpp
+    ../src/share/spellingtrie.cpp
+    ../src/share/splparser.cpp
+    ../src/share/sync.cpp
+    ../src/share/userdict.cpp
+    ../src/share/utf16char.cpp
+    ../src/share/utf16reader.cpp
+    ./pinyinime_dictbuilder.cpp
+)
+
+# NOTE: lrun.sh and llaunch.sh parse the quoted name on the next line to find
+# the binary under build/bin, so keep the whole set(...) call on one line.
+set(MY_EXECUTABLE_NAME "build_dict")
+
+add_executable(${MY_EXECUTABLE_NAME} ${SOURCES} ${HEADERS})
diff --git a/command/dict/.gitkeep b/command/dict/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/command/lcompile.sh b/command/lcompile.sh
new file mode 100755
index 0000000..bb146aa
--- /dev/null
+++ b/command/lcompile.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+currentDirectory=$(pwd)
+cmakeListsPath="${currentDirectory}/CMakeLists.txt"
+
+if [ ! -f "$cmakeListsPath" ]; then
+    echo "No CMakeLists.txt in current directory, please check."
+    exit 1
+fi
+
+echo "Start generating and compiling..."
+
+buildFolderPath="./build"
+
+if [ ! -d "$buildFolderPath" ]; then
+    mkdir -p "$buildFolderPath"
+    echo "build folder created."
+fi
+
+# Stop with a non-zero status if configuring fails instead of exiting 0.
+cmake -G "Unix Makefiles" -D CMAKE_CXX_COMPILER=/usr/bin/g++ -S . -B ./build/ || exit 1
+
+cmake --build ./build/ --config DEBUG
diff --git a/command/llaunch.sh b/command/llaunch.sh
new file mode 100755
index 0000000..4518c0b
--- /dev/null
+++ b/command/llaunch.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+currentDirectory=$(pwd)
+cmakeListsPath="${currentDirectory}/CMakeLists.txt"
+
+if [ ! -f "$cmakeListsPath" ]; then
+    echo "No CMakeLists.txt in current directory, please check."
+    exit 1
+fi
+
+echo "Start generating and compiling..."
+
+buildFolderPath="./build"
+
+if [ ! -d "$buildFolderPath" ]; then
+    mkdir -p "$buildFolderPath"
+    echo "build folder created."
+fi
+
+# Abort with a non-zero status as soon as configuring or building fails,
+# instead of falling through and exiting 0.
+cmake -G "Unix Makefiles" -D CMAKE_CXX_COMPILER=/usr/bin/g++ -S . -B ./build/ || exit 1
+cmake --build ./build/ --config DEBUG || exit 1
+
+# Read the executable name from the quoted value on the
+# set(MY_EXECUTABLE_NAME "...") line of CMakeLists.txt.
+content=$(<"./CMakeLists.txt")
+exePath=""
+while IFS= read -r line; do
+    if [[ $line == "set(MY_EXECUTABLE_NAME"* ]]; then
+        pattern="\"([^\"]+)\""
+        if [[ $line =~ $pattern ]]; then
+            contentInParentheses="${BASH_REMATCH[1]}"
+            result=($contentInParentheses)
+            exePath="./build/bin/${result[0]}"
+            echo "start running as follows..."
+            echo "=================================================="
+        fi
+    fi
+done <<<"$content"
+
+# execute the binary file
+if [ -n "$exePath" ]; then
+    "$exePath"
+else
+    echo "cannot find executable file path"
+    exit 1
+fi
diff --git a/command/lrun.sh b/command/lrun.sh
new file mode 100755
index 0000000..0c6264e
--- /dev/null
+++ b/command/lrun.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Run the executable named by set(MY_EXECUTABLE_NAME "...") in ./CMakeLists.txt.
+content=$(<"./CMakeLists.txt")
+exePath=""
+while IFS= read -r line; do
+    if [[ $line == "set(MY_EXECUTABLE_NAME"* ]]; then
+        pattern="\"([^\"]+)\""
+        if [[ $line =~ $pattern ]]; then
+            contentInParentheses="${BASH_REMATCH[1]}"
+            result=($contentInParentheses)
+            exePath="./build/bin/${result[0]}"
+        fi
+    fi
+done <<<"$content"
+
+if [ -n "$exePath" ]; then
+    "$exePath"
+else
+    echo "cannot find executable file path"
+    exit 1
+fi
diff --git a/command/pinyinime_dictbuilder.cpp b/command/pinyinime_dictbuilder.cpp
new file mode 100644
index 0000000..d5e0433
--- /dev/null
+++ b/command/pinyinime_dictbuilder.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+#include <memory>
+#include "../src/include/dicttrie.h"
+
+using namespace ime_pinyin;
+
+/**
+ * Build binary dictionary model. Make sure that ___BUILD_MODEL___ is defined
+ * in dictdef.h.
+ *
+ * With two or more command-line arguments, argv[1] and argv[2] are passed to
+ * DictTrie::build_dict(); otherwise the default files under ../data/ are used.
+ * The model is saved to ./dict/dict_pinyin.dat, relative to the current
+ * working directory.
+ *
+ * Returns 0 on success, -1 if building or saving the dictionary fails.
+ */
+int main(int argc, char* argv[]) {
+  // Hold the trie in a unique_ptr so it is released on every return path.
+  // (C++11 build, so std::make_unique is not available.)
+  std::unique_ptr<DictTrie> dict_trie(new DictTrie());
+
+  bool success;
+  if (argc >= 3)
+    success = dict_trie->build_dict(argv[1], argv[2]);
+  else
+    success = dict_trie->build_dict("../data/rawdict_utf16_65105_freq.txt",
+                                    "../data/valid_utf16.txt");
+
+  if (!success) {
+    printf("Build dictionary unsuccessfully.\n");
+    return -1;
+  }
+  printf("Build dictionary successfully.\n");
+
+  success = dict_trie->save_dict("./dict/dict_pinyin.dat");
+  if (!success) {
+    printf("Save dictionary unsuccessfully.\n");
+    return -1;
+  }
+  printf("Save dictionary successfully.\n");
+
+  return 0;
+}