init
This commit is contained in:
29
vendor/github.com/go-ego/cedar/BUILD.bazel
generated
vendored
Normal file
29
vendor/github.com/go-ego/cedar/BUILD.bazel
generated
vendored
Normal file
@ -0,0 +1,29 @@
|
||||
load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = [
|
||||
"api.go",
|
||||
"cedar.go",
|
||||
"doc.go",
|
||||
"errors.go",
|
||||
"io.go",
|
||||
],
|
||||
importmap = "go-common/vendor/github.com/go-ego/cedar",
|
||||
importpath = "github.com/go-ego/cedar",
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "package-srcs",
|
||||
srcs = glob(["**"]),
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:private"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "all-srcs",
|
||||
srcs = [":package-srcs"],
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
674
vendor/github.com/go-ego/cedar/LICENSE
generated
vendored
Normal file
674
vendor/github.com/go-ego/cedar/LICENSE
generated
vendored
Normal file
@ -0,0 +1,674 @@
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
Version 3, 29 June 2007
|
||||
|
||||
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The GNU General Public License is a free, copyleft license for
|
||||
software and other kinds of works.
|
||||
|
||||
The licenses for most software and other practical works are designed
|
||||
to take away your freedom to share and change the works. By contrast,
|
||||
the GNU General Public License is intended to guarantee your freedom to
|
||||
share and change all versions of a program--to make sure it remains free
|
||||
software for all its users. We, the Free Software Foundation, use the
|
||||
GNU General Public License for most of our software; it applies also to
|
||||
any other work released this way by its authors. You can apply it to
|
||||
your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
them if you wish), that you receive source code or can get it if you
|
||||
want it, that you can change the software or use pieces of it in new
|
||||
free programs, and that you know you can do these things.
|
||||
|
||||
To protect your rights, we need to prevent others from denying you
|
||||
these rights or asking you to surrender the rights. Therefore, you have
|
||||
certain responsibilities if you distribute copies of the software, or if
|
||||
you modify it: responsibilities to respect the freedom of others.
|
||||
|
||||
For example, if you distribute copies of such a program, whether
|
||||
gratis or for a fee, you must pass on to the recipients the same
|
||||
freedoms that you received. You must make sure that they, too, receive
|
||||
or can get the source code. And you must show them these terms so they
|
||||
know their rights.
|
||||
|
||||
Developers that use the GNU GPL protect your rights with two steps:
|
||||
(1) assert copyright on the software, and (2) offer you this License
|
||||
giving you legal permission to copy, distribute and/or modify it.
|
||||
|
||||
For the developers' and authors' protection, the GPL clearly explains
|
||||
that there is no warranty for this free software. For both users' and
|
||||
authors' sake, the GPL requires that modified versions be marked as
|
||||
changed, so that their problems will not be attributed erroneously to
|
||||
authors of previous versions.
|
||||
|
||||
Some devices are designed to deny users access to install or run
|
||||
modified versions of the software inside them, although the manufacturer
|
||||
can do so. This is fundamentally incompatible with the aim of
|
||||
protecting users' freedom to change the software. The systematic
|
||||
pattern of such abuse occurs in the area of products for individuals to
|
||||
use, which is precisely where it is most unacceptable. Therefore, we
|
||||
have designed this version of the GPL to prohibit the practice for those
|
||||
products. If such problems arise substantially in other domains, we
|
||||
stand ready to extend this provision to those domains in future versions
|
||||
of the GPL, as needed to protect the freedom of users.
|
||||
|
||||
Finally, every program is threatened constantly by software patents.
|
||||
States should not allow patents to restrict development and use of
|
||||
software on general-purpose computers, but in those that do, we wish to
|
||||
avoid the special danger that patents applied to a free program could
|
||||
make it effectively proprietary. To prevent this, the GPL assures that
|
||||
patents cannot be used to render the program non-free.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
TERMS AND CONDITIONS
|
||||
|
||||
0. Definitions.
|
||||
|
||||
"This License" refers to version 3 of the GNU General Public License.
|
||||
|
||||
"Copyright" also means copyright-like laws that apply to other kinds of
|
||||
works, such as semiconductor masks.
|
||||
|
||||
"The Program" refers to any copyrightable work licensed under this
|
||||
License. Each licensee is addressed as "you". "Licensees" and
|
||||
"recipients" may be individuals or organizations.
|
||||
|
||||
To "modify" a work means to copy from or adapt all or part of the work
|
||||
in a fashion requiring copyright permission, other than the making of an
|
||||
exact copy. The resulting work is called a "modified version" of the
|
||||
earlier work or a work "based on" the earlier work.
|
||||
|
||||
A "covered work" means either the unmodified Program or a work based
|
||||
on the Program.
|
||||
|
||||
To "propagate" a work means to do anything with it that, without
|
||||
permission, would make you directly or secondarily liable for
|
||||
infringement under applicable copyright law, except executing it on a
|
||||
computer or modifying a private copy. Propagation includes copying,
|
||||
distribution (with or without modification), making available to the
|
||||
public, and in some countries other activities as well.
|
||||
|
||||
To "convey" a work means any kind of propagation that enables other
|
||||
parties to make or receive copies. Mere interaction with a user through
|
||||
a computer network, with no transfer of a copy, is not conveying.
|
||||
|
||||
An interactive user interface displays "Appropriate Legal Notices"
|
||||
to the extent that it includes a convenient and prominently visible
|
||||
feature that (1) displays an appropriate copyright notice, and (2)
|
||||
tells the user that there is no warranty for the work (except to the
|
||||
extent that warranties are provided), that licensees may convey the
|
||||
work under this License, and how to view a copy of this License. If
|
||||
the interface presents a list of user commands or options, such as a
|
||||
menu, a prominent item in the list meets this criterion.
|
||||
|
||||
1. Source Code.
|
||||
|
||||
The "source code" for a work means the preferred form of the work
|
||||
for making modifications to it. "Object code" means any non-source
|
||||
form of a work.
|
||||
|
||||
A "Standard Interface" means an interface that either is an official
|
||||
standard defined by a recognized standards body, or, in the case of
|
||||
interfaces specified for a particular programming language, one that
|
||||
is widely used among developers working in that language.
|
||||
|
||||
The "System Libraries" of an executable work include anything, other
|
||||
than the work as a whole, that (a) is included in the normal form of
|
||||
packaging a Major Component, but which is not part of that Major
|
||||
Component, and (b) serves only to enable use of the work with that
|
||||
Major Component, or to implement a Standard Interface for which an
|
||||
implementation is available to the public in source code form. A
|
||||
"Major Component", in this context, means a major essential component
|
||||
(kernel, window system, and so on) of the specific operating system
|
||||
(if any) on which the executable work runs, or a compiler used to
|
||||
produce the work, or an object code interpreter used to run it.
|
||||
|
||||
The "Corresponding Source" for a work in object code form means all
|
||||
the source code needed to generate, install, and (for an executable
|
||||
work) run the object code and to modify the work, including scripts to
|
||||
control those activities. However, it does not include the work's
|
||||
System Libraries, or general-purpose tools or generally available free
|
||||
programs which are used unmodified in performing those activities but
|
||||
which are not part of the work. For example, Corresponding Source
|
||||
includes interface definition files associated with source files for
|
||||
the work, and the source code for shared libraries and dynamically
|
||||
linked subprograms that the work is specifically designed to require,
|
||||
such as by intimate data communication or control flow between those
|
||||
subprograms and other parts of the work.
|
||||
|
||||
The Corresponding Source need not include anything that users
|
||||
can regenerate automatically from other parts of the Corresponding
|
||||
Source.
|
||||
|
||||
The Corresponding Source for a work in source code form is that
|
||||
same work.
|
||||
|
||||
2. Basic Permissions.
|
||||
|
||||
All rights granted under this License are granted for the term of
|
||||
copyright on the Program, and are irrevocable provided the stated
|
||||
conditions are met. This License explicitly affirms your unlimited
|
||||
permission to run the unmodified Program. The output from running a
|
||||
covered work is covered by this License only if the output, given its
|
||||
content, constitutes a covered work. This License acknowledges your
|
||||
rights of fair use or other equivalent, as provided by copyright law.
|
||||
|
||||
You may make, run and propagate covered works that you do not
|
||||
convey, without conditions so long as your license otherwise remains
|
||||
in force. You may convey covered works to others for the sole purpose
|
||||
of having them make modifications exclusively for you, or provide you
|
||||
with facilities for running those works, provided that you comply with
|
||||
the terms of this License in conveying all material for which you do
|
||||
not control copyright. Those thus making or running the covered works
|
||||
for you must do so exclusively on your behalf, under your direction
|
||||
and control, on terms that prohibit them from making any copies of
|
||||
your copyrighted material outside their relationship with you.
|
||||
|
||||
Conveying under any other circumstances is permitted solely under
|
||||
the conditions stated below. Sublicensing is not allowed; section 10
|
||||
makes it unnecessary.
|
||||
|
||||
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
|
||||
|
||||
No covered work shall be deemed part of an effective technological
|
||||
measure under any applicable law fulfilling obligations under article
|
||||
11 of the WIPO copyright treaty adopted on 20 December 1996, or
|
||||
similar laws prohibiting or restricting circumvention of such
|
||||
measures.
|
||||
|
||||
When you convey a covered work, you waive any legal power to forbid
|
||||
circumvention of technological measures to the extent such circumvention
|
||||
is effected by exercising rights under this License with respect to
|
||||
the covered work, and you disclaim any intention to limit operation or
|
||||
modification of the work as a means of enforcing, against the work's
|
||||
users, your or third parties' legal rights to forbid circumvention of
|
||||
technological measures.
|
||||
|
||||
4. Conveying Verbatim Copies.
|
||||
|
||||
You may convey verbatim copies of the Program's source code as you
|
||||
receive it, in any medium, provided that you conspicuously and
|
||||
appropriately publish on each copy an appropriate copyright notice;
|
||||
keep intact all notices stating that this License and any
|
||||
non-permissive terms added in accord with section 7 apply to the code;
|
||||
keep intact all notices of the absence of any warranty; and give all
|
||||
recipients a copy of this License along with the Program.
|
||||
|
||||
You may charge any price or no price for each copy that you convey,
|
||||
and you may offer support or warranty protection for a fee.
|
||||
|
||||
5. Conveying Modified Source Versions.
|
||||
|
||||
You may convey a work based on the Program, or the modifications to
|
||||
produce it from the Program, in the form of source code under the
|
||||
terms of section 4, provided that you also meet all of these conditions:
|
||||
|
||||
a) The work must carry prominent notices stating that you modified
|
||||
it, and giving a relevant date.
|
||||
|
||||
b) The work must carry prominent notices stating that it is
|
||||
released under this License and any conditions added under section
|
||||
7. This requirement modifies the requirement in section 4 to
|
||||
"keep intact all notices".
|
||||
|
||||
c) You must license the entire work, as a whole, under this
|
||||
License to anyone who comes into possession of a copy. This
|
||||
License will therefore apply, along with any applicable section 7
|
||||
additional terms, to the whole of the work, and all its parts,
|
||||
regardless of how they are packaged. This License gives no
|
||||
permission to license the work in any other way, but it does not
|
||||
invalidate such permission if you have separately received it.
|
||||
|
||||
d) If the work has interactive user interfaces, each must display
|
||||
Appropriate Legal Notices; however, if the Program has interactive
|
||||
interfaces that do not display Appropriate Legal Notices, your
|
||||
work need not make them do so.
|
||||
|
||||
A compilation of a covered work with other separate and independent
|
||||
works, which are not by their nature extensions of the covered work,
|
||||
and which are not combined with it such as to form a larger program,
|
||||
in or on a volume of a storage or distribution medium, is called an
|
||||
"aggregate" if the compilation and its resulting copyright are not
|
||||
used to limit the access or legal rights of the compilation's users
|
||||
beyond what the individual works permit. Inclusion of a covered work
|
||||
in an aggregate does not cause this License to apply to the other
|
||||
parts of the aggregate.
|
||||
|
||||
6. Conveying Non-Source Forms.
|
||||
|
||||
You may convey a covered work in object code form under the terms
|
||||
of sections 4 and 5, provided that you also convey the
|
||||
machine-readable Corresponding Source under the terms of this License,
|
||||
in one of these ways:
|
||||
|
||||
a) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by the
|
||||
Corresponding Source fixed on a durable physical medium
|
||||
customarily used for software interchange.
|
||||
|
||||
b) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by a
|
||||
written offer, valid for at least three years and valid for as
|
||||
long as you offer spare parts or customer support for that product
|
||||
model, to give anyone who possesses the object code either (1) a
|
||||
copy of the Corresponding Source for all the software in the
|
||||
product that is covered by this License, on a durable physical
|
||||
medium customarily used for software interchange, for a price no
|
||||
more than your reasonable cost of physically performing this
|
||||
conveying of source, or (2) access to copy the
|
||||
Corresponding Source from a network server at no charge.
|
||||
|
||||
c) Convey individual copies of the object code with a copy of the
|
||||
written offer to provide the Corresponding Source. This
|
||||
alternative is allowed only occasionally and noncommercially, and
|
||||
only if you received the object code with such an offer, in accord
|
||||
with subsection 6b.
|
||||
|
||||
d) Convey the object code by offering access from a designated
|
||||
place (gratis or for a charge), and offer equivalent access to the
|
||||
Corresponding Source in the same way through the same place at no
|
||||
further charge. You need not require recipients to copy the
|
||||
Corresponding Source along with the object code. If the place to
|
||||
copy the object code is a network server, the Corresponding Source
|
||||
may be on a different server (operated by you or a third party)
|
||||
that supports equivalent copying facilities, provided you maintain
|
||||
clear directions next to the object code saying where to find the
|
||||
Corresponding Source. Regardless of what server hosts the
|
||||
Corresponding Source, you remain obligated to ensure that it is
|
||||
available for as long as needed to satisfy these requirements.
|
||||
|
||||
e) Convey the object code using peer-to-peer transmission, provided
|
||||
you inform other peers where the object code and Corresponding
|
||||
Source of the work are being offered to the general public at no
|
||||
charge under subsection 6d.
|
||||
|
||||
A separable portion of the object code, whose source code is excluded
|
||||
from the Corresponding Source as a System Library, need not be
|
||||
included in conveying the object code work.
|
||||
|
||||
A "User Product" is either (1) a "consumer product", which means any
|
||||
tangible personal property which is normally used for personal, family,
|
||||
or household purposes, or (2) anything designed or sold for incorporation
|
||||
into a dwelling. In determining whether a product is a consumer product,
|
||||
doubtful cases shall be resolved in favor of coverage. For a particular
|
||||
product received by a particular user, "normally used" refers to a
|
||||
typical or common use of that class of product, regardless of the status
|
||||
of the particular user or of the way in which the particular user
|
||||
actually uses, or expects or is expected to use, the product. A product
|
||||
is a consumer product regardless of whether the product has substantial
|
||||
commercial, industrial or non-consumer uses, unless such uses represent
|
||||
the only significant mode of use of the product.
|
||||
|
||||
"Installation Information" for a User Product means any methods,
|
||||
procedures, authorization keys, or other information required to install
|
||||
and execute modified versions of a covered work in that User Product from
|
||||
a modified version of its Corresponding Source. The information must
|
||||
suffice to ensure that the continued functioning of the modified object
|
||||
code is in no case prevented or interfered with solely because
|
||||
modification has been made.
|
||||
|
||||
If you convey an object code work under this section in, or with, or
|
||||
specifically for use in, a User Product, and the conveying occurs as
|
||||
part of a transaction in which the right of possession and use of the
|
||||
User Product is transferred to the recipient in perpetuity or for a
|
||||
fixed term (regardless of how the transaction is characterized), the
|
||||
Corresponding Source conveyed under this section must be accompanied
|
||||
by the Installation Information. But this requirement does not apply
|
||||
if neither you nor any third party retains the ability to install
|
||||
modified object code on the User Product (for example, the work has
|
||||
been installed in ROM).
|
||||
|
||||
The requirement to provide Installation Information does not include a
|
||||
requirement to continue to provide support service, warranty, or updates
|
||||
for a work that has been modified or installed by the recipient, or for
|
||||
the User Product in which it has been modified or installed. Access to a
|
||||
network may be denied when the modification itself materially and
|
||||
adversely affects the operation of the network or violates the rules and
|
||||
protocols for communication across the network.
|
||||
|
||||
Corresponding Source conveyed, and Installation Information provided,
|
||||
in accord with this section must be in a format that is publicly
|
||||
documented (and with an implementation available to the public in
|
||||
source code form), and must require no special password or key for
|
||||
unpacking, reading or copying.
|
||||
|
||||
7. Additional Terms.
|
||||
|
||||
"Additional permissions" are terms that supplement the terms of this
|
||||
License by making exceptions from one or more of its conditions.
|
||||
Additional permissions that are applicable to the entire Program shall
|
||||
be treated as though they were included in this License, to the extent
|
||||
that they are valid under applicable law. If additional permissions
|
||||
apply only to part of the Program, that part may be used separately
|
||||
under those permissions, but the entire Program remains governed by
|
||||
this License without regard to the additional permissions.
|
||||
|
||||
When you convey a copy of a covered work, you may at your option
|
||||
remove any additional permissions from that copy, or from any part of
|
||||
it. (Additional permissions may be written to require their own
|
||||
removal in certain cases when you modify the work.) You may place
|
||||
additional permissions on material, added by you to a covered work,
|
||||
for which you have or can give appropriate copyright permission.
|
||||
|
||||
Notwithstanding any other provision of this License, for material you
|
||||
add to a covered work, you may (if authorized by the copyright holders of
|
||||
that material) supplement the terms of this License with terms:
|
||||
|
||||
a) Disclaiming warranty or limiting liability differently from the
|
||||
terms of sections 15 and 16 of this License; or
|
||||
|
||||
b) Requiring preservation of specified reasonable legal notices or
|
||||
author attributions in that material or in the Appropriate Legal
|
||||
Notices displayed by works containing it; or
|
||||
|
||||
c) Prohibiting misrepresentation of the origin of that material, or
|
||||
requiring that modified versions of such material be marked in
|
||||
reasonable ways as different from the original version; or
|
||||
|
||||
d) Limiting the use for publicity purposes of names of licensors or
|
||||
authors of the material; or
|
||||
|
||||
e) Declining to grant rights under trademark law for use of some
|
||||
trade names, trademarks, or service marks; or
|
||||
|
||||
f) Requiring indemnification of licensors and authors of that
|
||||
material by anyone who conveys the material (or modified versions of
|
||||
it) with contractual assumptions of liability to the recipient, for
|
||||
any liability that these contractual assumptions directly impose on
|
||||
those licensors and authors.
|
||||
|
||||
All other non-permissive additional terms are considered "further
|
||||
restrictions" within the meaning of section 10. If the Program as you
|
||||
received it, or any part of it, contains a notice stating that it is
|
||||
governed by this License along with a term that is a further
|
||||
restriction, you may remove that term. If a license document contains
|
||||
a further restriction but permits relicensing or conveying under this
|
||||
License, you may add to a covered work material governed by the terms
|
||||
of that license document, provided that the further restriction does
|
||||
not survive such relicensing or conveying.
|
||||
|
||||
If you add terms to a covered work in accord with this section, you
|
||||
must place, in the relevant source files, a statement of the
|
||||
additional terms that apply to those files, or a notice indicating
|
||||
where to find the applicable terms.
|
||||
|
||||
Additional terms, permissive or non-permissive, may be stated in the
|
||||
form of a separately written license, or stated as exceptions;
|
||||
the above requirements apply either way.
|
||||
|
||||
8. Termination.
|
||||
|
||||
You may not propagate or modify a covered work except as expressly
|
||||
provided under this License. Any attempt otherwise to propagate or
|
||||
modify it is void, and will automatically terminate your rights under
|
||||
this License (including any patent licenses granted under the third
|
||||
paragraph of section 11).
|
||||
|
||||
However, if you cease all violation of this License, then your
|
||||
license from a particular copyright holder is reinstated (a)
|
||||
provisionally, unless and until the copyright holder explicitly and
|
||||
finally terminates your license, and (b) permanently, if the copyright
|
||||
holder fails to notify you of the violation by some reasonable means
|
||||
prior to 60 days after the cessation.
|
||||
|
||||
Moreover, your license from a particular copyright holder is
|
||||
reinstated permanently if the copyright holder notifies you of the
|
||||
violation by some reasonable means, this is the first time you have
|
||||
received notice of violation of this License (for any work) from that
|
||||
copyright holder, and you cure the violation prior to 30 days after
|
||||
your receipt of the notice.
|
||||
|
||||
Termination of your rights under this section does not terminate the
|
||||
licenses of parties who have received copies or rights from you under
|
||||
this License. If your rights have been terminated and not permanently
|
||||
reinstated, you do not qualify to receive new licenses for the same
|
||||
material under section 10.
|
||||
|
||||
9. Acceptance Not Required for Having Copies.
|
||||
|
||||
You are not required to accept this License in order to receive or
|
||||
run a copy of the Program. Ancillary propagation of a covered work
|
||||
occurring solely as a consequence of using peer-to-peer transmission
|
||||
to receive a copy likewise does not require acceptance. However,
|
||||
nothing other than this License grants you permission to propagate or
|
||||
modify any covered work. These actions infringe copyright if you do
|
||||
not accept this License. Therefore, by modifying or propagating a
|
||||
covered work, you indicate your acceptance of this License to do so.
|
||||
|
||||
10. Automatic Licensing of Downstream Recipients.
|
||||
|
||||
Each time you convey a covered work, the recipient automatically
|
||||
receives a license from the original licensors, to run, modify and
|
||||
propagate that work, subject to this License. You are not responsible
|
||||
for enforcing compliance by third parties with this License.
|
||||
|
||||
An "entity transaction" is a transaction transferring control of an
|
||||
organization, or substantially all assets of one, or subdividing an
|
||||
organization, or merging organizations. If propagation of a covered
|
||||
work results from an entity transaction, each party to that
|
||||
transaction who receives a copy of the work also receives whatever
|
||||
licenses to the work the party's predecessor in interest had or could
|
||||
give under the previous paragraph, plus a right to possession of the
|
||||
Corresponding Source of the work from the predecessor in interest, if
|
||||
the predecessor has it or can get it with reasonable efforts.
|
||||
|
||||
You may not impose any further restrictions on the exercise of the
|
||||
rights granted or affirmed under this License. For example, you may
|
||||
not impose a license fee, royalty, or other charge for exercise of
|
||||
rights granted under this License, and you may not initiate litigation
|
||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
||||
any patent claim is infringed by making, using, selling, offering for
|
||||
sale, or importing the Program or any portion of it.
|
||||
|
||||
11. Patents.
|
||||
|
||||
A "contributor" is a copyright holder who authorizes use under this
|
||||
License of the Program or a work on which the Program is based. The
|
||||
work thus licensed is called the contributor's "contributor version".
|
||||
|
||||
A contributor's "essential patent claims" are all patent claims
|
||||
owned or controlled by the contributor, whether already acquired or
|
||||
hereafter acquired, that would be infringed by some manner, permitted
|
||||
by this License, of making, using, or selling its contributor version,
|
||||
but do not include claims that would be infringed only as a
|
||||
consequence of further modification of the contributor version. For
|
||||
purposes of this definition, "control" includes the right to grant
|
||||
patent sublicenses in a manner consistent with the requirements of
|
||||
this License.
|
||||
|
||||
Each contributor grants you a non-exclusive, worldwide, royalty-free
|
||||
patent license under the contributor's essential patent claims, to
|
||||
make, use, sell, offer for sale, import and otherwise run, modify and
|
||||
propagate the contents of its contributor version.
|
||||
|
||||
In the following three paragraphs, a "patent license" is any express
|
||||
agreement or commitment, however denominated, not to enforce a patent
|
||||
(such as an express permission to practice a patent or covenant not to
|
||||
sue for patent infringement). To "grant" such a patent license to a
|
||||
party means to make such an agreement or commitment not to enforce a
|
||||
patent against the party.
|
||||
|
||||
If you convey a covered work, knowingly relying on a patent license,
|
||||
and the Corresponding Source of the work is not available for anyone
|
||||
to copy, free of charge and under the terms of this License, through a
|
||||
publicly available network server or other readily accessible means,
|
||||
then you must either (1) cause the Corresponding Source to be so
|
||||
available, or (2) arrange to deprive yourself of the benefit of the
|
||||
patent license for this particular work, or (3) arrange, in a manner
|
||||
consistent with the requirements of this License, to extend the patent
|
||||
license to downstream recipients. "Knowingly relying" means you have
|
||||
actual knowledge that, but for the patent license, your conveying the
|
||||
covered work in a country, or your recipient's use of the covered work
|
||||
in a country, would infringe one or more identifiable patents in that
|
||||
country that you have reason to believe are valid.
|
||||
|
||||
If, pursuant to or in connection with a single transaction or
|
||||
arrangement, you convey, or propagate by procuring conveyance of, a
|
||||
covered work, and grant a patent license to some of the parties
|
||||
receiving the covered work authorizing them to use, propagate, modify
|
||||
or convey a specific copy of the covered work, then the patent license
|
||||
you grant is automatically extended to all recipients of the covered
|
||||
work and works based on it.
|
||||
|
||||
A patent license is "discriminatory" if it does not include within
|
||||
the scope of its coverage, prohibits the exercise of, or is
|
||||
conditioned on the non-exercise of one or more of the rights that are
|
||||
specifically granted under this License. You may not convey a covered
|
||||
work if you are a party to an arrangement with a third party that is
|
||||
in the business of distributing software, under which you make payment
|
||||
to the third party based on the extent of your activity of conveying
|
||||
the work, and under which the third party grants, to any of the
|
||||
parties who would receive the covered work from you, a discriminatory
|
||||
patent license (a) in connection with copies of the covered work
|
||||
conveyed by you (or copies made from those copies), or (b) primarily
|
||||
for and in connection with specific products or compilations that
|
||||
contain the covered work, unless you entered into that arrangement,
|
||||
or that patent license was granted, prior to 28 March 2007.
|
||||
|
||||
Nothing in this License shall be construed as excluding or limiting
|
||||
any implied license or other defenses to infringement that may
|
||||
otherwise be available to you under applicable patent law.
|
||||
|
||||
12. No Surrender of Others' Freedom.
|
||||
|
||||
If conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot convey a
|
||||
covered work so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you may
|
||||
not convey it at all. For example, if you agree to terms that obligate you
|
||||
to collect a royalty for further conveying from those to whom you convey
|
||||
the Program, the only way you could satisfy both those terms and this
|
||||
License would be to refrain entirely from conveying the Program.
|
||||
|
||||
13. Use with the GNU Affero General Public License.
|
||||
|
||||
Notwithstanding any other provision of this License, you have
|
||||
permission to link or combine any covered work with a work licensed
|
||||
under version 3 of the GNU Affero General Public License into a single
|
||||
combined work, and to convey the resulting work. The terms of this
|
||||
License will continue to apply to the part which is the covered work,
|
||||
but the special requirements of the GNU Affero General Public License,
|
||||
section 13, concerning interaction through a network will apply to the
|
||||
combination as such.
|
||||
|
||||
14. Revised Versions of this License.
|
||||
|
||||
The Free Software Foundation may publish revised and/or new versions of
|
||||
the GNU General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the
|
||||
Program specifies that a certain numbered version of the GNU General
|
||||
Public License "or any later version" applies to it, you have the
|
||||
option of following the terms and conditions either of that numbered
|
||||
version or of any later version published by the Free Software
|
||||
Foundation. If the Program does not specify a version number of the
|
||||
GNU General Public License, you may choose any version ever published
|
||||
by the Free Software Foundation.
|
||||
|
||||
If the Program specifies that a proxy can decide which future
|
||||
versions of the GNU General Public License can be used, that proxy's
|
||||
public statement of acceptance of a version permanently authorizes you
|
||||
to choose that version for the Program.
|
||||
|
||||
Later license versions may give you additional or different
|
||||
permissions. However, no additional obligations are imposed on any
|
||||
author or copyright holder as a result of your choosing to follow a
|
||||
later version.
|
||||
|
||||
15. Disclaimer of Warranty.
|
||||
|
||||
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
|
||||
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
|
||||
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
|
||||
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
|
||||
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
|
||||
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||
|
||||
16. Limitation of Liability.
|
||||
|
||||
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
|
||||
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
|
||||
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
|
||||
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
|
||||
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
|
||||
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
|
||||
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGES.
|
||||
|
||||
17. Interpretation of Sections 15 and 16.
|
||||
|
||||
If the disclaimer of warranty and limitation of liability provided
|
||||
above cannot be given local legal effect according to their terms,
|
||||
reviewing courts shall apply local law that most closely approximates
|
||||
an absolute waiver of all civil liability in connection with the
|
||||
Program, unless a warranty or assumption of liability accompanies a
|
||||
copy of the Program in return for a fee.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
state the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
{one line to give the program's name and a brief idea of what it does.}
|
||||
Copyright (C) {year} {name of author}
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program does terminal interaction, make it output a short
|
||||
notice like this when it starts in an interactive mode:
|
||||
|
||||
{project} Copyright (C) {year} {fullname}
|
||||
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||
parts of the General Public License. Of course, your program's commands
|
||||
might be different; for a GUI interface, you would use an "about box".
|
||||
|
||||
You should also get your employer (if you work as a programmer) or school,
|
||||
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
||||
For more information on this, and how to apply and follow the GNU GPL, see
|
||||
<http://www.gnu.org/licenses/>.
|
||||
|
||||
The GNU General Public License does not permit incorporating your program
|
||||
into proprietary programs. If your program is a subroutine library, you
|
||||
may consider it more useful to permit linking proprietary applications with
|
||||
the library. If this is what you want to do, use the GNU Lesser General
|
||||
Public License instead of this License. But first, please read
|
||||
<http://www.gnu.org/philosophy/why-not-lgpl.html>.
|
93
vendor/github.com/go-ego/cedar/README.md
generated
vendored
Normal file
93
vendor/github.com/go-ego/cedar/README.md
generated
vendored
Normal file
@ -0,0 +1,93 @@
|
||||
# cedar
|
||||
[](https://travis-ci.org/go-ego/cedar)
|
||||
[](https://circleci.com/gh/go-ego/cedar)
|
||||
[](https://codecov.io/gh/go-ego/cedar)
|
||||
[](https://goreportcard.com/report/github.com/go-ego/cedar)
|
||||
[](https://godoc.org/github.com/go-ego/cedar)
|
||||
[](https://github.com/go-ego/cedar/releases/latest)
|
||||
[](https://gitter.im/go-ego/ego?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
||||
|
||||
Package `cedar` implements a double-array trie, based on [cedar-go](https://github.com/adamzy/cedar-go).
|
||||
|
||||
It is a [Golang](https://golang.org/) port of [cedar](http://www.tkl.iis.u-tokyo.ac.jp/~ynaga/cedar) which is written in C++ by Naoki Yoshinaga. `cedar-go` currently implements the `reduced` version of cedar.
|
||||
This package is not thread safe if there is one goroutine doing insertions or deletions.
|
||||
|
||||
## Install
|
||||
```
|
||||
go get github.com/go-ego/cedar
|
||||
```
|
||||
|
||||
## Usage
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-ego/cedar"
|
||||
)
|
||||
|
||||
func main() {
|
||||
// create a new cedar trie.
|
||||
trie := cedar.New()
|
||||
|
||||
// a helper function to print the id-key-value triple given trie node id
|
||||
printIdKeyValue := func(id int) {
|
||||
// the key of node `id`.
|
||||
key, _ := trie.Key(id)
|
||||
// the value of node `id`.
|
||||
value, _ := trie.Value(id)
|
||||
fmt.Printf("%d\t%s:%v\n", id, key, value)
|
||||
}
|
||||
|
||||
// Insert key-value pairs.
|
||||
// The order of insertion is not important.
|
||||
trie.Insert([]byte("How many"), 0)
|
||||
trie.Insert([]byte("How many loved"), 1)
|
||||
trie.Insert([]byte("How many loved your moments"), 2)
|
||||
trie.Insert([]byte("How many loved your moments of glad grace"), 3)
|
||||
trie.Insert([]byte("姑苏"), 4)
|
||||
trie.Insert([]byte("姑苏城外"), 5)
|
||||
trie.Insert([]byte("姑苏城外寒山寺"), 6)
|
||||
|
||||
// Get the associated value of a key directly.
|
||||
value, _ := trie.Get([]byte("How many loved your moments of glad grace"))
|
||||
fmt.Println(value)
|
||||
|
||||
// Or, jump to the node first,
|
||||
id, _ := trie.Jump([]byte("How many loved your moments"), 0)
|
||||
// then get the key and the value
|
||||
printIdKeyValue(id)
|
||||
|
||||
fmt.Println("\nPrefixMatch\nid\tkey:value")
|
||||
for _, id := range trie.PrefixMatch([]byte("How many loved your moments of glad grace"), 0) {
|
||||
printIdKeyValue(id)
|
||||
}
|
||||
|
||||
fmt.Println("\nPrefixPredict\nid\tkey:value")
|
||||
for _, id := range trie.PrefixPredict([]byte("姑苏"), 0) {
|
||||
printIdKeyValue(id)
|
||||
}
|
||||
}
|
||||
```
|
||||
will produce
|
||||
```
|
||||
3
|
||||
281 How many loved your moments:2
|
||||
|
||||
PrefixMatch
|
||||
id key:value
|
||||
262 How many:0
|
||||
268 How many loved:1
|
||||
281 How many loved your moments:2
|
||||
296 How many loved your moments of glad grace:3
|
||||
|
||||
PrefixPredict
|
||||
id key:value
|
||||
303 姑苏:4
|
||||
309 姑苏城外:5
|
||||
318 姑苏城外寒山寺:6
|
||||
```
|
||||
## License
|
||||
|
||||
Under the GPL-3.0 License.
|
245
vendor/github.com/go-ego/cedar/api.go
generated
vendored
Normal file
245
vendor/github.com/go-ego/cedar/api.go
generated
vendored
Normal file
@ -0,0 +1,245 @@
|
||||
package cedar
|
||||
|
||||
// Status reports the following statistics of the cedar:
|
||||
// keys: number of keys that are in the cedar,
|
||||
// nodes: number of trie nodes (slots in the base array) has been taken,
|
||||
// size: the size of the base array used by the cedar,
|
||||
// capacity: the capicity of the base array used by the cedar.
|
||||
func (da *Cedar) Status() (keys, nodes, size, capacity int) {
|
||||
for i := 0; i < da.Size; i++ {
|
||||
n := da.Array[i]
|
||||
if n.Check >= 0 {
|
||||
nodes++
|
||||
if n.Value >= 0 {
|
||||
keys++
|
||||
}
|
||||
}
|
||||
}
|
||||
return keys, nodes, da.Size, da.Capacity
|
||||
}
|
||||
|
||||
// Jump travels from a node `from` to another node
|
||||
// `to` by following the path `path`.
|
||||
// For example, if the following keys were inserted:
|
||||
// id key
|
||||
// 19 abc
|
||||
// 23 ab
|
||||
// 37 abcd
|
||||
// then
|
||||
// Jump([]byte("ab"), 0) = 23, nil // reach "ab" from root
|
||||
// Jump([]byte("c"), 23) = 19, nil // reach "abc" from "ab"
|
||||
// Jump([]byte("cd"), 23) = 37, nil // reach "abcd" from "ab"
|
||||
func (da *Cedar) Jump(path []byte, from int) (to int, err error) {
|
||||
for _, b := range path {
|
||||
if da.Array[from].Value >= 0 {
|
||||
return from, ErrNoPath
|
||||
}
|
||||
to = da.Array[from].base() ^ int(b)
|
||||
if da.Array[to].Check != from {
|
||||
return from, ErrNoPath
|
||||
}
|
||||
from = to
|
||||
}
|
||||
return to, nil
|
||||
}
|
||||
|
||||
// Key returns the key of the node with the given `id`.
|
||||
// It will return ErrNoPath, if the node does not exist.
|
||||
func (da *Cedar) Key(id int) (key []byte, err error) {
|
||||
for id > 0 {
|
||||
from := da.Array[id].Check
|
||||
if from < 0 {
|
||||
return nil, ErrNoPath
|
||||
}
|
||||
if char := byte(da.Array[from].base() ^ id); char != 0 {
|
||||
key = append(key, char)
|
||||
}
|
||||
id = from
|
||||
}
|
||||
if id != 0 || len(key) == 0 {
|
||||
return nil, ErrInvalidKey
|
||||
}
|
||||
for i := 0; i < len(key)/2; i++ {
|
||||
key[i], key[len(key)-i-1] = key[len(key)-i-1], key[i]
|
||||
}
|
||||
return key, nil
|
||||
}
|
||||
|
||||
// Value returns the value of the node with the given `id`.
|
||||
// It will return ErrNoValue, if the node does not have a value.
|
||||
func (da *Cedar) Value(id int) (value int, err error) {
|
||||
value = da.Array[id].Value
|
||||
if value >= 0 {
|
||||
return value, nil
|
||||
}
|
||||
to := da.Array[id].base()
|
||||
if da.Array[to].Check == id && da.Array[to].Value >= 0 {
|
||||
return da.Array[to].Value, nil
|
||||
}
|
||||
return 0, ErrNoValue
|
||||
}
|
||||
|
||||
// Insert adds a key-value pair into the cedar.
|
||||
// It will return ErrInvalidValue, if value < 0 or >= ValueLimit.
|
||||
func (da *Cedar) Insert(key []byte, value int) error {
|
||||
if value < 0 || value >= ValueLimit {
|
||||
return ErrInvalidValue
|
||||
}
|
||||
p := da.get(key, 0, 0)
|
||||
*p = value
|
||||
return nil
|
||||
}
|
||||
|
||||
// Update increases the value associated with the `key`.
|
||||
// The `key` will be inserted if it is not in the cedar.
|
||||
// It will return ErrInvalidValue, if the updated value < 0 or >= ValueLimit.
|
||||
func (da *Cedar) Update(key []byte, value int) error {
|
||||
p := da.get(key, 0, 0)
|
||||
|
||||
// key was not inserted
|
||||
if *p == ValueLimit {
|
||||
*p = value
|
||||
return nil
|
||||
}
|
||||
|
||||
// key was inserted before
|
||||
if *p+value < 0 || *p+value >= ValueLimit {
|
||||
return ErrInvalidValue
|
||||
}
|
||||
*p += value
|
||||
return nil
|
||||
}
|
||||
|
||||
// Delete removes a key-value pair from the cedar.
|
||||
// It will return ErrNoPath, if the key has not been added.
|
||||
func (da *Cedar) Delete(key []byte) error {
|
||||
// if the path does not exist, or the end is not a leaf,
|
||||
// nothing to delete
|
||||
to, err := da.Jump(key, 0)
|
||||
if err != nil {
|
||||
return ErrNoPath
|
||||
}
|
||||
|
||||
if da.Array[to].Value < 0 {
|
||||
base := da.Array[to].base()
|
||||
if da.Array[base].Check == to {
|
||||
to = base
|
||||
}
|
||||
}
|
||||
|
||||
for to > 0 {
|
||||
from := da.Array[to].Check
|
||||
base := da.Array[from].base()
|
||||
label := byte(to ^ base)
|
||||
|
||||
// if `to` has sibling, remove `to` from the sibling list, then stop
|
||||
if da.Ninfos[to].Sibling != 0 || da.Ninfos[from].Child != label {
|
||||
// delete the label from the child ring first
|
||||
da.popSibling(from, base, label)
|
||||
// then release the current node `to` to the empty node ring
|
||||
da.pushEnode(to)
|
||||
break
|
||||
}
|
||||
// otherwise, just release the current node `to` to the empty node ring
|
||||
da.pushEnode(to)
|
||||
// then check its parent node
|
||||
to = from
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get returns the value associated with the given `key`.
|
||||
// It is equivalent to
|
||||
// id, err1 = Jump(key)
|
||||
// value, err2 = Value(id)
|
||||
// Thus, it may return ErrNoPath or ErrNoValue,
|
||||
func (da *Cedar) Get(key []byte) (value int, err error) {
|
||||
to, err := da.Jump(key, 0)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return da.Value(to)
|
||||
}
|
||||
|
||||
// PrefixMatch returns a list of at most `num` nodes
|
||||
// which match the prefix of the key.
|
||||
// If `num` is 0, it returns all matches.
|
||||
// For example, if the following keys were inserted:
|
||||
// id key
|
||||
// 19 abc
|
||||
// 23 ab
|
||||
// 37 abcd
|
||||
// then
|
||||
// PrefixMatch([]byte("abc"), 1) = [ 23 ] // match ["ab"]
|
||||
// PrefixMatch([]byte("abcd"), 0) = [ 23, 19, 37]
|
||||
// match ["ab", "abc", "abcd"]
|
||||
func (da *Cedar) PrefixMatch(key []byte, num int) (ids []int) {
|
||||
for from, i := 0, 0; i < len(key); i++ {
|
||||
to, err := da.Jump(key[i:i+1], from)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
if _, err := da.Value(to); err == nil {
|
||||
ids = append(ids, to)
|
||||
num--
|
||||
if num == 0 {
|
||||
return
|
||||
}
|
||||
}
|
||||
from = to
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// PrefixPredict returns a list of at most `num` nodes
|
||||
// which has the key as their prefix.
|
||||
// These nodes are ordered by their keys.
|
||||
// If `num` is 0, it returns all matches.
|
||||
// For example, if the following keys were inserted:
|
||||
// id key
|
||||
// 19 abc
|
||||
// 23 ab
|
||||
// 37 abcd
|
||||
// then
|
||||
// PrefixPredict([]byte("ab"), 2) = [ 23, 19 ] // predict ["ab", "abc"]
|
||||
// PrefixPredict([]byte("ab"), 0) = [ 23, 19, 37 ]
|
||||
// predict ["ab", "abc", "abcd"]
|
||||
func (da *Cedar) PrefixPredict(key []byte, num int) (ids []int) {
|
||||
root, err := da.Jump(key, 0)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for from, err := da.begin(root); err == nil; from, err = da.next(from, root) {
|
||||
ids = append(ids, from)
|
||||
num--
|
||||
if num == 0 {
|
||||
return
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (da *Cedar) begin(from int) (to int, err error) {
|
||||
for c := da.Ninfos[from].Child; c != 0; {
|
||||
to = da.Array[from].base() ^ int(c)
|
||||
c = da.Ninfos[to].Child
|
||||
from = to
|
||||
}
|
||||
if da.Array[from].base() > 0 {
|
||||
return da.Array[from].base(), nil
|
||||
}
|
||||
return from, nil
|
||||
}
|
||||
|
||||
func (da *Cedar) next(from int, root int) (to int, err error) {
|
||||
c := da.Ninfos[from].Sibling
|
||||
for c == 0 && from != root && da.Array[from].Check >= 0 {
|
||||
from = da.Array[from].Check
|
||||
c = da.Ninfos[from].Sibling
|
||||
}
|
||||
if from == root {
|
||||
return 0, ErrNoPath
|
||||
}
|
||||
from = da.Array[da.Array[from].Check].base() ^ int(c)
|
||||
return da.begin(from)
|
||||
}
|
472
vendor/github.com/go-ego/cedar/cedar.go
generated
vendored
Normal file
472
vendor/github.com/go-ego/cedar/cedar.go
generated
vendored
Normal file
@ -0,0 +1,472 @@
|
||||
package cedar
|
||||
|
||||
const (
|
||||
// ValueLimit limit value
|
||||
ValueLimit = int(^uint(0) >> 1)
|
||||
)
|
||||
|
||||
type node struct {
|
||||
Value int
|
||||
Check int
|
||||
}
|
||||
|
||||
func (n *node) base() int {
|
||||
return -(n.Value + 1)
|
||||
}
|
||||
|
||||
type ninfo struct {
|
||||
Sibling, Child byte
|
||||
}
|
||||
|
||||
type block struct {
|
||||
Prev, Next, Num, Reject, Trial, Ehead int
|
||||
}
|
||||
|
||||
func (b *block) init() {
|
||||
b.Num = 256
|
||||
b.Reject = 257
|
||||
}
|
||||
|
||||
// Cedar cedar struct
|
||||
type Cedar struct {
|
||||
*cedar
|
||||
}
|
||||
|
||||
type cedar struct {
|
||||
Array []node
|
||||
Ninfos []ninfo
|
||||
Blocks []block
|
||||
Reject [257]int
|
||||
BheadF int
|
||||
BheadC int
|
||||
BheadO int
|
||||
Capacity int
|
||||
Size int
|
||||
Ordered bool
|
||||
MaxTrial int
|
||||
}
|
||||
|
||||
// New new cedar
|
||||
func New() *Cedar {
|
||||
da := cedar{
|
||||
Array: make([]node, 256),
|
||||
Ninfos: make([]ninfo, 256),
|
||||
Blocks: make([]block, 1),
|
||||
Capacity: 256,
|
||||
Size: 256,
|
||||
Ordered: true,
|
||||
MaxTrial: 1,
|
||||
}
|
||||
|
||||
da.Array[0] = node{-2, 0}
|
||||
for i := 1; i < 256; i++ {
|
||||
da.Array[i] = node{-(i - 1), -(i + 1)}
|
||||
}
|
||||
da.Array[1].Value = -255
|
||||
da.Array[255].Check = -1
|
||||
|
||||
da.Blocks[0].Ehead = 1
|
||||
da.Blocks[0].init()
|
||||
|
||||
for i := 0; i <= 256; i++ {
|
||||
da.Reject[i] = i + 1
|
||||
}
|
||||
|
||||
return &Cedar{&da}
|
||||
}
|
||||
|
||||
// Get value by key, insert the key if not exist
|
||||
func (da *cedar) get(key []byte, from, pos int) *int {
|
||||
for ; pos < len(key); pos++ {
|
||||
if value := da.Array[from].Value; value >= 0 && value != ValueLimit {
|
||||
to := da.follow(from, 0)
|
||||
da.Array[to].Value = value
|
||||
}
|
||||
from = da.follow(from, key[pos])
|
||||
}
|
||||
to := from
|
||||
if da.Array[from].Value < 0 {
|
||||
to = da.follow(from, 0)
|
||||
}
|
||||
return &da.Array[to].Value
|
||||
}
|
||||
|
||||
func (da *cedar) follow(from int, label byte) int {
|
||||
base := da.Array[from].base()
|
||||
to := base ^ int(label)
|
||||
|
||||
if base < 0 || da.Array[to].Check < 0 {
|
||||
hasChild := false
|
||||
if base >= 0 {
|
||||
hasChild = (da.Array[base^int(da.Ninfos[from].Child)].Check == from)
|
||||
}
|
||||
to = da.popEnode(base, label, from)
|
||||
da.pushSibling(from, to^int(label), label, hasChild)
|
||||
|
||||
return to
|
||||
}
|
||||
|
||||
if da.Array[to].Check != from {
|
||||
to = da.resolve(from, base, label)
|
||||
return to
|
||||
}
|
||||
|
||||
if da.Array[to].Check == from {
|
||||
return to
|
||||
}
|
||||
|
||||
panic("cedar: internal error, should not be here")
|
||||
// return to
|
||||
}
|
||||
|
||||
func (da *cedar) popBlock(bi int, headIn *int, last bool) {
|
||||
if last {
|
||||
*headIn = 0
|
||||
return
|
||||
}
|
||||
|
||||
b := &da.Blocks[bi]
|
||||
da.Blocks[b.Prev].Next = b.Next
|
||||
da.Blocks[b.Next].Prev = b.Prev
|
||||
if bi == *headIn {
|
||||
*headIn = b.Next
|
||||
}
|
||||
}
|
||||
|
||||
func (da *cedar) pushBlock(bi int, headOut *int, empty bool) {
|
||||
b := &da.Blocks[bi]
|
||||
if empty {
|
||||
*headOut, b.Prev, b.Next = bi, bi, bi
|
||||
} else {
|
||||
tailOut := &da.Blocks[*headOut].Prev
|
||||
b.Prev = *tailOut
|
||||
b.Next = *headOut
|
||||
*headOut, *tailOut, da.Blocks[*tailOut].Next = bi, bi, bi
|
||||
}
|
||||
}
|
||||
|
||||
func (da *cedar) addBlock() int {
|
||||
if da.Size == da.Capacity {
|
||||
da.Capacity *= 2
|
||||
|
||||
oldArray := da.Array
|
||||
da.Array = make([]node, da.Capacity)
|
||||
copy(da.Array, oldArray)
|
||||
|
||||
oldNinfo := da.Ninfos
|
||||
da.Ninfos = make([]ninfo, da.Capacity)
|
||||
copy(da.Ninfos, oldNinfo)
|
||||
|
||||
oldBlock := da.Blocks
|
||||
da.Blocks = make([]block, da.Capacity>>8)
|
||||
copy(da.Blocks, oldBlock)
|
||||
}
|
||||
|
||||
da.Blocks[da.Size>>8].init()
|
||||
da.Blocks[da.Size>>8].Ehead = da.Size
|
||||
|
||||
da.Array[da.Size] = node{-(da.Size + 255), -(da.Size + 1)}
|
||||
for i := da.Size + 1; i < da.Size+255; i++ {
|
||||
da.Array[i] = node{-(i - 1), -(i + 1)}
|
||||
}
|
||||
da.Array[da.Size+255] = node{-(da.Size + 254), -da.Size}
|
||||
|
||||
da.pushBlock(da.Size>>8, &da.BheadO, da.BheadO == 0)
|
||||
da.Size += 256
|
||||
return da.Size>>8 - 1
|
||||
}
|
||||
|
||||
func (da *cedar) transferBlock(bi int, headIn, headOut *int) {
|
||||
da.popBlock(bi, headIn, bi == da.Blocks[bi].Next)
|
||||
da.pushBlock(bi, headOut, *headOut == 0 && da.Blocks[bi].Num != 0)
|
||||
}
|
||||
|
||||
func (da *cedar) popEnode(base int, label byte, from int) int {
|
||||
e := base ^ int(label)
|
||||
if base < 0 {
|
||||
e = da.findPlace()
|
||||
}
|
||||
bi := e >> 8
|
||||
n := &da.Array[e]
|
||||
b := &da.Blocks[bi]
|
||||
b.Num--
|
||||
if b.Num == 0 {
|
||||
if bi != 0 {
|
||||
da.transferBlock(bi, &da.BheadC, &da.BheadF)
|
||||
}
|
||||
} else {
|
||||
da.Array[-n.Value].Check = n.Check
|
||||
da.Array[-n.Check].Value = n.Value
|
||||
if e == b.Ehead {
|
||||
b.Ehead = -n.Check
|
||||
}
|
||||
if bi != 0 && b.Num == 1 && b.Trial != da.MaxTrial {
|
||||
da.transferBlock(bi, &da.BheadO, &da.BheadC)
|
||||
}
|
||||
}
|
||||
|
||||
n.Value = ValueLimit
|
||||
n.Check = from
|
||||
if base < 0 {
|
||||
da.Array[from].Value = -(e ^ int(label)) - 1
|
||||
}
|
||||
return e
|
||||
}
|
||||
|
||||
func (da *cedar) pushEnode(e int) {
|
||||
bi := e >> 8
|
||||
b := &da.Blocks[bi]
|
||||
b.Num++
|
||||
|
||||
if b.Num == 1 {
|
||||
b.Ehead = e
|
||||
da.Array[e] = node{-e, -e}
|
||||
if bi != 0 {
|
||||
da.transferBlock(bi, &da.BheadF, &da.BheadC)
|
||||
}
|
||||
} else {
|
||||
prev := b.Ehead
|
||||
next := -da.Array[prev].Check
|
||||
da.Array[e] = node{-prev, -next}
|
||||
da.Array[prev].Check = -e
|
||||
da.Array[next].Value = -e
|
||||
if b.Num == 2 || b.Trial == da.MaxTrial {
|
||||
if bi != 0 {
|
||||
da.transferBlock(bi, &da.BheadC, &da.BheadO)
|
||||
}
|
||||
}
|
||||
b.Trial = 0
|
||||
}
|
||||
|
||||
if b.Reject < da.Reject[b.Num] {
|
||||
b.Reject = da.Reject[b.Num]
|
||||
}
|
||||
da.Ninfos[e] = ninfo{}
|
||||
}
|
||||
|
||||
// hasChild: wherether the `from` node has children
|
||||
func (da *cedar) pushSibling(from, base int, label byte, hasChild bool) {
|
||||
c := &da.Ninfos[from].Child
|
||||
keepOrder := *c == 0
|
||||
if da.Ordered {
|
||||
keepOrder = label > *c
|
||||
}
|
||||
|
||||
if hasChild && keepOrder {
|
||||
c = &da.Ninfos[base^int(*c)].Sibling
|
||||
for da.Ordered && *c != 0 && *c < label {
|
||||
c = &da.Ninfos[base^int(*c)].Sibling
|
||||
}
|
||||
}
|
||||
|
||||
da.Ninfos[base^int(label)].Sibling = *c
|
||||
*c = label
|
||||
}
|
||||
|
||||
func (da *cedar) popSibling(from, base int, label byte) {
|
||||
c := &da.Ninfos[from].Child
|
||||
for *c != label {
|
||||
c = &da.Ninfos[base^int(*c)].Sibling
|
||||
}
|
||||
*c = da.Ninfos[base^int(*c)].Sibling
|
||||
}
|
||||
|
||||
func (da *cedar) consult(baseN, baseP int, cN, cP byte) bool {
|
||||
cN = da.Ninfos[baseN^int(cN)].Sibling
|
||||
cP = da.Ninfos[baseP^int(cP)].Sibling
|
||||
for cN != 0 && cP != 0 {
|
||||
cN = da.Ninfos[baseN^int(cN)].Sibling
|
||||
cP = da.Ninfos[baseP^int(cP)].Sibling
|
||||
}
|
||||
return cP != 0
|
||||
}
|
||||
|
||||
func (da *cedar) setChild(base int, c byte, label byte, flag bool) []byte {
|
||||
child := make([]byte, 0, 257)
|
||||
if c == 0 {
|
||||
child = append(child, c)
|
||||
c = da.Ninfos[base^int(c)].Sibling
|
||||
}
|
||||
if da.Ordered {
|
||||
for c != 0 && c <= label {
|
||||
child = append(child, c)
|
||||
c = da.Ninfos[base^int(c)].Sibling
|
||||
}
|
||||
}
|
||||
if flag {
|
||||
child = append(child, label)
|
||||
}
|
||||
for c != 0 {
|
||||
child = append(child, c)
|
||||
c = da.Ninfos[base^int(c)].Sibling
|
||||
}
|
||||
return child
|
||||
}
|
||||
|
||||
func (da *cedar) findPlace() int {
|
||||
if da.BheadC != 0 {
|
||||
return da.Blocks[da.BheadC].Ehead
|
||||
}
|
||||
if da.BheadO != 0 {
|
||||
return da.Blocks[da.BheadO].Ehead
|
||||
}
|
||||
return da.addBlock() << 8
|
||||
}
|
||||
|
||||
func (da *cedar) findPlaces(child []byte) int {
|
||||
bi := da.BheadO
|
||||
if bi != 0 {
|
||||
e := da.listBi(bi, child)
|
||||
if e > 0 {
|
||||
return e
|
||||
}
|
||||
}
|
||||
return da.addBlock() << 8
|
||||
}
|
||||
|
||||
func (da *cedar) listBi(bi int, child []byte) int {
|
||||
nc := len(child)
|
||||
bz := da.Blocks[da.BheadO].Prev
|
||||
for {
|
||||
b := &da.Blocks[bi]
|
||||
if b.Num >= nc && nc < b.Reject {
|
||||
e := da.listEhead(b, child)
|
||||
if e > 0 {
|
||||
return e
|
||||
}
|
||||
}
|
||||
b.Reject = nc
|
||||
if b.Reject < da.Reject[b.Num] {
|
||||
da.Reject[b.Num] = b.Reject
|
||||
}
|
||||
|
||||
biN := b.Next
|
||||
b.Trial++
|
||||
if b.Trial == da.MaxTrial {
|
||||
da.transferBlock(bi, &da.BheadO, &da.BheadC)
|
||||
}
|
||||
if bi == bz {
|
||||
break
|
||||
}
|
||||
bi = biN
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
func (da *cedar) listEhead(b *block, child []byte) int {
|
||||
for e := b.Ehead; ; {
|
||||
base := e ^ int(child[0])
|
||||
for i := 0; da.Array[base^int(child[i])].Check < 0; i++ {
|
||||
if i == len(child)-1 {
|
||||
b.Ehead = e
|
||||
// if e == 0 {
|
||||
// }
|
||||
return e
|
||||
}
|
||||
}
|
||||
e = -da.Array[e].Check
|
||||
if e == b.Ehead {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
func (da *cedar) resolve(fromN, baseN int, labelN byte) int {
|
||||
toPn := baseN ^ int(labelN)
|
||||
fromP := da.Array[toPn].Check
|
||||
baseP := da.Array[fromP].base()
|
||||
flag := da.consult(baseN, baseP, da.Ninfos[fromN].Child, da.Ninfos[fromP].Child)
|
||||
|
||||
var children []byte
|
||||
if flag {
|
||||
children = da.setChild(baseN, da.Ninfos[fromN].Child, labelN, true)
|
||||
} else {
|
||||
children = da.setChild(baseP, da.Ninfos[fromP].Child, 255, false)
|
||||
}
|
||||
|
||||
var base int
|
||||
if len(children) == 1 {
|
||||
base = da.findPlace()
|
||||
} else {
|
||||
base = da.findPlaces(children)
|
||||
}
|
||||
base ^= int(children[0])
|
||||
|
||||
var (
|
||||
from int
|
||||
nbase int
|
||||
)
|
||||
|
||||
if flag {
|
||||
from = fromN
|
||||
nbase = baseN
|
||||
} else {
|
||||
from = fromP
|
||||
nbase = baseP
|
||||
}
|
||||
|
||||
if flag && children[0] == labelN {
|
||||
da.Ninfos[from].Child = labelN
|
||||
}
|
||||
|
||||
da.Array[from].Value = -base - 1
|
||||
base, labelN, toPn = da.list(base, from, nbase, fromN, toPn,
|
||||
labelN, children, flag)
|
||||
|
||||
if flag {
|
||||
return base ^ int(labelN)
|
||||
}
|
||||
|
||||
return toPn
|
||||
}
|
||||
|
||||
func (da *cedar) list(base, from, nbase, fromN, toPn int,
|
||||
labelN byte, children []byte, flag bool) (int, byte, int) {
|
||||
for i := 0; i < len(children); i++ {
|
||||
to := da.popEnode(base, children[i], from)
|
||||
newTo := nbase ^ int(children[i])
|
||||
|
||||
if i == len(children)-1 {
|
||||
da.Ninfos[to].Sibling = 0
|
||||
} else {
|
||||
da.Ninfos[to].Sibling = children[i+1]
|
||||
}
|
||||
|
||||
if flag && newTo == toPn { // new node has no child
|
||||
continue
|
||||
}
|
||||
|
||||
n := &da.Array[to]
|
||||
ns := &da.Array[newTo]
|
||||
n.Value = ns.Value
|
||||
if n.Value < 0 && children[i] != 0 {
|
||||
// this node has children, fix their check
|
||||
c := da.Ninfos[newTo].Child
|
||||
da.Ninfos[to].Child = c
|
||||
da.Array[n.base()^int(c)].Check = to
|
||||
c = da.Ninfos[n.base()^int(c)].Sibling
|
||||
for c != 0 {
|
||||
da.Array[n.base()^int(c)].Check = to
|
||||
c = da.Ninfos[n.base()^int(c)].Sibling
|
||||
}
|
||||
}
|
||||
|
||||
if !flag && newTo == fromN { // parent node moved
|
||||
fromN = to
|
||||
}
|
||||
|
||||
if !flag && newTo == toPn {
|
||||
da.pushSibling(fromN, toPn^int(labelN), labelN, true)
|
||||
da.Ninfos[newTo].Child = 0
|
||||
ns.Value = ValueLimit
|
||||
ns.Check = fromN
|
||||
} else {
|
||||
da.pushEnode(newTo)
|
||||
}
|
||||
}
|
||||
|
||||
return base, labelN, toPn
|
||||
}
|
18
vendor/github.com/go-ego/cedar/circle.yml
generated
vendored
Normal file
18
vendor/github.com/go-ego/cedar/circle.yml
generated
vendored
Normal file
@ -0,0 +1,18 @@
|
||||
version: 2
|
||||
|
||||
jobs:
|
||||
build:
|
||||
docker:
|
||||
- image: govgo/go:1.10.3
|
||||
working_directory: /gopath/src/github.com/go-ego/cedar
|
||||
steps:
|
||||
- checkout
|
||||
# - run:
|
||||
# name: "Build & Test"
|
||||
# command: make dev
|
||||
# specify any bash command here prefixed with `run: `
|
||||
- run: go get -v -t -d ./...
|
||||
- run: go test -v ./...
|
||||
# codecov.io
|
||||
- run: go test -v -covermode=count -coverprofile=coverage.out
|
||||
- run: bash <(curl -s https://codecov.io/bash)
|
14
vendor/github.com/go-ego/cedar/doc.go
generated
vendored
Normal file
14
vendor/github.com/go-ego/cedar/doc.go
generated
vendored
Normal file
@ -0,0 +1,14 @@
|
||||
// Package cedar implements double-array trie.
|
||||
//
|
||||
// It is a golang port of cedar (http://www.tkl.iis.u-tokyo.ac.jp/~ynaga/cedar)
|
||||
// which is written in C++ by Naoki Yoshinaga.
|
||||
// Currently cedar-go implements the `reduced` version of cedar.
|
||||
// This package is not thread safe if there is one goroutine doing
|
||||
// insertions or deletions.
|
||||
//
|
||||
// Note
|
||||
//
|
||||
// key must be `[]byte` without zero items,
|
||||
// while value must be integer in the range [0, 2<<63-2] or
|
||||
// [0, 2<<31-2] depends on the platform.
|
||||
package cedar
|
18
vendor/github.com/go-ego/cedar/errors.go
generated
vendored
Normal file
18
vendor/github.com/go-ego/cedar/errors.go
generated
vendored
Normal file
@ -0,0 +1,18 @@
|
||||
package cedar
|
||||
|
||||
import (
|
||||
"errors"
|
||||
)
|
||||
|
||||
var (
|
||||
// ErrInvalidDataType invalid data type error
|
||||
ErrInvalidDataType = errors.New("cedar: invalid datatype")
|
||||
// ErrInvalidValue invalid value error
|
||||
ErrInvalidValue = errors.New("cedar: invalid value")
|
||||
// ErrInvalidKey invalid key error
|
||||
ErrInvalidKey = errors.New("cedar: invalid key")
|
||||
// ErrNoPath no path error
|
||||
ErrNoPath = errors.New("cedar: no path")
|
||||
// ErrNoValue no value error
|
||||
ErrNoValue = errors.New("cedar: no value")
|
||||
)
|
63
vendor/github.com/go-ego/cedar/io.go
generated
vendored
Normal file
63
vendor/github.com/go-ego/cedar/io.go
generated
vendored
Normal file
@ -0,0 +1,63 @@
|
||||
package cedar
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/gob"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"os"
|
||||
)
|
||||
|
||||
// Save saves the cedar to an io.Writer,
|
||||
// where dataType is either "json" or "gob".
|
||||
func (da *Cedar) Save(out io.Writer, dataType string) error {
|
||||
switch dataType {
|
||||
case "gob", "GOB":
|
||||
dataEecoder := gob.NewEncoder(out)
|
||||
return dataEecoder.Encode(da.cedar)
|
||||
case "json", "JSON":
|
||||
dataEecoder := json.NewEncoder(out)
|
||||
return dataEecoder.Encode(da.cedar)
|
||||
}
|
||||
return ErrInvalidDataType
|
||||
}
|
||||
|
||||
// SaveToFile saves the cedar to a file,
|
||||
// where dataType is either "json" or "gob".
|
||||
func (da *Cedar) SaveToFile(fileName string, dataType string) error {
|
||||
file, err := os.OpenFile(fileName, os.O_CREATE|os.O_WRONLY, 0666)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer file.Close()
|
||||
out := bufio.NewWriter(file)
|
||||
defer out.Flush()
|
||||
da.Save(out, dataType)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Load loads the cedar from an io.Writer,
|
||||
// where dataType is either "json" or "gob".
|
||||
func (da *Cedar) Load(in io.Reader, dataType string) error {
|
||||
switch dataType {
|
||||
case "gob", "GOB":
|
||||
dataDecoder := gob.NewDecoder(in)
|
||||
return dataDecoder.Decode(da.cedar)
|
||||
case "json", "JSON":
|
||||
dataDecoder := json.NewDecoder(in)
|
||||
return dataDecoder.Decode(da.cedar)
|
||||
}
|
||||
return ErrInvalidDataType
|
||||
}
|
||||
|
||||
// LoadFromFile loads the cedar from a file,
|
||||
// where dataType is either "json" or "gob".
|
||||
func (da *Cedar) LoadFromFile(fileName string, dataType string) error {
|
||||
file, err := os.OpenFile(fileName, os.O_RDONLY, 0600)
|
||||
defer file.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
in := bufio.NewReader(file)
|
||||
return da.Load(in, dataType)
|
||||
}
|
28
vendor/github.com/go-ego/gpy/BUILD.bazel
generated
vendored
Normal file
28
vendor/github.com/go-ego/gpy/BUILD.bazel
generated
vendored
Normal file
@ -0,0 +1,28 @@
|
||||
load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = [
|
||||
"doc.go",
|
||||
"phonetic_symbol.go",
|
||||
"pinyin.go",
|
||||
"pinyin_dict.go",
|
||||
],
|
||||
importmap = "go-common/vendor/github.com/go-ego/gpy",
|
||||
importpath = "github.com/go-ego/gpy",
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "package-srcs",
|
||||
srcs = glob(["**"]),
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:private"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "all-srcs",
|
||||
srcs = [":package-srcs"],
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
4
vendor/github.com/go-ego/gpy/CHANGELOG.md
generated
vendored
Normal file
4
vendor/github.com/go-ego/gpy/CHANGELOG.md
generated
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
# Changelog
|
||||
|
||||
## 0.1.0 (2017-6-23)
|
||||
* Initial Release
|
69
vendor/github.com/go-ego/gpy/CONTRIBUTING.md
generated
vendored
Normal file
69
vendor/github.com/go-ego/gpy/CONTRIBUTING.md
generated
vendored
Normal file
@ -0,0 +1,69 @@
|
||||
# Contribution Guidelines
|
||||
|
||||
## Introduction
|
||||
|
||||
This document explains how to contribute changes to the Ego project. It assumes you have followed the README.md and [API Document](https://github.com/go-ego/gpy/blob/master/docs/doc.md). <!--Sensitive security-related issues should be reported to [security@Ego.io](mailto:security@Ego.io.)-->
|
||||
|
||||
## Bug reports
|
||||
|
||||
Please search the issues on the issue tracker with a variety of keywords to ensure your bug is not already reported.
|
||||
|
||||
If unique, [open an issue](https://github.com/go-ego/gpy/issues/new) and answer the questions so we can understand and reproduce the problematic behavior.
|
||||
|
||||
The burden is on you to convince us that it is actually a bug in Ego. This is easiest to do when you write clear, concise instructions so we can reproduce the behavior (even if it seems obvious). The more detailed and specific you are, the faster we will be able to help you. Check out [How to Report Bugs Effectively](http://www.chiark.greenend.org.uk/~sgtatham/bugs.html).
|
||||
|
||||
Please be kind, remember that Ego comes at no cost to you, and you're getting free help.
|
||||
|
||||
## Discuss your design
|
||||
|
||||
The project welcomes submissions but please let everyone know what you're working on if you want to change or add something to the Ego repositories.
|
||||
|
||||
Before starting to write something new for the Ego project, please [file an issue](https://github.com/go-ego/gpy/issues/new). Significant changes must go through the [change proposal process](https://github.com/go-ego/proposals) before they can be accepted.
|
||||
|
||||
This process gives everyone a chance to validate the design, helps prevent duplication of effort, and ensures that the idea fits inside the goals for the project and tools. It also checks that the design is sound before code is written; the code review tool is not the place for high-level discussions.
|
||||
|
||||
## Testing redux
|
||||
|
||||
Before sending code out for review, run all the tests for the whole tree to make sure the changes don't break other usage and keep the compatibility on upgrade. You must be test on Mac, Windows, Linux and other. You should install the CLI for Circle CI, as we are using the server for continous testing.
|
||||
|
||||
## Code review
|
||||
|
||||
In addition to the owner, Changes to Ego must be reviewed before they are accepted, no matter who makes the change even if it is a maintainer. We use GitHub's pull request workflow to do that and we also use [LGTM](http://lgtm.co) to ensure every PR is reviewed by vz or least 2 maintainers.
|
||||
|
||||
|
||||
## Sign your work
|
||||
|
||||
The sign-off is a simple line at the end of the explanation for the patch. Your signature certifies that you wrote the patch or otherwise have the right to pass it on as an open-source patch.
|
||||
|
||||
## Maintainers
|
||||
|
||||
To make sure every PR is checked, we got team maintainers. A maintainer should be a contributor of Ego and contributed at least 4 accepted PRs.
|
||||
|
||||
## Owners
|
||||
|
||||
Since Ego is a pure community organization without any company support, Copyright 2016 The go-ego Project Developers.
|
||||
|
||||
|
||||
## Versions
|
||||
|
||||
Ego has the `master` branch as a tip branch and has version branches such as `v0.30.0`. `v0.40.0` is a release branch and we will tag `v0.40.0` for binary download. If `v0.40.0` has bugs, we will accept pull requests on the `v0.40.0` branch and publish a `v0.40.1` tag, after bringing the bug fix also to the master branch.
|
||||
|
||||
Since the `master` branch is a tip version, if you wish to use Ego in production, please download the latest release tag version. All the branches will be protected via GitHub, all the PRs to every branch must be reviewed by two maintainers and must pass the automatic tests.
|
||||
|
||||
## Copyright
|
||||
|
||||
Code that you contribute should use the standard copyright header:
|
||||
|
||||
```
|
||||
// Copyright 2016 The go-ego Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// https://github.com/go-ego/gpy/blob/master/LICENSE
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
```
|
||||
|
||||
Files in the repository contain copyright from the year they are added to the year they are last changed. If the copyright author is changed, just paste the header below the old one.
|
201
vendor/github.com/go-ego/gpy/LICENSE
generated
vendored
Normal file
201
vendor/github.com/go-ego/gpy/LICENSE
generated
vendored
Normal file
@ -0,0 +1,201 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "{}"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright {yyyy} {name of copyright owner}
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
20
vendor/github.com/go-ego/gpy/Makefile
generated
vendored
Normal file
20
vendor/github.com/go-ego/gpy/Makefile
generated
vendored
Normal file
@ -0,0 +1,20 @@
|
||||
help:
|
||||
@echo "test run test"
|
||||
@echo "lint run lint"
|
||||
@echo "gen_pinyin_dict gen pinyin dict"
|
||||
|
||||
.PHONY: test
|
||||
test:
|
||||
@echo "run test"
|
||||
@go test -v -cover
|
||||
|
||||
.PHONY: gen_pinyin_dict
|
||||
gen_pinyin_dict:
|
||||
@go run tools/gen_pinyin_dict.go tools/pinyin-data/pinyin.txt pinyin_dict.go
|
||||
|
||||
.PHONY: lint
|
||||
lint:
|
||||
gofmt -s -w . pinyin tools
|
||||
golint .
|
||||
golint pinyin
|
||||
golint tools
|
94
vendor/github.com/go-ego/gpy/README.md
generated
vendored
Normal file
94
vendor/github.com/go-ego/gpy/README.md
generated
vendored
Normal file
@ -0,0 +1,94 @@
|
||||
# gpy
|
||||
|
||||
[](https://circleci.com/gh/go-ego/gpy)
|
||||
[](https://travis-ci.org/go-ego/gpy)<!-- [](https://coveralls.io/r/github.com/go-ego/gpy?branch=master) -->
|
||||
[](https://codecov.io/gh/go-ego/gpy)
|
||||
[](https://goreportcard.com/report/github.com/go-ego/gpy)
|
||||
[](https://godoc.org/github.com/go-ego/gpy)
|
||||
|
||||
汉语拼音转换工具 Go 版。
|
||||
|
||||
[简体中文](https://github.com/go-ego/gpy/blob/master/README_zh.md)
|
||||
|
||||
|
||||
## Installation
|
||||
|
||||
```
|
||||
go get -u github.com/go-ego/gpy
|
||||
```
|
||||
|
||||
### install CLI tool:
|
||||
|
||||
```
|
||||
go get -u github.com/go-ego/gpy/pinyin
|
||||
$ gpy 中国人
|
||||
zhōng guó rén
|
||||
```
|
||||
|
||||
|
||||
## Documentation
|
||||
|
||||
API documentation can be found here:
|
||||
[godoc](https://godoc.org/github.com/go-ego/gpy)
|
||||
|
||||
|
||||
## Usage
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-ego/gpy"
|
||||
)
|
||||
|
||||
func main() {
|
||||
hans := "中国人"
|
||||
|
||||
// 默认
|
||||
a := gpy.NewArgs()
|
||||
fmt.Println(gpy.Pinyin(hans, a))
|
||||
// [[zhong] [guo] [ren]]
|
||||
|
||||
// 包含声调
|
||||
a.Style = gpy.Tone
|
||||
fmt.Println(gpy.Pinyin(hans, a))
|
||||
// [[zhōng] [guó] [rén]]
|
||||
|
||||
// 声调用数字表示
|
||||
a.Style = gpy.Tone2
|
||||
fmt.Println(gpy.Pinyin(hans, a))
|
||||
// [[zho1ng] [guo2] [re2n]]
|
||||
|
||||
// 开启多音字模式
|
||||
a = gpy.NewArgs()
|
||||
a.Heteronym = true
|
||||
fmt.Println(gpy.Pinyin(hans, a))
|
||||
// [[zhong zhong] [guo] [ren]]
|
||||
a.Style = gpy.Tone2
|
||||
fmt.Println(gpy.Pinyin(hans, a))
|
||||
// [[zho1ng zho4ng] [guo2] [re2n]]
|
||||
|
||||
fmt.Println(gpy.LazyPinyin(hans, gpy.NewArgs()))
|
||||
// [zhong guo ren]
|
||||
|
||||
fmt.Println(gpy.Convert(hans, nil))
|
||||
// [[zhong] [guo] [ren]]
|
||||
|
||||
fmt.Println(gpy.LazyConvert(hans, nil))
|
||||
// [zhong guo ren]
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Related Projects
|
||||
|
||||
* [hotoo/pinyin](https://github.com/hotoo/pinyin): 汉语拼音转换工具 Node.js/JavaScript 版。
|
||||
* [mozillazg/python-pinyin](https://github.com/mozillazg/python-pinyin): 汉语拼音转换工具 Python 版。
|
||||
* [mozillazg/rust-pinyin](https://github.com/mozillazg/rust-pinyin): 汉语拼音转换工具 Rust 版。
|
||||
|
||||
|
||||
## License
|
||||
|
||||
Under the MIT License, base on [go-pinyin](https://github.com/mozillazg/go-pinyin).
|
92
vendor/github.com/go-ego/gpy/README_zh.md
generated
vendored
Normal file
92
vendor/github.com/go-ego/gpy/README_zh.md
generated
vendored
Normal file
@ -0,0 +1,92 @@
|
||||
# gpy
|
||||
|
||||
[](https://circleci.com/gh/go-ego/gpy)
|
||||
[](https://travis-ci.org/go-ego/gpy)<!-- [](https://coveralls.io/r/github.com/go-ego/gpy?branch=master) -->
|
||||
[](https://codecov.io/gh/go-ego/gpy)
|
||||
[](https://goreportcard.com/report/github.com/go-ego/gpy)
|
||||
[](https://godoc.org/github.com/go-ego/gpy)
|
||||
|
||||
汉语拼音转换工具 Go 版。
|
||||
|
||||
|
||||
## Installation
|
||||
|
||||
```
|
||||
go get -u github.com/go-ego/gpy
|
||||
```
|
||||
|
||||
### install CLI tool:
|
||||
|
||||
```
|
||||
go get -u github.com/go-ego/gpy/pinyin
|
||||
$ gpy 中国人
|
||||
zhōng guó rén
|
||||
```
|
||||
|
||||
|
||||
## Documentation
|
||||
|
||||
API documentation can be found here:
|
||||
[godoc](https://godoc.org/github.com/go-ego/gpy)
|
||||
|
||||
|
||||
## Usage
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-ego/gpy"
|
||||
)
|
||||
|
||||
func main() {
|
||||
hans := "中国人"
|
||||
|
||||
// 默认
|
||||
a := gpy.NewArgs()
|
||||
fmt.Println(gpy.Pinyin(hans, a))
|
||||
// [[zhong] [guo] [ren]]
|
||||
|
||||
// 包含声调
|
||||
a.Style = gpy.Tone
|
||||
fmt.Println(gpy.Pinyin(hans, a))
|
||||
// [[zhōng] [guó] [rén]]
|
||||
|
||||
// 声调用数字表示
|
||||
a.Style = gpy.Tone2
|
||||
fmt.Println(gpy.Pinyin(hans, a))
|
||||
// [[zho1ng] [guo2] [re2n]]
|
||||
|
||||
// 开启多音字模式
|
||||
a = gpy.NewArgs()
|
||||
a.Heteronym = true
|
||||
fmt.Println(gpy.Pinyin(hans, a))
|
||||
// [[zhong zhong] [guo] [ren]]
|
||||
a.Style = gpy.Tone2
|
||||
fmt.Println(gpy.Pinyin(hans, a))
|
||||
// [[zho1ng zho4ng] [guo2] [re2n]]
|
||||
|
||||
fmt.Println(gpy.LazyPinyin(hans, gpy.NewArgs()))
|
||||
// [zhong guo ren]
|
||||
|
||||
fmt.Println(gpy.Convert(hans, nil))
|
||||
// [[zhong] [guo] [ren]]
|
||||
|
||||
fmt.Println(gpy.LazyConvert(hans, nil))
|
||||
// [zhong guo ren]
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Related Projects
|
||||
|
||||
* [hotoo/pinyin](https://github.com/hotoo/pinyin): 汉语拼音转换工具 Node.js/JavaScript 版。
|
||||
* [mozillazg/python-pinyin](https://github.com/mozillazg/python-pinyin): 汉语拼音转换工具 Python 版。
|
||||
* [mozillazg/rust-pinyin](https://github.com/mozillazg/rust-pinyin): 汉语拼音转换工具 Rust 版。
|
||||
|
||||
|
||||
## License
|
||||
|
||||
Under the MIT License, base on [go-pinyin](https://github.com/mozillazg/go-pinyin).
|
29
vendor/github.com/go-ego/gpy/circle.yml
generated
vendored
Normal file
29
vendor/github.com/go-ego/gpy/circle.yml
generated
vendored
Normal file
@ -0,0 +1,29 @@
|
||||
# circle.yml #
|
||||
# machine:
|
||||
# go:
|
||||
# version: 1.9.1
|
||||
|
||||
version: 2
|
||||
|
||||
jobs:
|
||||
build:
|
||||
docker:
|
||||
- image: govgo/go:1.10.3
|
||||
working_directory: /gopath/src/github.com/go-ego/gpy
|
||||
steps:
|
||||
- checkout
|
||||
# specify any bash command here prefixed with `run: `
|
||||
- run: go get -u github.com/mattn/go-isatty
|
||||
- run: go get -u github.com/axw/gocov/gocov
|
||||
- run: go get -u github.com/mattn/goveralls
|
||||
- run: go get -v -t -d ./...
|
||||
- run: go test -v ./...
|
||||
# codecov.io
|
||||
- run: go test -v -covermode=count -coverprofile=coverage.out
|
||||
- run: bash <(curl -s https://codecov.io/bash)
|
||||
|
||||
# test:
|
||||
# post:
|
||||
# - go test -v -covermode=count -coverprofile=coverage.out
|
||||
# - bash <(curl -s https://codecov.io/bash)
|
||||
|
55
vendor/github.com/go-ego/gpy/doc.go
generated
vendored
Normal file
55
vendor/github.com/go-ego/gpy/doc.go
generated
vendored
Normal file
@ -0,0 +1,55 @@
|
||||
// Copyright 2017 The go-ego Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// https://github.com/go-ego/ego/blob/master/LICENSE
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
/*
|
||||
|
||||
Package gpy : Chinese Pinyin conversion tool; 汉语拼音转换工具.
|
||||
|
||||
Installation:
|
||||
go get -u github.com/go-ego/gpy
|
||||
|
||||
Usage :
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-ego/gpy"
|
||||
)
|
||||
|
||||
func main() {
|
||||
hans := "中国人"
|
||||
// 默认
|
||||
a := gpy.NewArgs()
|
||||
fmt.Println(gpy.Pinyin(hans, a))
|
||||
// [[zhong] [guo] [ren]]
|
||||
|
||||
// 包含声调
|
||||
a.Style = gpy.Tone
|
||||
fmt.Println(gpy.Pinyin(hans, a))
|
||||
// [[zhōng] [guó] [rén]]
|
||||
|
||||
// 声调用数字表示
|
||||
a.Style = gpy.Tone2
|
||||
fmt.Println(gpy.Pinyin(hans, a))
|
||||
// [[zho1ng] [guo2] [re2n]]
|
||||
|
||||
// 开启多音字模式
|
||||
a = gpy.NewArgs()
|
||||
a.Heteronym = true
|
||||
fmt.Println(gpy.Pinyin(hans, a))
|
||||
// [[zhong zhong] [guo] [ren]]
|
||||
a.Style = gpy.Tone2
|
||||
fmt.Println(gpy.Pinyin(hans, a))
|
||||
// [[zho1ng zho4ng] [guo2] [re2n]]
|
||||
}
|
||||
*/
|
||||
package gpy
|
33
vendor/github.com/go-ego/gpy/phonetic_symbol.go
generated
vendored
Normal file
33
vendor/github.com/go-ego/gpy/phonetic_symbol.go
generated
vendored
Normal file
@ -0,0 +1,33 @@
|
||||
package gpy
|
||||
|
||||
// 带音标字符。
|
||||
var phoneticSymbol = map[string]string{
|
||||
"ā": "a1",
|
||||
"á": "a2",
|
||||
"ǎ": "a3",
|
||||
"à": "a4",
|
||||
"ē": "e1",
|
||||
"é": "e2",
|
||||
"ě": "e3",
|
||||
"è": "e4",
|
||||
"ō": "o1",
|
||||
"ó": "o2",
|
||||
"ǒ": "o3",
|
||||
"ò": "o4",
|
||||
"ī": "i1",
|
||||
"í": "i2",
|
||||
"ǐ": "i3",
|
||||
"ì": "i4",
|
||||
"ū": "u1",
|
||||
"ú": "u2",
|
||||
"ǔ": "u3",
|
||||
"ù": "u4",
|
||||
"ü": "v",
|
||||
"ǘ": "v2",
|
||||
"ǚ": "v3",
|
||||
"ǜ": "v4",
|
||||
"ń": "n2",
|
||||
"ň": "n3",
|
||||
"ǹ": "n4",
|
||||
"ḿ": "m2",
|
||||
}
|
339
vendor/github.com/go-ego/gpy/pinyin.go
generated
vendored
Normal file
339
vendor/github.com/go-ego/gpy/pinyin.go
generated
vendored
Normal file
@ -0,0 +1,339 @@
|
||||
package gpy
|
||||
|
||||
import (
|
||||
// "fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// Meta
|
||||
const (
|
||||
version = "0.10.0.34"
|
||||
// License = "MIT"
|
||||
)
|
||||
|
||||
// GetVersion get version
|
||||
func GetVersion() string {
|
||||
return version
|
||||
}
|
||||
|
||||
// 拼音风格(推荐)
|
||||
const (
|
||||
Normal = 0 // 普通风格,不带声调(默认风格)。如: zhong guo
|
||||
Tone = 1 // 声调风格1,拼音声调在韵母第一个字母上。如: zhōng guó
|
||||
Tone2 = 2 // 声调风格2,即拼音声调在各个韵母之后,用数字 [1-4] 进行表示。如: zho1ng guo2
|
||||
Tone3 = 8 // 声调风格3,即拼音声调在各个拼音之后,用数字 [1-4] 进行表示。如: zhong1 guo2
|
||||
Initials = 3 // 声母风格,只返回各个拼音的声母部分。如: zh g
|
||||
FirstLetter = 4 // 首字母风格,只返回拼音的首字母部分。如: z g
|
||||
Finals = 5 // 韵母风格,只返回各个拼音的韵母部分,不带声调。如: ong uo
|
||||
FinalsTone = 6 // 韵母风格1,带声调,声调在韵母第一个字母上。如: ōng uó
|
||||
FinalsTone2 = 7 // 韵母风格2,带声调,声调在各个韵母之后,用数字 [1-4] 进行表示。如: o1ng uo2
|
||||
FinalsTone3 = 9 // 韵母风格3,带声调,声调在各个拼音之后,用数字 [1-4] 进行表示。如: ong1 uo2
|
||||
)
|
||||
|
||||
// 拼音风格(兼容之前的版本)
|
||||
// const (
|
||||
// NORMAL = Normal
|
||||
// TONE = Tone
|
||||
// TONE2 = Tone2
|
||||
// INITIALS = Initials
|
||||
// FIRST_LETTER = FirstLetter
|
||||
// FINALS = Finals
|
||||
// FINALS_TONE = FinalsTone
|
||||
// FINALS_TONE2 = FinalsTone2
|
||||
// )
|
||||
|
||||
var (
|
||||
// 声母表
|
||||
initialArray = strings.Split(
|
||||
"b,p,m,f,d,t,n,l,g,k,h,j,q,x,r,zh,ch,sh,z,c,s",
|
||||
",",
|
||||
)
|
||||
|
||||
// 所有带声调的字符
|
||||
rePhoneticSymbolSource = func(m map[string]string) string {
|
||||
s := ""
|
||||
for k := range m {
|
||||
s = s + k
|
||||
}
|
||||
return s
|
||||
}(phoneticSymbol)
|
||||
)
|
||||
|
||||
var (
|
||||
// 匹配带声调字符的正则表达式
|
||||
rePhoneticSymbol = regexp.MustCompile("[" + rePhoneticSymbolSource + "]")
|
||||
|
||||
// 匹配使用数字标识声调的字符的正则表达式
|
||||
reTone2 = regexp.MustCompile("([aeoiuvnm])([1-4])$")
|
||||
|
||||
// 匹配 Tone2 中标识韵母声调的正则表达式
|
||||
reTone3 = regexp.MustCompile("^([a-z]+)([1-4])([a-z]*)$")
|
||||
)
|
||||
|
||||
// Args 配置信息
|
||||
type Args struct {
|
||||
Style int // 拼音风格(默认: Normal)
|
||||
Heteronym bool // 是否启用多音字模式(默认:禁用)
|
||||
Separator string // Slug 中使用的分隔符(默认:-)
|
||||
|
||||
// 处理没有拼音的字符(默认忽略没有拼音的字符)
|
||||
// 函数返回的 slice 的长度为0 则表示忽略这个字符
|
||||
Fallback func(r rune, a Args) []string
|
||||
}
|
||||
|
||||
var (
|
||||
// Style 默认配置:风格
|
||||
Style = Normal
|
||||
|
||||
// Heteronym 默认配置:是否启用多音字模式
|
||||
Heteronym = false
|
||||
|
||||
// Separator 默认配置: `Slug` 中 Join 所用的分隔符
|
||||
Separator = "-"
|
||||
|
||||
// Fallback 默认配置: 如何处理没有拼音的字符(忽略这个字符)
|
||||
Fallback = func(r rune, a Args) []string {
|
||||
return []string{}
|
||||
}
|
||||
|
||||
finalExceptionsMap = map[string]string{
|
||||
"ū": "ǖ",
|
||||
"ú": "ǘ",
|
||||
"ǔ": "ǚ",
|
||||
"ù": "ǜ",
|
||||
}
|
||||
|
||||
reFinalExceptions = regexp.MustCompile("^(j|q|x)(ū|ú|ǔ|ù)$")
|
||||
reFinal2Exceptions = regexp.MustCompile("^(j|q|x)u(\\d?)$")
|
||||
)
|
||||
|
||||
// NewArgs 返回包含默认配置的 `Args`
|
||||
func NewArgs() Args {
|
||||
return Args{Style, Heteronym, Separator, Fallback}
|
||||
}
|
||||
|
||||
// 获取单个拼音中的声母
|
||||
func initial(p string) string {
|
||||
s := ""
|
||||
for _, v := range initialArray {
|
||||
if strings.HasPrefix(p, v) {
|
||||
s = v
|
||||
break
|
||||
}
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// 获取单个拼音中的韵母
|
||||
func final(p string) string {
|
||||
n := initial(p)
|
||||
if n == "" {
|
||||
return handleYW(p)
|
||||
}
|
||||
|
||||
// 特例 j/q/x
|
||||
matches := reFinalExceptions.FindStringSubmatch(p)
|
||||
// jū -> jǖ
|
||||
if len(matches) == 3 && matches[1] != "" && matches[2] != "" {
|
||||
v, _ := finalExceptionsMap[matches[2]]
|
||||
return v
|
||||
}
|
||||
// ju -> jv, ju1 -> jv1
|
||||
p = reFinal2Exceptions.ReplaceAllString(p, "${1}v$2")
|
||||
return strings.Join(strings.SplitN(p, n, 2), "")
|
||||
}
|
||||
|
||||
// 处理 y, w
|
||||
func handleYW(p string) string {
|
||||
// 特例 y/w
|
||||
if strings.HasPrefix(p, "yu") {
|
||||
p = "v" + p[2:] // yu -> v
|
||||
} else if strings.HasPrefix(p, "yi") {
|
||||
p = p[1:] // yi -> i
|
||||
} else if strings.HasPrefix(p, "y") {
|
||||
p = "i" + p[1:] // y -> i
|
||||
} else if strings.HasPrefix(p, "wu") {
|
||||
p = p[1:] // wu -> u
|
||||
} else if strings.HasPrefix(p, "w") {
|
||||
p = "u" + p[1:] // w -> u
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
func toFixed(p string, a Args) string {
|
||||
if a.Style == Initials {
|
||||
return initial(p)
|
||||
}
|
||||
origP := p
|
||||
|
||||
// 替换拼音中的带声调字符
|
||||
py := rePhoneticSymbol.ReplaceAllStringFunc(p, func(m string) string {
|
||||
symbol, _ := phoneticSymbol[m]
|
||||
switch a.Style {
|
||||
// 不包含声调
|
||||
case Normal, FirstLetter, Finals:
|
||||
// 去掉声调: a1 -> a
|
||||
m = reTone2.ReplaceAllString(symbol, "$1")
|
||||
case Tone2, FinalsTone2, Tone3, FinalsTone3:
|
||||
// 返回使用数字标识声调的字符
|
||||
m = symbol
|
||||
default:
|
||||
// 声调在头上
|
||||
}
|
||||
return m
|
||||
})
|
||||
|
||||
switch a.Style {
|
||||
// 将声调移动到最后
|
||||
case Tone3, FinalsTone3:
|
||||
py = reTone3.ReplaceAllString(py, "$1$3$2")
|
||||
}
|
||||
switch a.Style {
|
||||
// 首字母
|
||||
case FirstLetter:
|
||||
py = py[:1]
|
||||
// 韵母
|
||||
case Finals, FinalsTone, FinalsTone2, FinalsTone3:
|
||||
// 转换为 []rune unicode 编码用于获取第一个拼音字符
|
||||
// 因为 string 是 utf-8 编码不方便获取第一个拼音字符
|
||||
rs := []rune(origP)
|
||||
switch string(rs[0]) {
|
||||
// 因为鼻音没有声母所以不需要去掉声母部分
|
||||
case "ḿ", "ń", "ň", "ǹ":
|
||||
default:
|
||||
py = final(py)
|
||||
}
|
||||
}
|
||||
return py
|
||||
}
|
||||
|
||||
func applyStyle(p []string, a Args) []string {
|
||||
newP := []string{}
|
||||
for _, v := range p {
|
||||
newP = append(newP, toFixed(v, a))
|
||||
}
|
||||
return newP
|
||||
}
|
||||
|
||||
// SinglePinyin 把单个 `rune` 类型的汉字转换为拼音.
|
||||
func SinglePinyin(r rune, a Args) []string {
|
||||
if a.Fallback == nil {
|
||||
a.Fallback = Fallback
|
||||
}
|
||||
value, ok := PinyinDict[int(r)]
|
||||
pys := []string{}
|
||||
if ok {
|
||||
pys = strings.Split(value, ",")
|
||||
} else {
|
||||
pys = a.Fallback(r, a)
|
||||
}
|
||||
if len(pys) > 0 {
|
||||
if !a.Heteronym {
|
||||
pys = pys[:1]
|
||||
}
|
||||
|
||||
return applyStyle(pys, a)
|
||||
}
|
||||
return pys
|
||||
}
|
||||
|
||||
// IsChineseChar to determine whether the Chinese string
|
||||
// 判断是否为中文字符串
|
||||
func IsChineseChar(str string) bool {
|
||||
for _, r := range str {
|
||||
if unicode.Is(unicode.Scripts["Han"], r) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// HanPinyin 汉字转拼音,支持多音字模式.
|
||||
func HanPinyin(s string, a Args) [][]string {
|
||||
pys := [][]string{}
|
||||
for _, r := range s {
|
||||
py := SinglePinyin(r, a)
|
||||
if len(py) > 0 {
|
||||
pys = append(pys, py)
|
||||
}
|
||||
}
|
||||
return pys
|
||||
}
|
||||
|
||||
// Pinyin 汉字转拼音,支持多音字模式和拼音与英文等字母混合.
|
||||
func Pinyin(s string, a Args) [][]string {
|
||||
pys := [][]string{}
|
||||
for _, r := range s {
|
||||
if unicode.Is(unicode.Scripts["Han"], r) {
|
||||
// if r {
|
||||
// }
|
||||
py := SinglePinyin(r, a)
|
||||
if len(py) > 0 {
|
||||
pys = append(pys, py)
|
||||
}
|
||||
}
|
||||
// else {
|
||||
// py := strings.Split(s, " ")
|
||||
// fmt.Println(py)
|
||||
// }
|
||||
}
|
||||
|
||||
py := strings.Split(s, " ")
|
||||
for i := 0; i < len(py); i++ {
|
||||
var (
|
||||
pyarr []string
|
||||
cs int64
|
||||
)
|
||||
|
||||
for _, r := range py[i] {
|
||||
if unicode.Is(unicode.Scripts["Han"], r) {
|
||||
// continue
|
||||
cs++
|
||||
}
|
||||
}
|
||||
if cs == 0 {
|
||||
pyarr = append(pyarr, py[i])
|
||||
pys = append(pys, pyarr)
|
||||
}
|
||||
}
|
||||
|
||||
return pys
|
||||
}
|
||||
|
||||
// LazyPinyin 汉字转拼音,与 `Pinyin` 的区别是:
|
||||
// 返回值类型不同,并且不支持多音字模式,每个汉字只取第一个音.
|
||||
func LazyPinyin(s string, a Args) []string {
|
||||
a.Heteronym = false
|
||||
pys := []string{}
|
||||
for _, v := range Pinyin(s, a) {
|
||||
pys = append(pys, v[0])
|
||||
}
|
||||
return pys
|
||||
}
|
||||
|
||||
// Slug join `LazyPinyin` 的返回值.
|
||||
// 建议改用 https://github.com/mozillazg/go-slugify
|
||||
func Slug(s string, a Args) string {
|
||||
separator := a.Separator
|
||||
return strings.Join(LazyPinyin(s, a), separator)
|
||||
}
|
||||
|
||||
// Convert 跟 Pinyin 的唯一区别就是 a 参数可以是 nil
|
||||
func Convert(s string, a *Args) [][]string {
|
||||
if a == nil {
|
||||
args := NewArgs()
|
||||
a = &args
|
||||
}
|
||||
return Pinyin(s, *a)
|
||||
}
|
||||
|
||||
// LazyConvert 跟 LazyPinyin 的唯一区别就是 a 参数可以是 nil
|
||||
func LazyConvert(s string, a *Args) []string {
|
||||
if a == nil {
|
||||
args := NewArgs()
|
||||
a = &args
|
||||
}
|
||||
return LazyPinyin(s, *a)
|
||||
}
|
41451
vendor/github.com/go-ego/gpy/pinyin_dict.go
generated
vendored
Normal file
41451
vendor/github.com/go-ego/gpy/pinyin_dict.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
31
vendor/github.com/go-ego/gse/BUILD.bazel
generated
vendored
Normal file
31
vendor/github.com/go-ego/gse/BUILD.bazel
generated
vendored
Normal file
@ -0,0 +1,31 @@
|
||||
load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = [
|
||||
"dictionary.go",
|
||||
"seg.go",
|
||||
"segmenter.go",
|
||||
"test_utils.go",
|
||||
"token.go",
|
||||
"utils.go",
|
||||
],
|
||||
importmap = "go-common/vendor/github.com/go-ego/gse",
|
||||
importpath = "github.com/go-ego/gse",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = ["//vendor/github.com/go-ego/cedar:go_default_library"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "package-srcs",
|
||||
srcs = glob(["**"]),
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:private"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "all-srcs",
|
||||
srcs = [":package-srcs"],
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
69
vendor/github.com/go-ego/gse/CONTRIBUTING.md
generated
vendored
Normal file
69
vendor/github.com/go-ego/gse/CONTRIBUTING.md
generated
vendored
Normal file
@ -0,0 +1,69 @@
|
||||
# Contribution Guidelines
|
||||
|
||||
## Introduction
|
||||
|
||||
This document explains how to contribute changes to the Ego project. It assumes you have followed the README.md and [API Document](https://github.com/go-ego/gse/blob/master/docs/doc.md). <!--Sensitive security-related issues should be reported to [security@Ego.io](mailto:security@Ego.io.)-->
|
||||
|
||||
## Bug reports
|
||||
|
||||
Please search the issues on the issue tracker with a variety of keywords to ensure your bug is not already reported.
|
||||
|
||||
If unique, [open an issue](https://github.com/go-ego/gse/issues/new) and answer the questions so we can understand and reproduce the problematic behavior.
|
||||
|
||||
The burden is on you to convince us that it is actually a bug in Ego. This is easiest to do when you write clear, concise instructions so we can reproduce the behavior (even if it seems obvious). The more detailed and specific you are, the faster we will be able to help you. Check out [How to Report Bugs Effectively](http://www.chiark.greenend.org.uk/~sgtatham/bugs.html).
|
||||
|
||||
Please be kind, remember that Ego comes at no cost to you, and you're getting free help.
|
||||
|
||||
## Discuss your design
|
||||
|
||||
The project welcomes submissions but please let everyone know what you're working on if you want to change or add something to the Ego repositories.
|
||||
|
||||
Before starting to write something new for the Ego project, please [file an issue](https://github.com/go-ego/gse/issues/new). Significant changes must go through the [change proposal process](https://github.com/go-ego/proposals) before they can be accepted.
|
||||
|
||||
This process gives everyone a chance to validate the design, helps prevent duplication of effort, and ensures that the idea fits inside the goals for the project and tools. It also checks that the design is sound before code is written; the code review tool is not the place for high-level discussions.
|
||||
|
||||
## Testing redux
|
||||
|
||||
Before sending code out for review, run all the tests for the whole tree to make sure the changes don't break other usage and keep the compatibility on upgrade. You must test on Mac, Windows, Linux and other platforms. You should install the CLI for Circle CI, as we are using the server for continuous testing.
|
||||
|
||||
## Code review
|
||||
|
||||
In addition to the owner, Changes to Ego must be reviewed before they are accepted, no matter who makes the change even if it is a maintainer. We use GitHub's pull request workflow to do that and we also use [LGTM](http://lgtm.co) to ensure every PR is reviewed by vz or least 2 maintainers.
|
||||
|
||||
|
||||
## Sign your work
|
||||
|
||||
The sign-off is a simple line at the end of the explanation for the patch. Your signature certifies that you wrote the patch or otherwise have the right to pass it on as an open-source patch.
|
||||
|
||||
## Maintainers
|
||||
|
||||
To make sure every PR is checked, we got team maintainers. A maintainer should be a contributor of Ego and contributed at least 4 accepted PRs.
|
||||
|
||||
## Owners
|
||||
|
||||
Since Ego is a pure community organization without any company support, Copyright 2016 The go-ego Project Developers.
|
||||
|
||||
|
||||
## Versions
|
||||
|
||||
Ego has the `master` branch as a tip branch and has version branches such as `v0.30.0`. `v0.40.0` is a release branch and we will tag `v0.40.0` for binary download. If `v0.40.0` has bugs, we will accept pull requests on the `v0.40.0` branch and publish a `v0.40.1` tag, after bringing the bug fix also to the master branch.
|
||||
|
||||
Since the `master` branch is a tip version, if you wish to use Ego in production, please download the latest release tag version. All the branches will be protected via GitHub, all the PRs to every branch must be reviewed by two maintainers and must pass the automatic tests.
|
||||
|
||||
## Copyright
|
||||
|
||||
Code that you contribute should use the standard copyright header:
|
||||
|
||||
```
|
||||
// Copyright 2016 The go-ego Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// https://github.com/go-ego/gse/blob/master/LICENSE
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
```
|
||||
|
||||
Files in the repository contain copyright from the year they are added to the year they are last changed. If the copyright author is changed, just paste the header below the old one.
|
21
vendor/github.com/go-ego/gse/Gopkg.lock
generated
vendored
Normal file
21
vendor/github.com/go-ego/gse/Gopkg.lock
generated
vendored
Normal file
@ -0,0 +1,21 @@
|
||||
# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'.
|
||||
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "github.com/go-ego/cedar"
|
||||
packages = ["."]
|
||||
revision = "39a3301a49c052d18c55ade2bd080cda1ccc0446"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "github.com/vcaesar/tt"
|
||||
packages = ["."]
|
||||
revision = "54af4381b253a2380245310c25e1ff66dfc46f62"
|
||||
|
||||
[solve-meta]
|
||||
analyzer-name = "dep"
|
||||
analyzer-version = 1
|
||||
inputs-digest = "e7f95d7aa5572a5d63d68ac0ac960c325f9c29fab95419e0ec24688333fb8c6b"
|
||||
solver-name = "gps-cdcl"
|
||||
solver-version = 1
|
38
vendor/github.com/go-ego/gse/Gopkg.toml
generated
vendored
Normal file
38
vendor/github.com/go-ego/gse/Gopkg.toml
generated
vendored
Normal file
@ -0,0 +1,38 @@
|
||||
# Gopkg.toml example
|
||||
#
|
||||
# Refer to https://golang.github.io/dep/docs/Gopkg.toml.html
|
||||
# for detailed Gopkg.toml documentation.
|
||||
#
|
||||
# required = ["github.com/user/thing/cmd/thing"]
|
||||
# ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"]
|
||||
#
|
||||
# [[constraint]]
|
||||
# name = "github.com/user/project"
|
||||
# version = "1.0.0"
|
||||
#
|
||||
# [[constraint]]
|
||||
# name = "github.com/user/project2"
|
||||
# branch = "dev"
|
||||
# source = "github.com/myfork/project2"
|
||||
#
|
||||
# [[override]]
|
||||
# name = "github.com/x/y"
|
||||
# version = "2.4.0"
|
||||
#
|
||||
# [prune]
|
||||
# non-go = false
|
||||
# go-tests = true
|
||||
# unused-packages = true
|
||||
|
||||
|
||||
[[constraint]]
|
||||
branch = "master"
|
||||
name = "github.com/go-ego/cedar"
|
||||
|
||||
[[constraint]]
|
||||
branch = "master"
|
||||
name = "github.com/vcaesar/tt"
|
||||
|
||||
[prune]
|
||||
go-tests = true
|
||||
unused-packages = true
|
201
vendor/github.com/go-ego/gse/LICENSE
generated
vendored
Normal file
201
vendor/github.com/go-ego/gse/LICENSE
generated
vendored
Normal file
@ -0,0 +1,201 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "{}"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright {yyyy} {name of copyright owner}
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
112
vendor/github.com/go-ego/gse/README.md
generated
vendored
Normal file
112
vendor/github.com/go-ego/gse/README.md
generated
vendored
Normal file
@ -0,0 +1,112 @@
|
||||
# gse
|
||||
|
||||
Go efficient text segmentation; support english, chinese, japanese and other.
|
||||
|
||||
<!--<img align="right" src="https://raw.githubusercontent.com/go-ego/ego/master/logo.jpg">-->
|
||||
<!--<a href="https://circleci.com/gh/go-ego/ego/tree/dev"><img src="https://img.shields.io/circleci/project/go-ego/ego/dev.svg" alt="Build Status"></a>-->
|
||||
[](https://circleci.com/gh/go-ego/gse)
|
||||
[](https://codecov.io/gh/go-ego/gse)
|
||||
[](https://travis-ci.org/go-ego/gse)
|
||||
[](https://goreportcard.com/report/github.com/go-ego/gse)
|
||||
[](https://godoc.org/github.com/go-ego/gse)
|
||||
[](https://github.com/go-ego/gse/releases/latest)
|
||||
[](https://gitter.im/go-ego/ego?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
||||
<!--<a href="https://github.com/go-ego/ego/releases"><img src="https://img.shields.io/badge/%20version%20-%206.0.0%20-blue.svg?style=flat-square" alt="Releases"></a>-->
|
||||
|
||||
[简体中文](https://github.com/go-ego/gse/blob/master/README_zh.md)
|
||||
|
||||
<a href="https://github.com/go-ego/gse/blob/master/dictionary.go">Dictionary </a> with double array trie (Double-Array Trie) to achieve,
|
||||
<a href="https://github.com/go-ego/gse/blob/master/segmenter.go">Sender </a> algorithm is the shortest path based on word frequency plus dynamic programming.
|
||||
|
||||
Support common and search engine two participle mode, support user dictionary, POS tagging, run<a href="https://github.com/go-ego/gse/blob/master/server/server.go"> JSON RPC service</a>.
|
||||
|
||||
Text Segmentation speed<a href="https://github.com/go-ego/gse/blob/master/tools/benchmark.go"> single thread</a> 9MB/s,<a href="https://github.com/go-ego/gse/blob/master/tools/goroutines.go">goroutines concurrent</a> 42MB/s (8 nuclear Macbook Pro).
|
||||
|
||||
## Install / update
|
||||
|
||||
```
|
||||
go get -u github.com/go-ego/gse
|
||||
```
|
||||
|
||||
## [Build-tools](https://github.com/go-ego/re)
|
||||
```
|
||||
go get -u github.com/go-ego/re
|
||||
```
|
||||
### re gse
|
||||
To create a new gse application
|
||||
|
||||
```
|
||||
$ re gse my-gse
|
||||
```
|
||||
|
||||
### re run
|
||||
|
||||
To run the application we just created, you can navigate to the application folder and execute:
|
||||
```
|
||||
$ cd my-gse && re run
|
||||
```
|
||||
|
||||
|
||||
## Use
|
||||
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-ego/gse"
|
||||
)
|
||||
|
||||
func main() {
|
||||
// Load the dictionary
|
||||
var seg gse.Segmenter
|
||||
// Loading the default dictionary
|
||||
seg.LoadDict()
|
||||
// seg.LoadDict("your gopath"+"/src/github.com/go-ego/gse/data/dict/dictionary.txt")
|
||||
|
||||
// Text Segmentation
|
||||
text := []byte("你好世界, Hello world.")
|
||||
	fmt.Println(seg.String(text, true))
|
||||
|
||||
	segments := seg.Segment(text)
|
||||
|
||||
// Handle word segmentation results
|
||||
// Support for normal mode and search mode two participle,
|
||||
// see the comments in the code ToString function.
|
||||
// The search mode is mainly used to provide search engines
|
||||
// with as many keywords as possible
|
||||
fmt.Println(gse.ToString(segments, true))
|
||||
}
|
||||
```
|
||||
|
||||
[Look at an custom dictionary example](/examples/dict/main.go)
|
||||
|
||||
```Go
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-ego/gse"
|
||||
)
|
||||
|
||||
func main() {
|
||||
var seg gse.Segmenter
|
||||
seg.LoadDict("zh,testdata/test_dict.txt,testdata/test_dict1.txt")
|
||||
|
||||
text1 := []byte("你好世界, Hello world")
|
||||
|
||||
segments := seg.Segment(text1)
|
||||
fmt.Println(gse.ToString(segments))
|
||||
}
|
||||
```
|
||||
|
||||
[Look at an Chinese example](https://github.com/go-ego/gse/blob/master/examples/example.go)
|
||||
|
||||
[Look at an Japanese example](https://github.com/go-ego/gse/blob/master/examples/jp/main.go)
|
||||
|
||||
## License
|
||||
|
||||
Gse is primarily distributed under the terms of both the MIT license and the Apache License (Version 2.0), base on [sego](https://github.com/huichen/sego).
|
115
vendor/github.com/go-ego/gse/README_zh.md
generated
vendored
Normal file
115
vendor/github.com/go-ego/gse/README_zh.md
generated
vendored
Normal file
@ -0,0 +1,115 @@
|
||||
# [gse](https://github.com/go-ego/gse)
|
||||
|
||||
Go 语言高效分词, 支持英文、中文、日文等
|
||||
|
||||
<!--<img align="right" src="https://raw.githubusercontent.com/go-ego/ego/master/logo.jpg">-->
|
||||
<!--<a href="https://circleci.com/gh/go-ego/ego/tree/dev"><img src="https://img.shields.io/circleci/project/go-ego/ego/dev.svg" alt="Build Status"></a>-->
|
||||
[](https://circleci.com/gh/go-ego/gse)
|
||||
[](https://codecov.io/gh/go-ego/gse)
|
||||
[](https://travis-ci.org/go-ego/gse)
|
||||
[](https://goreportcard.com/report/github.com/go-ego/gse)
|
||||
[](https://godoc.org/github.com/go-ego/gse)
|
||||
[](https://github.com/go-ego/gse/releases/latest)
|
||||
[](https://gitter.im/go-ego/ego?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
||||
<!--<a href="https://github.com/go-ego/ego/releases"><img src="https://img.shields.io/badge/%20version%20-%206.0.0%20-blue.svg?style=flat-square" alt="Releases"></a>-->
|
||||
|
||||
<a href="https://github.com/go-ego/gse/blob/master/dictionary.go">词典</a>用双数组 trie(Double-Array Trie)实现,
|
||||
<a href="https://github.com/go-ego/gse/blob/master/segmenter.go">分词器</a>算法为基于词频的最短路径加动态规划。
|
||||
|
||||
支持普通和搜索引擎两种分词模式,支持用户词典、词性标注,可运行<a href="https://github.com/go-ego/gse/blob/master/server/server.go"> JSON RPC 服务</a>。
|
||||
|
||||
分词速度<a href="https://github.com/go-ego/gse/blob/master/tools/benchmark.go">单线程</a> 9MB/s,<a href="https://github.com/go-ego/gse/blob/master/tools/goroutines.go">goroutines 并发</a> 42MB/s(8核 Macbook Pro)。
|
||||
|
||||
QQ 群: 120563750
|
||||
|
||||
## 安装/更新
|
||||
|
||||
```
|
||||
go get -u github.com/go-ego/gse
|
||||
```
|
||||
|
||||
## [Build-tools](https://github.com/go-ego/re)
|
||||
```
|
||||
go get -u github.com/go-ego/re
|
||||
```
|
||||
### re gse
|
||||
创建一个新的 gse 程序
|
||||
|
||||
```
|
||||
$ re gse my-gse
|
||||
```
|
||||
|
||||
### re run
|
||||
|
||||
运行我们刚刚创建的应用程序, CD 到程序文件夹并执行:
|
||||
```
|
||||
$ cd my-gse && re run
|
||||
```
|
||||
|
||||
|
||||
## 使用
|
||||
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-ego/gse"
|
||||
)
|
||||
|
||||
func main() {
|
||||
// 载入词典
|
||||
var seg gse.Segmenter
|
||||
// 加载默认字典
|
||||
seg.LoadDict()
|
||||
// seg.LoadDict("your gopath"+"/src/github.com/go-ego/gse/data/dict/dictionary.txt")
|
||||
|
||||
// 分词
|
||||
text := []byte("中华人民共和国中央人民政府")
|
||||
fmt.Println(seg.String(text, true))
|
||||
|
||||
segments := seg.Segment(text)
|
||||
|
||||
// 处理分词结果
|
||||
// 支持普通模式和搜索模式两种分词,见代码中 ToString 函数的注释。
|
||||
// 搜索模式主要用于给搜索引擎提供尽可能多的关键字
|
||||
fmt.Println(gse.ToString(segments))
|
||||
|
||||
text1 := []byte("深圳地标建筑, 深圳地王大厦")
|
||||
segments1 := seg.Segment([]byte(text1))
|
||||
fmt.Println(gse.ToString(segments1, true))
|
||||
}
|
||||
```
|
||||
|
||||
[自定义词典分词示例](/examples/dict/main.go)
|
||||
|
||||
```Go
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-ego/gse"
|
||||
)
|
||||
|
||||
func main() {
|
||||
var seg gse.Segmenter
|
||||
seg.LoadDict("zh,testdata/test_dict.txt,testdata/test_dict1.txt")
|
||||
|
||||
text1 := []byte("所以, 你好, 再见")
|
||||
|
||||
segments := seg.Segment(text1)
|
||||
fmt.Println(gse.ToString(segments))
|
||||
}
|
||||
```
|
||||
|
||||
[中文分词示例](/examples/example.go)
|
||||
|
||||
[日文分词示例](/examples/jp/main.go)
|
||||
|
||||
## License
|
||||
|
||||
Gse is primarily distributed under the terms of both the MIT license and the Apache License (Version 2.0), based on [sego](https://github.com/huichen/sego).
|
25
vendor/github.com/go-ego/gse/circle.yml
generated
vendored
Normal file
25
vendor/github.com/go-ego/gse/circle.yml
generated
vendored
Normal file
@ -0,0 +1,25 @@
|
||||
# circle.yml #
|
||||
# machine:
|
||||
# go:
|
||||
# version: 1.9.1
|
||||
|
||||
version: 2
|
||||
|
||||
jobs:
|
||||
build:
|
||||
docker:
|
||||
# - image: golang:1.10.1
|
||||
- image: govgo/go:1.10.3
|
||||
working_directory: /gopath/src/github.com/go-ego/gse
|
||||
steps:
|
||||
- checkout
|
||||
# specify any bash command here prefixed with `run: `
|
||||
# - run: go get -u github.com/go-ego/cedar
|
||||
# - run: go get -u github.com/go-ego/gse
|
||||
- run: go get -v -t -d ./...
|
||||
- run: go test -v ./...
|
||||
# codecov.io
|
||||
- run: go test -v -covermode=count -coverprofile=coverage.out
|
||||
- run: bash <(curl -s https://codecov.io/bash)
|
||||
|
||||
|
89
vendor/github.com/go-ego/gse/dictionary.go
generated
vendored
Normal file
89
vendor/github.com/go-ego/gse/dictionary.go
generated
vendored
Normal file
@ -0,0 +1,89 @@
|
||||
// Copyright 2013 Hui Chen
|
||||
// Copyright 2016 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package gse
|
||||
|
||||
import (
|
||||
"github.com/go-ego/cedar"
|
||||
)
|
||||
|
||||
// Dictionary implements a prefix trie over token text (a double-array trie).
// A token may end at an internal node as well as at a leaf node.
type Dictionary struct {
	trie           *cedar.Cedar // Cedar double-array prefix trie; node values index into tokens
	maxTokenLen    int          // length (in characters) of the longest token in the dictionary
	tokens         []Token      // all tokens, kept in a slice for easy iteration
	totalFrequency int64        // sum of the frequencies of all tokens
}
|
||||
|
||||
// NewDict new dictionary
|
||||
func NewDict() *Dictionary {
|
||||
return &Dictionary{trie: cedar.New()}
|
||||
}
|
||||
|
||||
// MaxTokenLen 词典中最长的分词
|
||||
func (dict *Dictionary) MaxTokenLen() int {
|
||||
return dict.maxTokenLen
|
||||
}
|
||||
|
||||
// NumTokens 词典中分词数目
|
||||
func (dict *Dictionary) NumTokens() int {
|
||||
return len(dict.tokens)
|
||||
}
|
||||
|
||||
// TotalFrequency 词典中所有分词的频率之和
|
||||
func (dict *Dictionary) TotalFrequency() int64 {
|
||||
return dict.totalFrequency
|
||||
}
|
||||
|
||||
// addToken 向词典中加入一个分词
|
||||
func (dict *Dictionary) addToken(token Token) {
|
||||
bytes := textSliceToBytes(token.text)
|
||||
_, err := dict.trie.Get(bytes)
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
|
||||
dict.trie.Insert(bytes, dict.NumTokens())
|
||||
dict.tokens = append(dict.tokens, token)
|
||||
dict.totalFrequency += int64(token.frequency)
|
||||
if len(token.text) > dict.maxTokenLen {
|
||||
dict.maxTokenLen = len(token.text)
|
||||
}
|
||||
}
|
||||
|
||||
// lookupTokens 在词典中查找和字元组 words 可以前缀匹配的所有分词
|
||||
// 返回值为找到的分词数
|
||||
func (dict *Dictionary) lookupTokens(words []Text,
|
||||
tokens []*Token) (numOfTokens int) {
|
||||
var (
|
||||
id, value int
|
||||
err error
|
||||
)
|
||||
|
||||
for _, word := range words {
|
||||
id, err = dict.trie.Jump(word, id)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
value, err = dict.trie.Value(id)
|
||||
if err == nil {
|
||||
tokens[numOfTokens] = &dict.tokens[value]
|
||||
numOfTokens++
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
28
vendor/github.com/go-ego/gse/seg.go
generated
vendored
Normal file
28
vendor/github.com/go-ego/gse/seg.go
generated
vendored
Normal file
@ -0,0 +1,28 @@
|
||||
package gse
|
||||
|
||||
// Segment is a single token occurrence inside a segmented text.
type Segment struct {
	// start is the byte offset of the token within the original text.
	start int

	// end is the byte offset just past the token (exclusive).
	end int

	// token holds the dictionary information for this occurrence.
	token *Token
}
|
||||
|
||||
// Start 返回分词在文本中的起始字节位置
|
||||
func (s *Segment) Start() int {
|
||||
return s.start
|
||||
}
|
||||
|
||||
// End 返回分词在文本中的结束字节位置(不包括该位置)
|
||||
func (s *Segment) End() int {
|
||||
return s.end
|
||||
}
|
||||
|
||||
// Token 返回分词信息
|
||||
func (s *Segment) Token() *Token {
|
||||
return s.token
|
||||
}
|
530
vendor/github.com/go-ego/gse/segmenter.go
generated
vendored
Normal file
530
vendor/github.com/go-ego/gse/segmenter.go
generated
vendored
Normal file
@ -0,0 +1,530 @@
|
||||
// Copyright 2013 Hui Chen
|
||||
// Copyright 2016 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
/*
|
||||
|
||||
package gse Go efficient text segmentation, Go 语言分词
|
||||
*/
|
||||
|
||||
package gse
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"math"
|
||||
"os"
|
||||
"path"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
const (
	// version is the human-readable release string reported by GetVersion.
	version string = "v0.10.0.106, Danube River!"

	// minTokenFrequency is the smallest frequency a dictionary line may
	// carry; lower-frequency lines are skipped while loading.
	minTokenFrequency = 2
)

// GetVersion reports the gse library version string.
func GetVersion() string {
	return version
}
|
||||
|
||||
// Segmenter is the word segmenter; it wraps the loaded dictionary.
type Segmenter struct {
	dict *Dictionary
}

// jumper records, for one character position, the best backward jump found
// so far by the Viterbi search: the token that ends at this character and
// the shortest path value from the start of the text segment to here.
type jumper struct {
	minDistance float32
	token       *Token
}
|
||||
|
||||
// Dictionary 返回分词器使用的词典
|
||||
func (seg *Segmenter) Dictionary() *Dictionary {
|
||||
return seg.dict
|
||||
}
|
||||
|
||||
// getCurrentFilePath returns the source-file path of this function's caller.
// NOTE(review): runtime.Caller(1) resolves to the file containing the call
// site; since the only caller (LoadDict) lives in this same package, the
// result is effectively this package's source location, which is used to
// find the bundled dictionary data. Confirm before calling from elsewhere.
func getCurrentFilePath() string {
	_, filePath, _, _ := runtime.Caller(1)
	return filePath
}
|
||||
|
||||
// Read loads dictionary entries from a single file into seg.dict.
// Each line is expected to hold "text frequency [pos]". Lines with fewer
// than two fields, an unparsable frequency, or a frequency below
// minTokenFrequency are skipped; scan errors are logged but do not abort
// the load.
func (seg *Segmenter) Read(file string) error {
	log.Printf("Load the gse dictionary: \"%s\" ", file)
	dictFile, err := os.Open(file)
	if err != nil {
		log.Printf("Could not load dictionaries: \"%s\", %v \n", file, err)
		return err
	}
	defer dictFile.Close()

	reader := bufio.NewReader(dictFile)
	var (
		text      string
		freqText  string
		frequency int
		pos       string
	)

	// Read one token per line.
	line := 0
	for {
		line++
		size, fsErr := fmt.Fscanln(reader, &text, &freqText, &pos)
		if fsErr != nil {
			if fsErr == io.EOF {
				// End of file
				break
			}

			// Non-EOF errors: log and fall through; the size checks below
			// decide whether anything usable was scanned from the line.
			if size > 0 {
				log.Printf("File '%v' line \"%v\" read error: %v, skip",
					file, line, fsErr.Error())
			} else {
				log.Printf("File '%v' line \"%v\" is empty, read error: %v, skip",
					file, line, fsErr.Error())
			}
		}

		if size == 0 {
			// End of file or an unusable line: skip it.
			// break
			continue
		} else if size < 2 {
			// A single-field line carries no frequency: skip it.
			continue
		} else if size == 2 {
			// No part-of-speech tag on this line; use the empty string.
			pos = ""
		}

		// Parse the frequency column.
		var err error
		frequency, err = strconv.Atoi(freqText)
		if err != nil {
			continue
		}

		// Drop entries whose frequency is too small.
		if frequency < minTokenFrequency {
			continue
		}
		// Single-character entries are kept but demoted to the minimum
		// frequency.
		if len([]rune(text)) < 2 {
			// continue
			frequency = 2
		}

		// Add the token to the dictionary.
		words := splitTextToWords([]byte(text))
		token := Token{text: words, frequency: frequency, pos: pos}
		seg.dict.addToken(token)
	}

	return nil
}
|
||||
|
||||
// DictPaths resolves a dictionary specification into a list of file paths.
//
// filePath may be a language shortcut ("zh", "jp", "en") or a
// comma-separated list mixing shortcuts and explicit file paths, e.g.
// "zh,testdata/test_dict.txt". "en" yields no files. dictDir is the
// directory holding the bundled dictionary data.
func DictPaths(dictDir, filePath string) (files []string) {
	if filePath == "en" {
		return
	}

	if filePath == "zh" {
		return []string{path.Join(dictDir, "dict/dictionary.txt")}
	}

	if filePath == "jp" {
		return []string{path.Join(dictDir, "dict/jp/dict.txt")}
	}

	fileName := strings.Split(filePath, ",")
	for i := 0; i < len(fileName); i++ {
		// Resolve each entry independently. Previously dictPath persisted
		// across iterations, so entries that map to no file ("en", "ti")
		// re-appended the previous entry's path, producing duplicates
		// (e.g. "zh,en" yielded the zh dictionary twice).
		var dictPath string
		switch fileName[i] {
		case "jp":
			dictPath = path.Join(dictDir, "dict/jp/dict.txt")
		case "zh":
			dictPath = path.Join(dictDir, "dict/dictionary.txt")
		case "en", "ti":
			// No bundled dictionary for these entries.
		default:
			// Anything else is treated as an explicit dictionary file path.
			dictPath = fileName[i]
		}

		if dictPath != "" {
			files = append(files, dictPath)
		}
	}
	log.Println("Dict files path: ", files)

	return
}
|
||||
|
||||
// IsJp reports whether segText contains at least one Japanese kana
// (Hiragana or Katakana) character.
func IsJp(segText string) bool {
	hiragana := unicode.Scripts["Hiragana"]
	katakana := unicode.Scripts["Katakana"]
	for _, r := range segText {
		if unicode.Is(hiragana, r) || unicode.Is(katakana, r) {
			return true
		}
	}
	return false
}
|
||||
|
||||
// SegToken add segmenter token
|
||||
func (seg *Segmenter) SegToken() {
|
||||
// 计算每个分词的路径值,路径值含义见 Token 结构体的注释
|
||||
logTotalFrequency := float32(math.Log2(float64(seg.dict.totalFrequency)))
|
||||
for i := range seg.dict.tokens {
|
||||
token := &seg.dict.tokens[i]
|
||||
token.distance = logTotalFrequency - float32(math.Log2(float64(token.frequency)))
|
||||
}
|
||||
|
||||
// 对每个分词进行细致划分,用于搜索引擎模式,
|
||||
// 该模式用法见 Token 结构体的注释。
|
||||
for i := range seg.dict.tokens {
|
||||
token := &seg.dict.tokens[i]
|
||||
segments := seg.segmentWords(token.text, true)
|
||||
|
||||
// 计算需要添加的子分词数目
|
||||
numTokensToAdd := 0
|
||||
for iToken := 0; iToken < len(segments); iToken++ {
|
||||
// if len(segments[iToken].token.text) > 1 {
|
||||
// 略去字元长度为一的分词
|
||||
// TODO: 这值得进一步推敲,特别是当字典中有英文复合词的时候
|
||||
if len(segments[iToken].token.text) > 0 {
|
||||
hasJp := false
|
||||
if len(segments[iToken].token.text) == 1 {
|
||||
segText := string(segments[iToken].token.text[0])
|
||||
hasJp = IsJp(segText)
|
||||
}
|
||||
|
||||
if !hasJp {
|
||||
numTokensToAdd++
|
||||
}
|
||||
}
|
||||
}
|
||||
token.segments = make([]*Segment, numTokensToAdd)
|
||||
|
||||
// 添加子分词
|
||||
iSegmentsToAdd := 0
|
||||
for iToken := 0; iToken < len(segments); iToken++ {
|
||||
// if len(segments[iToken].token.text) > 1 {
|
||||
if len(segments[iToken].token.text) > 0 {
|
||||
hasJp := false
|
||||
if len(segments[iToken].token.text) == 1 {
|
||||
segText := string(segments[iToken].token.text[0])
|
||||
hasJp = IsJp(segText)
|
||||
}
|
||||
|
||||
if !hasJp {
|
||||
token.segments[iSegmentsToAdd] = &segments[iToken]
|
||||
iSegmentsToAdd++
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// LoadDict loads the dictionary from one or more files.
//
// The format of the dictionary is one token per line:
// token text, frequency, part of speech.
//
// Multiple dictionary files may be given, separated by ",", e.g.
// "user_dictionary.txt,common_dictionary.txt". Files listed first take
// priority: when a token appears both in the user dictionary and in the
// common dictionary, the user dictionary entry wins (earlier entries are
// kept by Dictionary.addToken).
//
// Language shortcuts ("zh", "jp") are resolved to the bundled data files
// via DictPaths; with no argument the bundled Chinese dictionary is used.
func (seg *Segmenter) LoadDict(files ...string) error {
	seg.dict = NewDict()

	var (
		// dictDir is the bundled data directory next to this source file.
		dictDir  = path.Join(path.Dir(getCurrentFilePath()), "data")
		dictPath string
		// load bool
	)

	if len(files) > 0 {
		dictFiles := DictPaths(dictDir, files[0])
		if len(dictFiles) > 0 {
			// load = true
			// files = dictFiles
			for i := 0; i < len(dictFiles); i++ {
				err := seg.Read(dictFiles[i])
				if err != nil {
					return err
				}
			}
		}
	}

	if len(files) == 0 {
		// No files given: fall back to the bundled default dictionary.
		dictPath = path.Join(dictDir, "dict/dictionary.txt")
		// files = []string{dictPath}
		err := seg.Read(dictPath)
		if err != nil {
			return err
		}
	}

	// if files[0] != "" && files[0] != "en" && !load {
	// for _, file := range strings.Split(files[0], ",") {
	// // for _, file := range files {
	// err := seg.Read(file)
	// if err != nil {
	// return err
	// }
	// }
	// }

	seg.SegToken()
	log.Println("Gse dictionary loaded finished.")

	return nil
}
|
||||
|
||||
// Segment 对文本分词
|
||||
//
|
||||
// 输入参数:
|
||||
// bytes UTF8 文本的字节数组
|
||||
//
|
||||
// 输出:
|
||||
// []Segment 划分的分词
|
||||
func (seg *Segmenter) Segment(bytes []byte) []Segment {
|
||||
return seg.internalSegment(bytes, false)
|
||||
}
|
||||
|
||||
// ModeSegment segment using search mode if searchMode is true
|
||||
func (seg *Segmenter) ModeSegment(bytes []byte, searchMode ...bool) []Segment {
|
||||
var mode bool
|
||||
if len(searchMode) > 0 {
|
||||
mode = searchMode[0]
|
||||
}
|
||||
|
||||
return seg.internalSegment(bytes, mode)
|
||||
}
|
||||
|
||||
// Slice use modeSegment segment retrun []string
|
||||
// using search mode if searchMode is true
|
||||
func (seg *Segmenter) Slice(bytes []byte, searchMode ...bool) []string {
|
||||
segs := seg.ModeSegment(bytes, searchMode...)
|
||||
return ToSlice(segs, searchMode...)
|
||||
}
|
||||
|
||||
// Slice use modeSegment segment retrun string
|
||||
// using search mode if searchMode is true
|
||||
func (seg *Segmenter) String(bytes []byte, searchMode ...bool) string {
|
||||
segs := seg.ModeSegment(bytes, searchMode...)
|
||||
return ToString(segs, searchMode...)
|
||||
}
|
||||
|
||||
func (seg *Segmenter) internalSegment(bytes []byte, searchMode bool) []Segment {
|
||||
// 处理特殊情况
|
||||
if len(bytes) == 0 {
|
||||
// return []Segment{}
|
||||
return nil
|
||||
}
|
||||
|
||||
// 划分字元
|
||||
text := splitTextToWords(bytes)
|
||||
|
||||
return seg.segmentWords(text, searchMode)
|
||||
}
|
||||
|
||||
// segmentWords runs the Viterbi shortest-path search over a character
// sequence and returns the chosen segmentation.
// In search mode a single-character input is considered indivisible and
// yields nil, and the token covering the whole input is excluded so only
// finer splits are produced.
func (seg *Segmenter) segmentWords(text []Text, searchMode bool) []Segment {
	// In search mode this token cannot be divided any further.
	if searchMode && len(text) == 1 {
		return nil
	}

	// jumpers[i] holds the best backward jump at character i: the token
	// ending there and the shortest path value from the segment start.
	jumpers := make([]jumper, len(text))

	if seg.dict == nil {
		return nil
	}

	tokens := make([]*Token, seg.dict.maxTokenLen)
	for current := 0; current < len(text); current++ {
		// Base distance is the shortest path up to the previous character
		// (zero when this character starts the text).
		var baseDistance float32
		if current == 0 {
			baseDistance = 0
		} else {
			baseDistance = jumpers[current-1].minDistance
		}

		// Find every dictionary token starting at the current character.
		numTokens := seg.dict.lookupTokens(
			text[current:minInt(current+seg.dict.maxTokenLen, len(text))], tokens)

		// Relax the jumper at each candidate token's end position.
		for iToken := 0; iToken < numTokens; iToken++ {
			location := current + len(tokens[iToken].text) - 1
			// In search mode, skip the token spanning the whole input.
			if !searchMode || current != 0 || location != len(text)-1 {
				updateJumper(&jumpers[location], baseDistance, tokens[iToken])
			}
		}

		// If no token covers this single character, add a pseudo token so
		// the path stays connected.
		if numTokens == 0 || len(tokens[0].text) > 1 {
			updateJumper(&jumpers[current], baseDistance,
				&Token{text: []Text{text[current]}, frequency: 1, distance: 32, pos: "x"})
		}
	}

	// First backward scan: count the segments on the chosen path.
	numSeg := 0
	for index := len(text) - 1; index >= 0; {
		location := index - len(jumpers[index].token.text) + 1
		numSeg++
		index = location - 1
	}

	// Second backward scan: fill the output in forward order.
	outputSegments := make([]Segment, numSeg)
	for index := len(text) - 1; index >= 0; {
		location := index - len(jumpers[index].token.text) + 1
		numSeg--
		outputSegments[numSeg].token = jumpers[index].token
		index = location - 1
	}

	// Compute each segment's byte offsets within the original text.
	bytePosition := 0
	for iSeg := 0; iSeg < len(outputSegments); iSeg++ {
		outputSegments[iSeg].start = bytePosition
		bytePosition += textSliceByteLen(outputSegments[iSeg].token.text)
		outputSegments[iSeg].end = bytePosition
	}
	return outputSegments
}
|
||||
|
||||
// updateJumper 更新跳转信息:
|
||||
// 1. 当该位置从未被访问过时 (jumper.minDistance 为零的情况),或者
|
||||
// 2. 当该位置的当前最短路径大于新的最短路径时
|
||||
// 将当前位置的最短路径值更新为 baseDistance 加上新分词的概率
|
||||
func updateJumper(jumper *jumper, baseDistance float32, token *Token) {
|
||||
newDistance := baseDistance + token.distance
|
||||
if jumper.minDistance == 0 || jumper.minDistance > newDistance {
|
||||
jumper.minDistance = newDistance
|
||||
jumper.token = token
|
||||
}
|
||||
}
|
||||
|
||||
// minInt returns the smaller of two integers.
func minInt(a, b int) int {
	if a < b {
		return a
	}
	return b
}

// maxInt returns the larger of two integers.
func maxInt(a, b int) int {
	if a < b {
		return b
	}
	return a
}
|
||||
|
||||
// splitTextToWords splits a UTF-8 text into segmentation units: each rune
// encoded in more than 2 bytes (e.g. CJK) becomes its own unit, while
// maximal runs of 1–2 byte letters/digits are emitted as single lowercased
// units.
func splitTextToWords(text Text) []Text {
	output := make([]Text, 0, len(text)/3)
	current := 0
	// inAlphanumeric starts true with alphanumericStart at 0, so a leading
	// alphanumeric run is flushed correctly when it ends.
	inAlphanumeric := true
	alphanumericStart := 0
	for current < len(text) {
		r, size := utf8.DecodeRune(text[current:])
		if size <= 2 && (unicode.IsLetter(r) || unicode.IsNumber(r)) {
			// A Latin-ish letter or digit (not CJK): extend the run.
			if !inAlphanumeric {
				alphanumericStart = current
				inAlphanumeric = true
			}
		} else {
			// Run ended: flush the pending alphanumeric run, lowercased.
			if inAlphanumeric {
				inAlphanumeric = false
				if current != 0 {
					output = append(output, toLower(text[alphanumericStart:current]))
				}
			}
			// Each non-alphanumeric rune is its own unit.
			output = append(output, text[current:current+size])
		}
		current += size
	}

	// Flush a trailing alphanumeric run.
	if inAlphanumeric {
		if current != 0 {
			output = append(output, toLower(text[alphanumericStart:current]))
		}
	}

	return output
}
|
||||
|
||||
// toLower lowercases ASCII letters in text, returning a new slice; all
// other bytes (including multi-byte UTF-8 sequences) are copied unchanged.
func toLower(text []byte) []byte {
	lowered := make([]byte, len(text))
	for i := range text {
		c := text[i]
		if 'A' <= c && c <= 'Z' {
			c += 'a' - 'A'
		}
		lowered[i] = c
	}
	return lowered
}
|
38
vendor/github.com/go-ego/gse/test_utils.go
generated
vendored
Normal file
38
vendor/github.com/go-ego/gse/test_utils.go
generated
vendored
Normal file
@ -0,0 +1,38 @@
|
||||
package gse
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func printTokens(tokens []*Token, numTokens int) (output string) {
|
||||
for iToken := 0; iToken < numTokens; iToken++ {
|
||||
for _, word := range tokens[iToken].text {
|
||||
output += fmt.Sprint(string(word))
|
||||
}
|
||||
output += " "
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func toWords(strings ...string) []Text {
|
||||
words := []Text{}
|
||||
for _, s := range strings {
|
||||
words = append(words, []byte(s))
|
||||
}
|
||||
return words
|
||||
}
|
||||
|
||||
func bytesToString(bytes []Text) (output string) {
|
||||
for _, b := range bytes {
|
||||
output += (string(b) + "/")
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// expect fails the test when fmt.Sprint(actual) differs from the expected
// string.
func expect(t *testing.T, expect string, actual interface{}) {
	got := fmt.Sprint(actual)
	if got != expect {
		t.Errorf("期待值=\"%s\", 实际=\"%s\"", expect, got)
	}
}
|
75
vendor/github.com/go-ego/gse/token.go
generated
vendored
Normal file
75
vendor/github.com/go-ego/gse/token.go
generated
vendored
Normal file
@ -0,0 +1,75 @@
|
||||
package gse
|
||||
|
||||
// Text is a byte-string type that can represent:
// 1. a single character, e.g. "中" or "国" (an English word counts as one
// character here),
// 2. a token, e.g. "中国" or "人口",
// 3. a span of text, e.g. "中国有十三亿人口".
type Text []byte

// Token is one dictionary entry (a segmentable word).
type Token struct {
	// text is the token's content as a sequence of characters.
	text []Text

	// frequency is how often the token occurs in the corpus.
	frequency int

	// distance is log2(totalFrequency/frequency) = log2(1/p(token)); it is
	// the token's edge length in the dynamic-programming search.
	// Maximizing prod(p(token)) equals minimizing sum(distance(token)) —
	// hence the "shortest path" segmentation.
	distance float32

	// pos is the part-of-speech tag.
	pos string

	// segments is the token's own finer segmentation; see Segments.
	segments []*Segment
}
|
||||
|
||||
// Text 返回分词文本
|
||||
func (token *Token) Text() string {
|
||||
return textSliceToString(token.text)
|
||||
}
|
||||
|
||||
// Frequency 返回分词在语料库中的词频
|
||||
func (token *Token) Frequency() int {
|
||||
return token.frequency
|
||||
}
|
||||
|
||||
// Pos 返回分词词性标注
|
||||
func (token *Token) Pos() string {
|
||||
return token.pos
|
||||
}
|
||||
|
||||
// Segments 该分词文本的进一步分词划分,比如 "中华人民共和国中央人民政府" 这个分词
|
||||
// 有两个子分词 "中华人民共和国 " 和 "中央人民政府"。子分词也可以进一步有子分词
|
||||
// 形成一个树结构,遍历这个树就可以得到该分词的所有细致分词划分,这主要
|
||||
// 用于搜索引擎对一段文本进行全文搜索。
|
||||
func (token *Token) Segments() []*Segment {
|
||||
return token.segments
|
||||
}
|
||||
|
||||
// Equals compare str split tokens
|
||||
func (token *Token) Equals(str string) bool {
|
||||
tokenLen := 0
|
||||
for _, t := range token.text {
|
||||
tokenLen += len(t)
|
||||
}
|
||||
if tokenLen != len(str) {
|
||||
return false
|
||||
}
|
||||
|
||||
bytStr := []byte(str)
|
||||
index := 0
|
||||
for i := 0; i < len(token.text); i++ {
|
||||
textArray := []byte(token.text[i])
|
||||
for j := 0; j < len(textArray); j++ {
|
||||
if textArray[j] != bytStr[index] {
|
||||
index = index + 1
|
||||
return false
|
||||
}
|
||||
index = index + 1
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
168
vendor/github.com/go-ego/gse/utils.go
generated
vendored
Normal file
168
vendor/github.com/go-ego/gse/utils.go
generated
vendored
Normal file
@ -0,0 +1,168 @@
|
||||
package gse
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// ToString segments to string 输出分词结果为字符串
|
||||
//
|
||||
// 有两种输出模式,以 "中华人民共和国" 为例
|
||||
//
|
||||
// 普通模式(searchMode=false)输出一个分词 "中华人民共和国/ns "
|
||||
// 搜索模式(searchMode=true) 输出普通模式的再细致切分:
|
||||
// "中华/nz 人民/n 共和/nz 国/n 共和国/ns 人民共和国/nt 中华人民共和国/ns "
|
||||
//
|
||||
// 默认 searchMode=false
|
||||
// 搜索模式主要用于给搜索引擎提供尽可能多的关键字,详情请见 Token 结构体的注释。
|
||||
func ToString(segs []Segment, searchMode ...bool) (output string) {
|
||||
var mode bool
|
||||
if len(searchMode) > 0 {
|
||||
mode = searchMode[0]
|
||||
}
|
||||
|
||||
if mode {
|
||||
for _, seg := range segs {
|
||||
output += tokenToString(seg.token)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
for _, seg := range segs {
|
||||
output += fmt.Sprintf("%s/%s ",
|
||||
textSliceToString(seg.token.text), seg.token.pos)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func tokenToBytes(token *Token) (output []byte) {
|
||||
for _, s := range token.segments {
|
||||
output = append(output, tokenToBytes(s.token)...)
|
||||
}
|
||||
output = append(output, []byte(fmt.Sprintf("%s/%s ",
|
||||
textSliceToString(token.text), token.pos))...)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// tokenToString renders token recursively in "text/pos " form.
// Sub-segments start being expanded once a segment with further splits (or
// a single Japanese kana) has been seen; note hasOnlyTerminalToken stays
// false for all later iterations once tripped.
// NOTE(review): the `s != nil` guard has no counterpart in tokenToSlice —
// presumably defensive; confirm whether nil segments can actually occur.
func tokenToString(token *Token) (output string) {
	hasOnlyTerminalToken := true
	for _, s := range token.segments {
		if len(s.token.segments) > 1 || IsJp(string(s.token.text[0])) {
			hasOnlyTerminalToken = false
		}

		if !hasOnlyTerminalToken {
			if s != nil {
				output += tokenToString(s.token)
			}
		}
	}

	output += fmt.Sprintf("%s/%s ", textSliceToString(token.text), token.pos)
	return
}
|
||||
|
||||
// ToSlice segments to slice 输出分词结果到一个字符串 slice
|
||||
//
|
||||
// 有两种输出模式,以 "中华人民共和国" 为例
|
||||
//
|
||||
// 普通模式(searchMode=false)输出一个分词"[中华人民共和国]"
|
||||
// 搜索模式(searchMode=true) 输出普通模式的再细致切分:
|
||||
// "[中华 人民 共和 国 共和国 人民共和国 中华人民共和国]"
|
||||
//
|
||||
// 默认 searchMode=false
|
||||
// 搜索模式主要用于给搜索引擎提供尽可能多的关键字,详情请见Token结构体的注释。
|
||||
func ToSlice(segs []Segment, searchMode ...bool) (output []string) {
|
||||
var mode bool
|
||||
if len(searchMode) > 0 {
|
||||
mode = searchMode[0]
|
||||
}
|
||||
|
||||
if mode {
|
||||
for _, seg := range segs {
|
||||
output = append(output, tokenToSlice(seg.token)...)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
for _, seg := range segs {
|
||||
output = append(output, seg.token.Text())
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// tokenToSlice renders token recursively into a []string, mirroring
// tokenToString: sub-segments start being expanded once a segment with
// further splits (or a single Japanese kana) has been seen.
func tokenToSlice(token *Token) (output []string) {
	hasOnlyTerminalToken := true
	for _, s := range token.segments {
		if len(s.token.segments) > 1 || IsJp(string(s.token.text[0])) {
			hasOnlyTerminalToken = false
		}

		if !hasOnlyTerminalToken {
			output = append(output, tokenToSlice(s.token)...)
		}
	}

	output = append(output, textSliceToString(token.text))
	return output
}
|
||||
|
||||
// 将多个字元拼接一个字符串输出
|
||||
func textToString(text []Text) string {
|
||||
var output string
|
||||
for _, word := range text {
|
||||
output += string(word)
|
||||
}
|
||||
return output
|
||||
}
|
||||
|
||||
// 将多个字元拼接一个字符串输出
|
||||
func textSliceToString(text []Text) string {
|
||||
return Join(text)
|
||||
}
|
||||
|
||||
// 返回多个字元的字节总长度
|
||||
func textSliceByteLen(text []Text) (length int) {
|
||||
for _, word := range text {
|
||||
length += len(word)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func textSliceToBytes(text []Text) []byte {
|
||||
var buf bytes.Buffer
|
||||
for _, word := range text {
|
||||
buf.Write(word)
|
||||
}
|
||||
return buf.Bytes()
|
||||
}
|
||||
|
||||
// Join is better string splicing
|
||||
func Join(text []Text) string {
|
||||
switch len(text) {
|
||||
case 0:
|
||||
return ""
|
||||
case 1:
|
||||
return string(text[0])
|
||||
case 2:
|
||||
// Special case for common small values.
|
||||
// Remove if github.com/golang/go/issues/6714 is fixed
|
||||
return string(text[0]) + string(text[1])
|
||||
case 3:
|
||||
// Special case for common small values.
|
||||
// Remove if #6714 is fixed
|
||||
return string(text[0]) + string(text[1]) + string(text[2])
|
||||
}
|
||||
n := 0
|
||||
for i := 0; i < len(text); i++ {
|
||||
n += len(text[i])
|
||||
}
|
||||
|
||||
b := make([]byte, n)
|
||||
bp := copy(b, text[0])
|
||||
for _, str := range text[1:] {
|
||||
bp += copy(b[bp:], str)
|
||||
}
|
||||
return string(b)
|
||||
}
|
23
vendor/github.com/go-ego/murmur/BUILD.bazel
generated
vendored
Normal file
23
vendor/github.com/go-ego/murmur/BUILD.bazel
generated
vendored
Normal file
@ -0,0 +1,23 @@
|
||||
load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = ["murmur.go"],
|
||||
importmap = "go-common/vendor/github.com/go-ego/murmur",
|
||||
importpath = "github.com/go-ego/murmur",
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "package-srcs",
|
||||
srcs = glob(["**"]),
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:private"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "all-srcs",
|
||||
srcs = [":package-srcs"],
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
201
vendor/github.com/go-ego/murmur/LICENSE
generated
vendored
Normal file
201
vendor/github.com/go-ego/murmur/LICENSE
generated
vendored
Normal file
@ -0,0 +1,201 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "{}"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright {yyyy} {name of copyright owner}
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
40
vendor/github.com/go-ego/murmur/README.md
generated
vendored
Normal file
40
vendor/github.com/go-ego/murmur/README.md
generated
vendored
Normal file
@ -0,0 +1,40 @@
|
||||
murmur
|
||||
======
|
||||
[](https://circleci.com/gh/go-ego/murmur)
|
||||
[](https://codecov.io/gh/go-ego/murmur)
|
||||
[](https://travis-ci.org/go-ego/murmur)
|
||||
[](https://goreportcard.com/report/github.com/go-ego/murmur)
|
||||
[](https://godoc.org/github.com/go-ego/murmur)
|
||||
[](https://github.com/go-ego/murmur/releases/latest)
|
||||
[](https://gitter.im/go-ego/ego?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
||||
|
||||
Go Murmur3 hash implementation
|
||||
|
||||
Based on [MurmurHash](http://en.wikipedia.org/wiki/MurmurHash), [murmur](https://github.com/huichen/murmur).
|
||||
|
||||
## Installing
|
||||
```Go
|
||||
go get -u github.com/go-ego/murmur
|
||||
```
|
||||
|
||||
## Use
|
||||
|
||||
```Go
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"github.com/go-ego/murmur"
|
||||
)
|
||||
|
||||
func main() {
|
||||
var str = "github.com"
|
||||
|
||||
hash32 := murmur.Murmur3([]byte(str))
|
||||
log.Println("hash32...", hash32)
|
||||
|
||||
sum32 := murmur.Sum32(str)
|
||||
log.Println("hash32...", sum32)
|
||||
}
|
||||
```
|
22
vendor/github.com/go-ego/murmur/circle.yml
generated
vendored
Normal file
22
vendor/github.com/go-ego/murmur/circle.yml
generated
vendored
Normal file
@ -0,0 +1,22 @@
|
||||
# circle.yml #
|
||||
# machine:
|
||||
# go:
|
||||
# version: 1.9.1
|
||||
|
||||
version: 2
|
||||
|
||||
jobs:
|
||||
build:
|
||||
docker:
|
||||
- image: govgo/go:1.10.3
|
||||
working_directory: /gopath/src/github.com/go-ego/murmur
|
||||
steps:
|
||||
- checkout
|
||||
# specify any bash command here prefixed with `run: `
|
||||
- run: go get -v -t -d ./...
|
||||
- run: go test -v ./...
|
||||
# codecov.io
|
||||
- run: go test -v -covermode=count -coverprofile=coverage.out
|
||||
- run: bash <(curl -s https://codecov.io/bash)
|
||||
|
||||
|
91
vendor/github.com/go-ego/murmur/murmur.go
generated
vendored
Normal file
91
vendor/github.com/go-ego/murmur/murmur.go
generated
vendored
Normal file
@ -0,0 +1,91 @@
|
||||
// Copyright 2013 Hui Chen
|
||||
// Copyright 2016 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
/* Package murmur Murmur3 32bit hash function based on
|
||||
http://en.wikipedia.org/wiki/MurmurHash
|
||||
*/
|
||||
|
||||
package murmur
|
||||
|
||||
// Murmur3 32-bit mixing constants (Austin Appleby's reference values).
const (
	c1 = 0xcc9e2d51
	c2 = 0x1b873593
	c3 = 0x85ebca6b
	c4 = 0xc2b2ae35
	r1 = 15
	r2 = 13
	m  = 5
	n  = 0xe6546b64
)

var (
	// defaultSeed is used whenever the caller supplies no seed.
	defaultSeed = uint32(1)
)

// Sum32 returns the Murmur3 32-bit hash of key, using the optional
// seed when given and defaultSeed otherwise.
func Sum32(key string, seed ...uint32) uint32 {
	if len(seed) > 0 {
		return Murmur3([]byte(key), seed[0])
	}
	return Murmur3([]byte(key))
}

// Murmur3 computes the Murmur3 32-bit hash of key. An optional seed
// may be supplied; defaultSeed is used otherwise.
func Murmur3(key []byte, seed ...uint32) uint32 {
	h := defaultSeed
	if len(seed) > 0 {
		h = seed[0]
	}

	// Body: consume the input four little-endian bytes at a time.
	pos := 0
	for ; pos+4 <= len(key); pos += 4 {
		k := uint32(key[pos]) | uint32(key[pos+1])<<8 |
			uint32(key[pos+2])<<16 | uint32(key[pos+3])<<24

		k *= c1
		k = k<<r1 | k>>(32-r1)
		k *= c2

		h ^= k
		h = h<<r2 | h>>(32-r2)
		h = h*m + n
	}

	// Tail: fold in the final 1-3 bytes, if any.
	var tail uint32
	switch len(key) - pos {
	case 3:
		tail += uint32(key[pos+2]) << 16
		fallthrough
	case 2:
		tail += uint32(key[pos+1]) << 8
		fallthrough
	case 1:
		tail += uint32(key[pos])
		tail *= c1
		tail = tail<<r1 | tail>>(32-r1)
		tail *= c2
		h ^= tail
	}

	// Finalization: avalanche the bits so similar keys diverge.
	h ^= uint32(len(key))
	h ^= h >> 16
	h *= c3
	h ^= h >> 13
	h *= c4
	h ^= h >> 16

	return h
}
|
55
vendor/github.com/go-ego/riot/BUILD.bazel
generated
vendored
Normal file
55
vendor/github.com/go-ego/riot/BUILD.bazel
generated
vendored
Normal file
@ -0,0 +1,55 @@
|
||||
load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = [
|
||||
"counters.go",
|
||||
"engine.go",
|
||||
"indexer_worker.go",
|
||||
"info.go",
|
||||
"ranker_worker.go",
|
||||
"riot.go",
|
||||
"riot_pkg.go",
|
||||
"segment.go",
|
||||
"stop_tokens.go",
|
||||
"store_worker.go",
|
||||
],
|
||||
importmap = "go-common/vendor/github.com/go-ego/riot",
|
||||
importpath = "github.com/go-ego/riot",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//vendor/github.com/coreos/bbolt:go_default_library",
|
||||
"//vendor/github.com/dgraph-io/badger:go_default_library",
|
||||
"//vendor/github.com/go-ego/gpy:go_default_library",
|
||||
"//vendor/github.com/go-ego/gse:go_default_library",
|
||||
"//vendor/github.com/go-ego/murmur:go_default_library",
|
||||
"//vendor/github.com/go-ego/riot/core:go_default_library",
|
||||
"//vendor/github.com/go-ego/riot/store:go_default_library",
|
||||
"//vendor/github.com/go-ego/riot/types:go_default_library",
|
||||
"//vendor/github.com/go-ego/riot/utils:go_default_library",
|
||||
"//vendor/github.com/go-vgo/gt/conf:go_default_library",
|
||||
"//vendor/github.com/go-vgo/gt/info:go_default_library",
|
||||
"//vendor/github.com/shirou/gopsutil/mem:go_default_library",
|
||||
"//vendor/github.com/syndtr/goleveldb/leveldb:go_default_library",
|
||||
],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "package-srcs",
|
||||
srcs = glob(["**"]),
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:private"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "all-srcs",
|
||||
srcs = [
|
||||
":package-srcs",
|
||||
"//vendor/github.com/go-ego/riot/core:all-srcs",
|
||||
"//vendor/github.com/go-ego/riot/store:all-srcs",
|
||||
"//vendor/github.com/go-ego/riot/types:all-srcs",
|
||||
"//vendor/github.com/go-ego/riot/utils:all-srcs",
|
||||
],
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
73
vendor/github.com/go-ego/riot/CONTRIBUTING.md
generated
vendored
Normal file
73
vendor/github.com/go-ego/riot/CONTRIBUTING.md
generated
vendored
Normal file
@ -0,0 +1,73 @@
|
||||
# Contribution Guidelines
|
||||
|
||||
## Introduction
|
||||
|
||||
This document explains how to contribute changes to the Ego project. It assumes you have followed the README.md and [API Document](https://github.com/go-ego/riot/tree/master/docs). <!--Sensitive security-related issues should be reported to [security@Ego.io](mailto:security@Ego.io.)-->
|
||||
|
||||
## Bug reports
|
||||
|
||||
Please search the issues on the issue tracker with a variety of keywords to ensure your bug is not already reported.
|
||||
|
||||
If unique, [open an issue](https://github.com/go-ego/riot/issues/new) and answer the questions so we can understand and reproduce the problematic behavior.
|
||||
|
||||
The burden is on you to convince us that it is actually a bug in Ego. This is easiest to do when you write clear, concise instructions so we can reproduce the behavior (even if it seems obvious). The more detailed and specific you are, the faster we will be able to help you. Check out [How to Report Bugs Effectively](http://www.chiark.greenend.org.uk/~sgtatham/bugs.html).
|
||||
|
||||
Please be kind, remember that Ego comes at no cost to you, and you're getting free help.
|
||||
|
||||
## Discuss your design
|
||||
|
||||
The project welcomes submissions but please let everyone know what you're working on if you want to change or add something to the Ego repositories.
|
||||
|
||||
Before starting to write something new for the Ego project, please [file an issue](https://github.com/go-ego/riot/issues/new). Significant changes must go through the [change proposal process](https://github.com/go-ego/proposals) before they can be accepted.
|
||||
|
||||
This process gives everyone a chance to validate the design, helps prevent duplication of effort, and ensures that the idea fits inside the goals for the project and tools. It also checks that the design is sound before code is written; the code review tool is not the place for high-level discussions.
|
||||
|
||||
## Testing redux
|
||||
|
||||
Before sending code out for review, run all the tests for the whole tree to make sure the changes don't break other usage and keep the compatibility on upgrade. You must test on Mac, Windows, Linux and other platforms. You should install the CLI for Circle CI, as we are using the server for continuous testing.
|
||||
|
||||
## Code review
|
||||
|
||||
In addition to the owner, changes to Ego must be reviewed before they are accepted, no matter who makes the change, even if it is a maintainer. We use GitHub's pull request workflow to do that and we also use [LGTM](http://lgtm.co) to ensure every PR is reviewed by vz or at least 2 maintainers.
|
||||
|
||||
|
||||
## Sign your work
|
||||
|
||||
The sign-off is a simple line at the end of the explanation for the patch. Your signature certifies that you wrote the patch or otherwise have the right to pass it on as an open-source patch.
|
||||
|
||||
## Maintainers
|
||||
|
||||
To make sure every PR is checked, we have team maintainers. A maintainer should be a contributor of Ego and have contributed at least 4 accepted PRs.
|
||||
|
||||
## Owners
|
||||
|
||||
Since Ego is a pure community organization without any company support, Copyright 2016 The go-ego Project Developers.
|
||||
|
||||
|
||||
## Versions
|
||||
|
||||
Ego has the `master` branch as a tip branch and has version branches such as `v0.30.0`. `v0.40.0` is a release branch and we will tag `v0.40.0` for binary download. If `v0.40.0` has bugs, we will accept pull requests on the `v0.40.0` branch and publish a `v0.40.1` tag, after bringing the bug fix also to the master branch.
|
||||
|
||||
Since the `master` branch is a tip version, if you wish to use Ego in production, please download the latest release tag version. All the branches will be protected via GitHub, all the PRs to every branch must be reviewed by two maintainers and must pass the automatic tests.
|
||||
|
||||
## Copyright
|
||||
|
||||
Code that you contribute should use the standard copyright header:
|
||||
|
||||
```
|
||||
// Copyright 2016 The go-ego Project Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
```
|
||||
|
||||
Files in the repository contain copyright from the year they are added to the year they are last changed. If the copyright author is changed, just paste the header below the old one.
|
323
vendor/github.com/go-ego/riot/Gopkg.lock
generated
vendored
Normal file
323
vendor/github.com/go-ego/riot/Gopkg.lock
generated
vendored
Normal file
@ -0,0 +1,323 @@
|
||||
# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'.
|
||||
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "github.com/AndreasBriese/bbloom"
|
||||
packages = ["."]
|
||||
revision = "28f7e881ca57bc00e028f9ede9f0d9104cfeef5e"
|
||||
|
||||
[[projects]]
|
||||
name = "github.com/BurntSushi/toml"
|
||||
packages = ["."]
|
||||
revision = "b26d9c308763d68093482582cea63d69be07a0f0"
|
||||
version = "v0.3.0"
|
||||
|
||||
[[projects]]
|
||||
name = "github.com/StackExchange/wmi"
|
||||
packages = ["."]
|
||||
revision = "5d049714c4a64225c3c79a7cf7d02f7fb5b96338"
|
||||
version = "1.0.0"
|
||||
|
||||
[[projects]]
|
||||
name = "github.com/coreos/bbolt"
|
||||
packages = ["."]
|
||||
revision = "583e8937c61f1af6513608ccc75c97b6abdf4ff9"
|
||||
version = "v1.3.0"
|
||||
|
||||
[[projects]]
|
||||
name = "github.com/coreos/etcd"
|
||||
packages = [
|
||||
"auth/authpb",
|
||||
"clientv3",
|
||||
"etcdserver/api/v3rpc/rpctypes",
|
||||
"etcdserver/etcdserverpb",
|
||||
"mvcc/mvccpb",
|
||||
"pkg/types"
|
||||
]
|
||||
revision = "fca8add78a9d926166eb739b8e4a124434025ba3"
|
||||
version = "v3.3.9"
|
||||
|
||||
[[projects]]
|
||||
name = "github.com/dgraph-io/badger"
|
||||
packages = [
|
||||
".",
|
||||
"options",
|
||||
"protos",
|
||||
"skl",
|
||||
"table",
|
||||
"y"
|
||||
]
|
||||
revision = "391b6d3b93e6014fe8c2971fcc0c1266e47dbbd9"
|
||||
version = "v1.5.3"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "github.com/dgryski/go-farm"
|
||||
packages = ["."]
|
||||
revision = "2de33835d10275975374b37b2dcfd22c9020a1f5"
|
||||
|
||||
[[projects]]
|
||||
name = "github.com/fsnotify/fsnotify"
|
||||
packages = ["."]
|
||||
revision = "c2828203cd70a50dcccfb2761f8b1f8ceef9a8e9"
|
||||
version = "v1.4.7"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "github.com/go-ego/cedar"
|
||||
packages = ["."]
|
||||
revision = "39a3301a49c052d18c55ade2bd080cda1ccc0446"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "github.com/go-ego/gpy"
|
||||
packages = ["."]
|
||||
revision = "5e7198f4b498342cd0e29af3b199470cbbcb7c08"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "github.com/go-ego/gse"
|
||||
packages = ["."]
|
||||
revision = "78fb4a3717d7d60731b5b95e651e86325419c064"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "github.com/go-ego/murmur"
|
||||
packages = ["."]
|
||||
revision = "0e80721bb094033c73e7b6159d3c1cbd116e9b38"
|
||||
|
||||
[[projects]]
|
||||
name = "github.com/go-ole/go-ole"
|
||||
packages = [
|
||||
".",
|
||||
"oleutil"
|
||||
]
|
||||
revision = "a41e3c4b706f6ae8dfbff342b06e40fa4d2d0506"
|
||||
version = "v1.2.1"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "github.com/go-vgo/grpclb"
|
||||
packages = ["."]
|
||||
revision = "434b4da1cea2111c03df2a91cc96cbff17110e43"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "github.com/go-vgo/gt"
|
||||
packages = [
|
||||
"conf",
|
||||
"info",
|
||||
"zlog"
|
||||
]
|
||||
revision = "765190081bade7de90c53a7117f1c6a3dee5b0de"
|
||||
|
||||
[[projects]]
|
||||
name = "github.com/gogo/protobuf"
|
||||
packages = [
|
||||
"gogoproto",
|
||||
"proto",
|
||||
"protoc-gen-gogo/descriptor"
|
||||
]
|
||||
revision = "636bf0302bc95575d69441b25a2603156ffdddf1"
|
||||
version = "v1.1.1"
|
||||
|
||||
[[projects]]
|
||||
name = "github.com/golang/protobuf"
|
||||
packages = [
|
||||
"proto",
|
||||
"protoc-gen-go/descriptor",
|
||||
"ptypes",
|
||||
"ptypes/any",
|
||||
"ptypes/duration",
|
||||
"ptypes/timestamp"
|
||||
]
|
||||
revision = "b4deda0973fb4c70b50d226b1af49f3da59f5265"
|
||||
version = "v1.1.0"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "github.com/golang/snappy"
|
||||
packages = ["."]
|
||||
revision = "2e65f85255dbc3072edf28d6b5b8efc472979f5a"
|
||||
|
||||
[[projects]]
|
||||
name = "github.com/pelletier/go-toml"
|
||||
packages = ["."]
|
||||
revision = "c01d1270ff3e442a8a57cddc1c92dc1138598194"
|
||||
version = "v1.2.0"
|
||||
|
||||
[[projects]]
|
||||
name = "github.com/pkg/errors"
|
||||
packages = ["."]
|
||||
revision = "645ef00459ed84a119197bfb8d8205042c6df63d"
|
||||
version = "v0.8.0"
|
||||
|
||||
[[projects]]
|
||||
name = "github.com/shirou/gopsutil"
|
||||
packages = [
|
||||
"cpu",
|
||||
"disk",
|
||||
"host",
|
||||
"internal/common",
|
||||
"mem",
|
||||
"net",
|
||||
"process"
|
||||
]
|
||||
revision = "8048a2e9c5773235122027dd585cf821b2af1249"
|
||||
version = "v2.18.07"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "github.com/shirou/w32"
|
||||
packages = ["."]
|
||||
revision = "bb4de0191aa41b5507caa14b0650cdbddcd9280b"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "github.com/syndtr/goleveldb"
|
||||
packages = [
|
||||
"leveldb",
|
||||
"leveldb/cache",
|
||||
"leveldb/comparer",
|
||||
"leveldb/errors",
|
||||
"leveldb/filter",
|
||||
"leveldb/iterator",
|
||||
"leveldb/journal",
|
||||
"leveldb/memdb",
|
||||
"leveldb/opt",
|
||||
"leveldb/storage",
|
||||
"leveldb/table",
|
||||
"leveldb/util"
|
||||
]
|
||||
revision = "c4c61651e9e37fa117f53c5a906d3b63090d8445"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "github.com/vcaesar/tt"
|
||||
packages = ["."]
|
||||
revision = "bafafb004f106551bfbcb6cac4ec3f9932365a57"
|
||||
|
||||
[[projects]]
|
||||
name = "go.uber.org/atomic"
|
||||
packages = ["."]
|
||||
revision = "1ea20fb1cbb1cc08cbd0d913a96dead89aa18289"
|
||||
version = "v1.3.2"
|
||||
|
||||
[[projects]]
|
||||
name = "go.uber.org/multierr"
|
||||
packages = ["."]
|
||||
revision = "3c4937480c32f4c13a875a1829af76c98ca3d40a"
|
||||
version = "v1.1.0"
|
||||
|
||||
[[projects]]
|
||||
name = "go.uber.org/zap"
|
||||
packages = [
|
||||
".",
|
||||
"buffer",
|
||||
"internal/bufferpool",
|
||||
"internal/color",
|
||||
"internal/exit",
|
||||
"zapcore"
|
||||
]
|
||||
revision = "4d45f9617f7d90f7a663ff21c7a4321dbe78098b"
|
||||
version = "v1.9.0"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "golang.org/x/net"
|
||||
packages = [
|
||||
"context",
|
||||
"http/httpguts",
|
||||
"http2",
|
||||
"http2/hpack",
|
||||
"idna",
|
||||
"internal/timeseries",
|
||||
"trace"
|
||||
]
|
||||
revision = "32f9bdbd7df18e8641d215e7ea68be88b971feb0"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "golang.org/x/sys"
|
||||
packages = [
|
||||
"unix",
|
||||
"windows"
|
||||
]
|
||||
revision = "bd9dbc187b6e1dacfdd2722a87e83093c2d7bd6e"
|
||||
|
||||
[[projects]]
|
||||
name = "golang.org/x/text"
|
||||
packages = [
|
||||
"collate",
|
||||
"collate/build",
|
||||
"internal/colltab",
|
||||
"internal/gen",
|
||||
"internal/tag",
|
||||
"internal/triegen",
|
||||
"internal/ucd",
|
||||
"language",
|
||||
"secure/bidirule",
|
||||
"transform",
|
||||
"unicode/bidi",
|
||||
"unicode/cldr",
|
||||
"unicode/norm",
|
||||
"unicode/rangetable"
|
||||
]
|
||||
revision = "f21a4dfb5e38f5895301dc265a8def02365cc3d0"
|
||||
version = "v0.3.0"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "google.golang.org/genproto"
|
||||
packages = ["googleapis/rpc/status"]
|
||||
revision = "daca94659cb50e9f37c1b834680f2e46358f10b0"
|
||||
|
||||
[[projects]]
|
||||
name = "google.golang.org/grpc"
|
||||
packages = [
|
||||
".",
|
||||
"balancer",
|
||||
"balancer/base",
|
||||
"balancer/roundrobin",
|
||||
"codes",
|
||||
"connectivity",
|
||||
"credentials",
|
||||
"encoding",
|
||||
"encoding/proto",
|
||||
"grpclog",
|
||||
"health/grpc_health_v1",
|
||||
"internal",
|
||||
"internal/backoff",
|
||||
"internal/channelz",
|
||||
"internal/envconfig",
|
||||
"internal/grpcrand",
|
||||
"internal/transport",
|
||||
"keepalive",
|
||||
"metadata",
|
||||
"naming",
|
||||
"peer",
|
||||
"reflection",
|
||||
"reflection/grpc_reflection_v1alpha",
|
||||
"resolver",
|
||||
"resolver/dns",
|
||||
"resolver/passthrough",
|
||||
"stats",
|
||||
"status",
|
||||
"tap"
|
||||
]
|
||||
revision = "32fb0ac620c32ba40a4626ddf94d90d12cce3455"
|
||||
version = "v1.14.0"
|
||||
|
||||
[[projects]]
|
||||
name = "gopkg.in/natefinch/lumberjack.v2"
|
||||
packages = ["."]
|
||||
revision = "a96e63847dc3c67d17befa69c303767e2f84e54f"
|
||||
version = "v2.1"
|
||||
|
||||
[solve-meta]
|
||||
analyzer-name = "dep"
|
||||
analyzer-version = 1
|
||||
inputs-digest = "bbf0b187aabef1ad7a86a7870838007190e413053fa4f3063da4f2faa34b671e"
|
||||
solver-name = "gps-cdcl"
|
||||
solver-version = 1
|
82
vendor/github.com/go-ego/riot/Gopkg.toml
generated
vendored
Normal file
82
vendor/github.com/go-ego/riot/Gopkg.toml
generated
vendored
Normal file
@ -0,0 +1,82 @@
|
||||
# Gopkg.toml example
|
||||
#
|
||||
# Refer to https://golang.github.io/dep/docs/Gopkg.toml.html
|
||||
# for detailed Gopkg.toml documentation.
|
||||
#
|
||||
# required = ["github.com/user/thing/cmd/thing"]
|
||||
# ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"]
|
||||
#
|
||||
# [[constraint]]
|
||||
# name = "github.com/user/project"
|
||||
# version = "1.0.0"
|
||||
#
|
||||
# [[constraint]]
|
||||
# name = "github.com/user/project2"
|
||||
# branch = "dev"
|
||||
# source = "github.com/myfork/project2"
|
||||
#
|
||||
# [[override]]
|
||||
# name = "github.com/x/y"
|
||||
# version = "2.4.0"
|
||||
#
|
||||
# [prune]
|
||||
# non-go = false
|
||||
# go-tests = true
|
||||
# unused-packages = true
|
||||
|
||||
|
||||
[[constraint]]
|
||||
name = "github.com/coreos/bbolt"
|
||||
version = "1.3.0"
|
||||
|
||||
[[constraint]]
|
||||
name = "github.com/dgraph-io/badger"
|
||||
version = "1.5.1"
|
||||
|
||||
[[constraint]]
|
||||
branch = "master"
|
||||
name = "github.com/go-ego/gpy"
|
||||
|
||||
[[constraint]]
|
||||
branch = "master"
|
||||
name = "github.com/go-ego/gse"
|
||||
|
||||
[[constraint]]
|
||||
branch = "master"
|
||||
name = "github.com/go-ego/murmur"
|
||||
|
||||
[[constraint]]
|
||||
branch = "master"
|
||||
name = "github.com/go-vgo/grpclb"
|
||||
|
||||
[[constraint]]
|
||||
branch = "master"
|
||||
name = "github.com/go-vgo/gt"
|
||||
|
||||
[[constraint]]
|
||||
name = "github.com/gogo/protobuf"
|
||||
version = "1.0.0"
|
||||
|
||||
[[constraint]]
|
||||
name = "github.com/shirou/gopsutil"
|
||||
version = "2.18.05"
|
||||
|
||||
[[constraint]]
|
||||
branch = "master"
|
||||
name = "github.com/syndtr/goleveldb"
|
||||
|
||||
[[constraint]]
|
||||
branch = "master"
|
||||
name = "github.com/vcaesar/tt"
|
||||
|
||||
[[constraint]]
|
||||
branch = "master"
|
||||
name = "golang.org/x/net"
|
||||
|
||||
[[constraint]]
|
||||
name = "google.golang.org/grpc"
|
||||
version = "1.12.2"
|
||||
|
||||
[prune]
|
||||
go-tests = true
|
||||
unused-packages = true
|
201
vendor/github.com/go-ego/riot/LICENSE
generated
vendored
Normal file
201
vendor/github.com/go-ego/riot/LICENSE
generated
vendored
Normal file
@ -0,0 +1,201 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "{}"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright {yyyy} {name of copyright owner}
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
178
vendor/github.com/go-ego/riot/README.md
generated
vendored
Normal file
178
vendor/github.com/go-ego/riot/README.md
generated
vendored
Normal file
@ -0,0 +1,178 @@
|
||||
# Riot search
|
||||
|
||||
<img align="right" src="logo/512px.svg" width="15%"/>
|
||||
|
||||
<!--<img align="right" src="https://raw.githubusercontent.com/go-ego/ego/master/logo.jpg">-->
|
||||
<!--<a href="https://circleci.com/gh/go-ego/ego/tree/dev"><img src="https://img.shields.io/circleci/project/go-ego/ego/dev.svg" alt="Build Status"></a>-->
|
||||
[](https://circleci.com/gh/go-ego/riot)
|
||||

|
||||
[](https://codecov.io/gh/go-ego/riot)
|
||||
[](https://travis-ci.org/go-ego/riot)
|
||||
[](https://goreportcard.com/report/github.com/go-ego/riot)
|
||||
[](https://godoc.org/github.com/go-ego/riot)
|
||||
[](https://github.com/go-ego/riot/releases/latest)
|
||||
[](https://gitter.im/go-ego/ego?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
||||
<!--<a href="https://github.com/go-ego/ego/releases"><img src="https://img.shields.io/badge/%20version%20-%206.0.0%20-blue.svg?style=flat-square" alt="Releases"></a>-->
|
||||
|
||||
<!--  -->
|
||||
Go Open Source, Distributed, Simple and efficient full text search engine.
|
||||
|
||||
[简体中文](https://github.com/go-ego/riot/blob/master/README_zh.md)
|
||||
|
||||
# Features
|
||||
|
||||
* [Efficient indexing and search](/docs/en/benchmarking.md) (1M blog 500M data 28 seconds index finished, 1.65 ms search response time, 19K search QPS)
|
||||
* Support for [logical search](https://github.com/go-ego/riot/blob/master/docs/en/logic.md)
|
||||
* Support Chinese word segmentation (use [gse word segmentation package](https://github.com/go-ego/gse) concurrent word, speed 27MB / s)
|
||||
* Support the calculation of the keyword in the text [close to the distance](/docs/en/token_proximity.md)(token proximity)
|
||||
* Support calculation [BM25 correlation](/docs/en/bm25.md)
|
||||
* Support [custom scoring field and scoring rules](/docs/en/custom_scoring_criteria.md)
|
||||
* Support [add online, delete index](/docs/en/realtime_indexing.md)
|
||||
* Support heartbeat
|
||||
* Support multiple [persistent storage](/docs/en/persistent_storage.md)
|
||||
* Support [distributed index and search](https://github.com/go-ego/riot/tree/master/data)
|
||||
* Can be achieved [distributed index and search](/docs/en/distributed_indexing_and_search.md)
|
||||
|
||||
* [Look at Word segmentation rules](https://github.com/go-ego/riot/blob/master/docs/en/segmenter.md)
|
||||
|
||||
|
||||
Riot v0.10.0 was released in Nov 2017, check the [Changelog](https://github.com/go-ego/riot/blob/master/docs/CHANGELOG.md) for the full details.
|
||||
|
||||
## Requirements
|
||||
Go version >= 1.8
|
||||
|
||||
### Vendored Dependencies
|
||||
|
||||
Riot uses [dep](https://github.com/golang/dep) to vendor dependencies, but we don't commit the vendored packages themselves to the Riot git repository. Therefore, a simple go get is not supported because the command is not vendor aware.
|
||||
|
||||
Please manage it with dep, run `dep ensure`, to clone dependencies.
|
||||
|
||||
## Installation/Update
|
||||
|
||||
```
|
||||
go get -u github.com/go-ego/riot
|
||||
```
|
||||
|
||||
## [Build-tools](https://github.com/go-ego/re)
|
||||
```
|
||||
go get -u github.com/go-ego/re
|
||||
```
|
||||
### re riot
|
||||
To create a new riot application
|
||||
|
||||
```
|
||||
$ re riot my-riotapp
|
||||
```
|
||||
|
||||
### re run
|
||||
|
||||
To run the application we just created, you can navigate to the application folder and execute:
|
||||
```
|
||||
$ cd my-riotapp && re run
|
||||
```
|
||||
|
||||
## Usage:
|
||||
|
||||
#### [Look at an example](/examples/simple/main.go)
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"github.com/go-ego/riot"
|
||||
"github.com/go-ego/riot/types"
|
||||
)
|
||||
|
||||
var (
|
||||
// searcher is coroutine safe
|
||||
searcher = riot.Engine{}
|
||||
)
|
||||
|
||||
func main() {
|
||||
// Init
|
||||
searcher.Init(types.EngineOpts{
|
||||
// Using: 4,
|
||||
NotUseGse: true,
|
||||
})
|
||||
defer searcher.Close()
|
||||
|
||||
text := "Google Is Experimenting With Virtual Reality Advertising"
|
||||
text1 := `Google accidentally pushed Bluetooth update for Home
|
||||
speaker early`
|
||||
text2 := `Google is testing another Search results layout with
|
||||
rounded cards, new colors, and the 4 mysterious colored dots again`
|
||||
|
||||
// Add the document to the index, docId starts at 1
|
||||
searcher.Index(1, types.DocData{Content: text})
|
||||
searcher.Index(2, types.DocData{Content: text1}, false)
|
||||
searcher.IndexDoc(3, types.DocData{Content: text2}, true)
|
||||
|
||||
// Wait for the index to refresh
|
||||
searcher.Flush()
|
||||
// engine.FlushIndex()
|
||||
|
||||
// The search output format is found in the types.SearchResp structure
|
||||
log.Print(searcher.Search(types.SearchReq{Text:"google testing"}))
|
||||
}
|
||||
```
|
||||
|
||||
It is very simple!
|
||||
|
||||
### Use default engine:
|
||||
|
||||
```Go
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"github.com/go-ego/riot"
|
||||
"github.com/go-ego/riot/types"
|
||||
)
|
||||
|
||||
var (
|
||||
searcher = riot.New("zh")
|
||||
)
|
||||
|
||||
func main() {
|
||||
data := types.DocData{Content: `I wonder how, I wonder why
|
||||
, I wonder where they are`}
|
||||
data1 := types.DocData{Content: "所以, 你好, 再见"}
|
||||
data2 := types.DocData{Content: "没有理由"}
|
||||
searcher.Index(1, data)
|
||||
searcher.Index(2, data1)
|
||||
searcher.Index(3, data2)
|
||||
searcher.Flush()
|
||||
|
||||
req := types.SearchReq{Text: "你好"}
|
||||
search := searcher.Search(req)
|
||||
log.Println("search...", search)
|
||||
}
|
||||
```
|
||||
|
||||
#### [Look at more Examples](https://github.com/go-ego/riot/tree/master/examples)
|
||||
|
||||
#### [Look at Store example](https://github.com/go-ego/riot/blob/master/examples/store/main.go)
|
||||
#### [Look at Logic search example](https://github.com/go-ego/riot/blob/master/examples/logic/main.go)
|
||||
|
||||
#### [Look at Pinyin search example](https://github.com/go-ego/riot/blob/master/examples/pinyin/main.go)
|
||||
|
||||
#### [Look at different dict and language search example](https://github.com/go-ego/riot/blob/master/examples/dict/main.go)
|
||||
|
||||
#### [Look at benchmark example](https://github.com/go-ego/riot/blob/master/examples/benchmark/benchmark.go)
|
||||
|
||||
#### [Riot search engine templates, client and dictionaries](https://github.com/go-ego/riot/tree/master/data)
|
||||
|
||||
## Donate
|
||||
|
||||
Supporting riot, [buy me a coffee](https://github.com/go-vgo/buy-me-a-coffee).
|
||||
|
||||
#### Paypal
|
||||
|
||||
Donate money by [paypal](https://www.paypal.me/veni0/25) to my account [vzvway@gmail.com](vzvway@gmail.com)
|
||||
|
||||
## License
|
||||
|
||||
Riot is primarily distributed under the terms of the Apache License (Version 2.0), base on [wukong](https://github.com/huichen/wukong).
|
194
vendor/github.com/go-ego/riot/README_zh.md
generated
vendored
Normal file
194
vendor/github.com/go-ego/riot/README_zh.md
generated
vendored
Normal file
@ -0,0 +1,194 @@
|
||||
# [Riot 搜索引擎](https://github.com/go-ego/riot)
|
||||
|
||||
<!--<img align="right" src="https://raw.githubusercontent.com/go-ego/ego/master/logo.jpg">-->
|
||||
<!--<a href="https://circleci.com/gh/go-ego/ego/tree/dev"><img src="https://img.shields.io/circleci/project/go-ego/ego/dev.svg" alt="Build Status"></a>-->
|
||||
[](https://circleci.com/gh/go-ego/riot)
|
||||

|
||||
[](https://codecov.io/gh/go-ego/riot)
|
||||
[](https://travis-ci.org/go-ego/riot)
|
||||
[](https://goreportcard.com/report/github.com/go-ego/riot)
|
||||
[](https://godoc.org/github.com/go-ego/riot)
|
||||
[](https://github.com/go-ego/riot/releases/latest)
|
||||
[](https://gitter.im/go-ego/ego?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
||||
<!--<a href="https://github.com/go-ego/ego/releases"><img src="https://img.shields.io/badge/%20version%20-%206.0.0%20-blue.svg?style=flat-square" alt="Releases"></a>-->
|
||||
|
||||
|
||||
Go Open Source, Distributed, Simple and efficient full text search engine.
|
||||
|
||||
# Features
|
||||
|
||||
* [高效索引和搜索](/docs/zh/benchmarking.md)(1M 条微博 500M 数据28秒索引完,1.65毫秒搜索响应时间,19K 搜索 QPS)
|
||||
* 支持中文分词(使用 [gse 分词包](https://github.com/go-ego/gse)并发分词,速度 27MB/秒)
|
||||
* 支持[逻辑搜索](https://github.com/go-ego/riot/blob/master/docs/zh/logic.md)
|
||||
* 支持中文转拼音搜索(使用 [gpy](https://github.com/go-ego/gpy) 中文转拼音)
|
||||
* 支持计算关键词在文本中的[紧邻距离](/docs/zh/token_proximity.md)(token proximity)
|
||||
* 支持计算[BM25相关度](/docs/zh/bm25.md)
|
||||
* 支持[自定义评分字段和评分规则](/docs/zh/custom_scoring_criteria.md)
|
||||
* 支持[在线添加、删除索引](/docs/zh/realtime_indexing.md)
|
||||
* 支持多种[持久存储](/docs/zh/persistent_storage.md)
|
||||
* 支持 heartbeat
|
||||
* 支持[分布式索引和搜索](https://github.com/go-ego/riot/tree/master/data)
|
||||
* 可实现[分布式索引和搜索](/docs/zh/distributed_indexing_and_search.md)
|
||||
* 采用对商业应用友好的[Apache License v2](/LICENSE)发布
|
||||
|
||||
* [查看分词规则](https://github.com/go-ego/riot/blob/master/docs/zh/segmenter.md)
|
||||
|
||||
Riot v0.10.0 was released in Nov 2017, check the [Changelog](https://github.com/go-ego/riot/blob/master/docs/CHANGELOG.md) for the full details.
|
||||
|
||||
QQ 群: 120563750
|
||||
|
||||
## 安装/更新
|
||||
|
||||
```
|
||||
go get -u github.com/go-ego/riot
|
||||
```
|
||||
|
||||
## Requirements
|
||||
|
||||
需要 Go 版本至少 1.8
|
||||
|
||||
### Vendored Dependencies
|
||||
|
||||
Riot 使用 [dep](https://github.com/golang/dep) 管理 vendor 依赖, but we don't commit the vendored packages themselves to the Riot git repository. Therefore, a simple go get is not supported because the command is not vendor aware.
|
||||
|
||||
请用 dep 管理它, 运行 `dep ensure` 克隆依赖.
|
||||
|
||||
## [Build-tools](https://github.com/go-ego/re)
|
||||
```
|
||||
go get -u github.com/go-ego/re
|
||||
```
|
||||
### re riot
|
||||
创建 riot 项目
|
||||
|
||||
```
|
||||
$ re riot my-riotapp
|
||||
```
|
||||
|
||||
### re run
|
||||
|
||||
运行我们创建的 riot 项目, 你可以导航到应用程序文件夹并执行:
|
||||
```
|
||||
$ cd my-riotapp && re run
|
||||
```
|
||||
|
||||
## 使用
|
||||
|
||||
先看一个例子(来自 [simplest_example.go](/examples/simple/zh/main.go))
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"github.com/go-ego/riot"
|
||||
"github.com/go-ego/riot/types"
|
||||
)
|
||||
|
||||
var (
|
||||
// searcher 是协程安全的
|
||||
searcher = riot.Engine{}
|
||||
)
|
||||
|
||||
func main() {
|
||||
// 初始化
|
||||
searcher.Init(types.EngineOpts{
|
||||
Using: 3,
|
||||
GseDict: "zh",
|
||||
// GseDict: "your gopath"+"/src/github.com/go-ego/riot/data/dict/dictionary.txt",
|
||||
})
|
||||
defer searcher.Close()
|
||||
|
||||
text := "此次百度收购将成中国互联网最大并购"
|
||||
text1 := "百度宣布拟全资收购91无线业务"
|
||||
text2 := "百度是中国最大的搜索引擎"
|
||||
|
||||
// 将文档加入索引,docId 从1开始
|
||||
searcher.Index(1, types.DocData{Content: text})
|
||||
searcher.Index(2, types.DocData{Content: text1}, false)
|
||||
searcher.Index(3, types.DocData{Content: text2}, true)
|
||||
|
||||
// 等待索引刷新完毕
|
||||
searcher.Flush()
|
||||
// engine.FlushIndex()
|
||||
|
||||
// 搜索输出格式见 types.SearchResp 结构体
|
||||
log.Print(searcher.Search(types.SearchReq{Text:"百度中国"}))
|
||||
}
|
||||
```
|
||||
|
||||
是不是很简单!
|
||||
|
||||
然后看看一个[入门教程](/docs/zh/codelab.md),教你用不到200行 Go 代码实现一个微博搜索网站。
|
||||
|
||||
### 使用默认引擎:
|
||||
|
||||
```Go
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"github.com/go-ego/riot"
|
||||
"github.com/go-ego/riot/types"
|
||||
)
|
||||
|
||||
var (
|
||||
searcher = riot.New("zh")
|
||||
)
|
||||
|
||||
func main() {
|
||||
data := types.DocData{Content: `I wonder how, I wonder why
|
||||
, I wonder where they are`}
|
||||
data1 := types.DocData{Content: "所以, 你好, 再见"}
|
||||
data2 := types.DocData{Content: "没有理由"}
|
||||
searcher.Index(1, data)
|
||||
searcher.Index(2, data1)
|
||||
searcher.IndexDoc(3, data2)
|
||||
searcher.Flush()
|
||||
|
||||
req := types.SearchReq{Text: "你好"}
|
||||
search := searcher.Search(req)
|
||||
log.Println("search...", search)
|
||||
}
|
||||
```
|
||||
|
||||
#### [查看更多例子](https://github.com/go-ego/riot/tree/master/examples)
|
||||
|
||||
#### [持久化的例子](https://github.com/go-ego/riot/blob/master/examples/store/main.go)
|
||||
#### [逻辑搜索的例子](https://github.com/go-ego/riot/blob/master/examples/logic/main.go)
|
||||
|
||||
#### [拼音搜索的例子](https://github.com/go-ego/riot/blob/master/examples/pinyin/main.go)
|
||||
|
||||
#### [不同字典和语言例子](https://github.com/go-ego/riot/blob/master/examples/dict/main.go)
|
||||
|
||||
#### [benchmark](https://github.com/go-ego/riot/blob/master/examples/benchmark/benchmark.go)
|
||||
|
||||
#### [Riot 搜索模板, 客户端和字典](https://github.com/go-ego/riot/tree/master/data)
|
||||
|
||||
## 主要改进:
|
||||
|
||||
- 增加逻辑搜索
|
||||
- 增加拼音搜索
|
||||
- 增加分布式
|
||||
- 分词等改进
|
||||
- 增加更多 api
|
||||
- 支持 heartbeat
|
||||
- 修复 bug
|
||||
- 删除依赖 cgo 的存储引擎, 增加 badger和 leveldb 持久化引擎
|
||||
|
||||
## Donate
|
||||
|
||||
支持 riot, [buy me a coffee](https://github.com/go-vgo/buy-me-a-coffee).
|
||||
|
||||
#### Paypal
|
||||
|
||||
Donate money by [paypal](https://www.paypal.me/veni0/25) to my account [vzvway@gmail.com](vzvway@gmail.com)
|
||||
|
||||
## 其它
|
||||
|
||||
* [为什么要有 riot 引擎](/docs/zh/why_riot.md)
|
||||
* [联系方式](/docs/zh/feedback.md)
|
||||
|
||||
## License
|
||||
|
||||
Riot is primarily distributed under the terms of the Apache License (Version 2.0), base on [wukong](https://github.com/huichen/wukong).
|
42
vendor/github.com/go-ego/riot/appveyor.yml
generated
vendored
Normal file
42
vendor/github.com/go-ego/riot/appveyor.yml
generated
vendored
Normal file
@ -0,0 +1,42 @@
|
||||
version: "{build}"
|
||||
image: 'Visual Studio 2017'
|
||||
# platform: x64
|
||||
|
||||
clone_folder: c:\gopath\src\github.com\go-ego\riot
|
||||
# max_jobs: 1
|
||||
|
||||
environment:
|
||||
global:
|
||||
GOPATH: C:\gopath
|
||||
# CC: gcc.exe
|
||||
matrix:
|
||||
- GOARCH: amd64
|
||||
# - GOARCH: 386
|
||||
GOVERSION: 1.10.3
|
||||
# GOPATH: c:\gopath
|
||||
|
||||
install:
|
||||
- set PATH=%GOPATH%\bin;c:\go\bin;%PATH%
|
||||
- git submodule update --init
|
||||
- rmdir C:\go /s /q
|
||||
- appveyor DownloadFile https://storage.googleapis.com/golang/go%GOVERSION%.windows-%GOARCH%.zip
|
||||
- 7z x go%GOVERSION%.windows-%GOARCH%.zip -y -oC:\ > NUL
|
||||
- go version
|
||||
- go env
|
||||
# - gcc --version
|
||||
|
||||
# To run your custom scripts instead of automatic MSBuild
|
||||
build_script:
|
||||
# We need to disable firewall - https://github.com/appveyor/ci/issues/1579#issuecomment-309830648
|
||||
- ps: Disable-NetFirewallRule -DisplayName 'File and Printer Sharing (SMB-Out)'
|
||||
- cd c:\gopath\src\github.com\go-ego\riot
|
||||
- git branch
|
||||
- go get -t ./...
|
||||
|
||||
# To run your custom scripts instead of automatic tests
|
||||
test_script:
|
||||
# Unit tests
|
||||
- ps: Add-AppveyorTest "Unit Tests" -Outcome Running
|
||||
# - go test -u github.com/go-ego/riot/...
|
||||
- go test -v github.com/go-ego/riot/...
|
||||
- ps: Update-AppveyorTest "Unit Tests" -Outcome Passed
|
34
vendor/github.com/go-ego/riot/circle.yml
generated
vendored
Normal file
34
vendor/github.com/go-ego/riot/circle.yml
generated
vendored
Normal file
@ -0,0 +1,34 @@
|
||||
# circle.yml #
|
||||
# machine:
|
||||
# go:
|
||||
# version: 1.9.1
|
||||
|
||||
version: 2
|
||||
|
||||
jobs:
|
||||
build:
|
||||
docker:
|
||||
- image: govgo/go:1.10.3
|
||||
working_directory: /gopath/src/github.com/go-ego/riot
|
||||
steps:
|
||||
- checkout
|
||||
# specify any bash command here prefixed with `run: `
|
||||
# - run: go get -u github.com/go-ego/gse
|
||||
# - run: go get -u github.com/go-ego/gpy
|
||||
# - run: go get -u github.com/go-ego/murmur
|
||||
# - run: go get -u golang.org/x/sys/unix
|
||||
# - run: go get -u github.com/shirou/gopsutil
|
||||
- run: go get -v -t -d ./...
|
||||
- run: go test -v ./...
|
||||
# codecov.io
|
||||
- run: go test -v -covermode=count -coverprofile=coverage.out
|
||||
- run: bash <(curl -s https://codecov.io/bash)
|
||||
|
||||
# script:
|
||||
# - ./go.test.sh
|
||||
|
||||
# test:
|
||||
# post:
|
||||
# - go test -v -covermode=count -coverprofile=coverage.out
|
||||
# - bash <(curl -s https://codecov.io/bash)
|
||||
|
33
vendor/github.com/go-ego/riot/core/BUILD.bazel
generated
vendored
Normal file
33
vendor/github.com/go-ego/riot/core/BUILD.bazel
generated
vendored
Normal file
@ -0,0 +1,33 @@
|
||||
load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = [
|
||||
"data.go",
|
||||
"indexer.go",
|
||||
"ranker.go",
|
||||
"test_utils.go",
|
||||
"uint64.go",
|
||||
],
|
||||
importmap = "go-common/vendor/github.com/go-ego/riot/core",
|
||||
importpath = "github.com/go-ego/riot/core",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//vendor/github.com/go-ego/riot/types:go_default_library",
|
||||
"//vendor/github.com/go-ego/riot/utils:go_default_library",
|
||||
],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "package-srcs",
|
||||
srcs = glob(["**"]),
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:private"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "all-srcs",
|
||||
srcs = [":package-srcs"],
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
94
vendor/github.com/go-ego/riot/core/data.go
generated
vendored
Normal file
94
vendor/github.com/go-ego/riot/core/data.go
generated
vendored
Normal file
@ -0,0 +1,94 @@
|
||||
// Copyright 2016 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package core
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/go-ego/riot/types"
|
||||
)
|
||||
|
||||
var (
	// DocInfoGroup holds per-shard document info: [shard][docId]info.
	DocInfoGroup = make(map[int]*types.DocInfosShard)
	// docInfosGroupRWMutex guards all reads and writes of DocInfoGroup
	// and of the shards it contains.
	docInfosGroupRWMutex sync.RWMutex
)
|
||||
|
||||
// AddDocInfosShard add document infos shard
|
||||
func AddDocInfosShard(shard int) {
|
||||
docInfosGroupRWMutex.Lock()
|
||||
defer docInfosGroupRWMutex.Unlock()
|
||||
if _, found := DocInfoGroup[shard]; !found {
|
||||
DocInfoGroup[shard] = &types.DocInfosShard{
|
||||
DocInfos: make(map[uint64]*types.DocInfo),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// AddDocInfo add documents info
|
||||
func AddDocInfo(shard int, docId uint64, docinfo *types.DocInfo) {
|
||||
docInfosGroupRWMutex.Lock()
|
||||
defer docInfosGroupRWMutex.Unlock()
|
||||
if _, ok := DocInfoGroup[shard]; !ok {
|
||||
DocInfoGroup[shard] = &types.DocInfosShard{
|
||||
DocInfos: make(map[uint64]*types.DocInfo),
|
||||
}
|
||||
}
|
||||
DocInfoGroup[shard].DocInfos[docId] = docinfo
|
||||
DocInfoGroup[shard].NumDocs++
|
||||
}
|
||||
|
||||
// IsDocExist doc is exist
|
||||
func IsDocExist(docId uint64) bool {
|
||||
docInfosGroupRWMutex.RLock()
|
||||
defer docInfosGroupRWMutex.RUnlock()
|
||||
for _, docInfosShard := range DocInfoGroup {
|
||||
_, found := docInfosShard.DocInfos[docId]
|
||||
if found {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
var (
	// InvertedIndexGroup holds per-shard inverted indexes:
	// [shard][keyword] -> posting list.
	InvertedIndexGroup = make(map[int]*types.InvertedIndexShard)
	// invertedIndexGroupRWMutex guards all reads and writes of
	// InvertedIndexGroup and of the shards it contains.
	invertedIndexGroupRWMutex sync.RWMutex
)
|
||||
|
||||
// AddInvertedIndexShard add inverted index shard
|
||||
func AddInvertedIndexShard(shard int) {
|
||||
invertedIndexGroupRWMutex.Lock()
|
||||
defer invertedIndexGroupRWMutex.Unlock()
|
||||
if _, found := InvertedIndexGroup[shard]; !found {
|
||||
InvertedIndexGroup[shard] = &types.InvertedIndexShard{
|
||||
InvertedIndex: make(map[string]*types.KeywordIndices),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// AddKeywordIndices add keyword indices
|
||||
func AddKeywordIndices(shard int, keyword string, keywordIndices *types.KeywordIndices) {
|
||||
invertedIndexGroupRWMutex.Lock()
|
||||
defer invertedIndexGroupRWMutex.Unlock()
|
||||
if _, ok := InvertedIndexGroup[shard]; !ok {
|
||||
InvertedIndexGroup[shard] = &types.InvertedIndexShard{
|
||||
InvertedIndex: make(map[string]*types.KeywordIndices),
|
||||
}
|
||||
}
|
||||
InvertedIndexGroup[shard].InvertedIndex[keyword] = keywordIndices
|
||||
InvertedIndexGroup[shard].TotalTokenLen++
|
||||
}
|
881
vendor/github.com/go-ego/riot/core/indexer.go
generated
vendored
Normal file
881
vendor/github.com/go-ego/riot/core/indexer.go
generated
vendored
Normal file
@ -0,0 +1,881 @@
|
||||
// Copyright 2013 Hui Chen
|
||||
// Copyright 2016 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
/*
|
||||
|
||||
Package core is riot core
|
||||
*/
|
||||
package core
|
||||
|
||||
import (
|
||||
"log"
|
||||
"math"
|
||||
"sort"
|
||||
"sync"
|
||||
|
||||
"github.com/go-ego/riot/types"
|
||||
"github.com/go-ego/riot/utils"
|
||||
)
|
||||
|
||||
// Indexer is the inverted-index component of the search engine. It maps
// search keys to the documents containing them and buffers pending adds
// and removes in two caches that are flushed in batches.
type Indexer struct {
	// Inverted index from search key to posting list, plus per-document
	// state. Guarded by the embedded RWMutex.
	tableLock struct {
		sync.RWMutex
		table     map[string]*KeywordIndices
		docsState map[uint64]int // absent: no record; 0: in index; 1: pending delete; 2: pending add
	}

	// Buffer of documents waiting to be inserted into the index.
	addCacheLock struct {
		sync.RWMutex
		addCachePointer int
		addCache        types.DocsIndex
	}

	// Buffer of document ids waiting to be removed from the index.
	removeCacheLock struct {
		sync.RWMutex
		removeCachePointer int
		removeCache        types.DocsId
	}

	initOptions types.IndexerOpts
	initialized bool

	// Approximate count of documents currently in the index.
	numDocs uint64

	// Total keyword count across all indexed texts (used for BM25).
	totalTokenLen float32

	// Keyword length of each indexed document.
	docTokenLens map[uint64]float32
}
|
||||
|
||||
// KeywordIndices is one row of the inverted index: every document that
// contains a given search key, sorted by ascending DocId.
type KeywordIndices struct {
	// Which of the auxiliary slices are populated depends on the
	// IndexType chosen at initialization.
	docIds      []uint64  // always populated, parallel to the slices below
	frequencies []float32 // populated when IndexType == FrequenciesIndex
	locations   [][]int   // populated when IndexType == LocsIndex
}
|
||||
|
||||
// Init 初始化索引器
|
||||
func (indexer *Indexer) Init(options types.IndexerOpts) {
|
||||
if indexer.initialized == true {
|
||||
log.Fatal("The Indexer can not be initialized twice.")
|
||||
}
|
||||
options.Init()
|
||||
indexer.initOptions = options
|
||||
indexer.initialized = true
|
||||
|
||||
indexer.tableLock.table = make(map[string]*KeywordIndices)
|
||||
indexer.tableLock.docsState = make(map[uint64]int)
|
||||
indexer.addCacheLock.addCache = make(
|
||||
[]*types.DocIndex, indexer.initOptions.DocCacheSize)
|
||||
|
||||
indexer.removeCacheLock.removeCache = make(
|
||||
[]uint64, indexer.initOptions.DocCacheSize*2)
|
||||
indexer.docTokenLens = make(map[uint64]float32)
|
||||
}
|
||||
|
||||
// getDocId 从 KeywordIndices 中得到第i个文档的 DocId
|
||||
func (indexer *Indexer) getDocId(ti *KeywordIndices, i int) uint64 {
|
||||
return ti.docIds[i]
|
||||
}
|
||||
|
||||
// HasDoc doc is exist return true
|
||||
func (indexer *Indexer) HasDoc(docId uint64) bool {
|
||||
docState, ok := indexer.tableLock.docsState[docId]
|
||||
if ok && docState == 0 {
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// getIndexLen 得到 KeywordIndices 中文档总数
|
||||
func (indexer *Indexer) getIndexLen(ti *KeywordIndices) int {
|
||||
return len(ti.docIds)
|
||||
}
|
||||
|
||||
// AddDocToCache adds one document to the ADDCACHE; when the cache fills
// up (or forceUpdate is set) the buffered documents are flushed into the
// index, deleting any stale copies first. Passing doc == nil with
// forceUpdate == true flushes without adding.
func (indexer *Indexer) AddDocToCache(doc *types.DocIndex, forceUpdate bool) {
	if indexer.initialized == false {
		log.Fatal("The Indexer has not been initialized.")
	}

	indexer.addCacheLock.Lock()
	if doc != nil {
		indexer.addCacheLock.addCache[indexer.addCacheLock.addCachePointer] = doc
		indexer.addCacheLock.addCachePointer++
	}

	if indexer.addCacheLock.addCachePointer >= indexer.initOptions.DocCacheSize ||
		forceUpdate {
		indexer.tableLock.Lock()

		// Partition the cache: entries in [0, position) still need their
		// old index entry deleted before they can be re-inserted.
		position := 0
		for i := 0; i < indexer.addCacheLock.addCachePointer; i++ {
			docIndex := indexer.addCacheLock.addCache[i]

			docState, ok := indexer.tableLock.docsState[docIndex.DocId]
			if ok && docState <= 1 {
				// docState == 0: currently in the index, delete then re-add.
				// docState == 1: possibly in the index, pending delete;
				// also delete then re-add.
				if position != i {
					indexer.addCacheLock.addCache[position], indexer.addCacheLock.addCache[i] =
						indexer.addCacheLock.addCache[i], indexer.addCacheLock.addCache[position]
				}
				if docState == 0 {
					indexer.removeCacheLock.Lock()
					indexer.removeCacheLock.removeCache[indexer.removeCacheLock.removeCachePointer] =
						docIndex.DocId
					indexer.removeCacheLock.removeCachePointer++
					indexer.removeCacheLock.Unlock()

					indexer.tableLock.docsState[docIndex.DocId] = 1
					indexer.numDocs--
				}
				position++
			} else if !ok {
				indexer.tableLock.docsState[docIndex.DocId] = 2
			}
		}

		indexer.tableLock.Unlock()
		if indexer.RemoveDocToCache(0, forceUpdate) {
			// Only after the pending deletions have actually been applied
			// may the documents that were in the index be re-inserted.
			position = 0
		}

		addCachedDocs := indexer.addCacheLock.addCache[position:indexer.addCacheLock.addCachePointer]
		indexer.addCacheLock.addCachePointer = position

		indexer.addCacheLock.Unlock()
		sort.Sort(addCachedDocs)
		indexer.AddDocs(&addCachedDocs)
	} else {
		indexer.addCacheLock.Unlock()
	}
}
|
||||
|
||||
// AddDocs merges all ADDCACHE documents into the inverted index. docs
// must be sorted by ascending DocId; inserting in increasing order
// minimizes the amount of element shifting in each posting list.
func (indexer *Indexer) AddDocs(docs *types.DocsIndex) {
	if indexer.initialized == false {
		log.Fatal("The Indexer has not been initialized.")
	}

	indexer.tableLock.Lock()
	defer indexer.tableLock.Unlock()
	// Per-keyword insertion cursor: positions only move forward because
	// docs arrive in ascending DocId order.
	indexPointers := make(map[string]int, len(indexer.tableLock.table))

	for i, doc := range *docs {
		if i < len(*docs)-1 && (*docs)[i].DocId == (*docs)[i+1].DocId {
			// Duplicate DocIds: the sort is stable, keep only the last one.
			continue
		}

		docState, ok := indexer.tableLock.docsState[doc.DocId]
		if ok && docState == 1 {
			// Still marked pending-delete: skip it. Valid states here are
			// "absent" and 2, which guarantees a document already in the
			// index is never inserted a second time.
			continue
		}

		// Update the total keyword length (used by BM25).
		if doc.TokenLen != 0 {
			indexer.docTokenLens[doc.DocId] = float32(doc.TokenLen)
			indexer.totalTokenLen += doc.TokenLen
		}

		// NOTE(review): docIdIsNew is never set to false anywhere below,
		// so the guard at the end of the loop is currently always taken.
		docIdIsNew := true
		for _, keyword := range doc.Keywords {
			indices, foundKeyword := indexer.tableLock.table[keyword.Text]
			if !foundKeyword {
				// First occurrence of this search key: create its row.
				ti := KeywordIndices{}
				switch indexer.initOptions.IndexType {
				case types.LocsIndex:
					ti.locations = [][]int{keyword.Starts}
				case types.FrequenciesIndex:
					ti.frequencies = []float32{keyword.Frequency}
				}
				ti.docIds = []uint64{doc.DocId}
				indexer.tableLock.table[keyword.Text] = &ti
				continue
			}

			// Find the insertion point; the doc is guaranteed absent here.
			position, _ := indexer.searchIndex(
				indices, indexPointers[keyword.Text], indexer.getIndexLen(indices)-1, doc.DocId)
			indexPointers[keyword.Text] = position

			// Open a gap at position in the parallel slices and insert.
			switch indexer.initOptions.IndexType {
			case types.LocsIndex:
				indices.locations = append(indices.locations, []int{})
				copy(indices.locations[position+1:], indices.locations[position:])
				indices.locations[position] = keyword.Starts
			case types.FrequenciesIndex:
				indices.frequencies = append(indices.frequencies, float32(0))
				copy(indices.frequencies[position+1:], indices.frequencies[position:])
				indices.frequencies[position] = keyword.Frequency
			}

			indices.docIds = append(indices.docIds, 0)
			copy(indices.docIds[position+1:], indices.docIds[position:])
			indices.docIds[position] = doc.DocId
		}

		// Update the document state and the total count.
		if docIdIsNew {
			indexer.tableLock.docsState[doc.DocId] = 0
			indexer.numDocs++
		}
	}
}
|
||||
|
||||
// RemoveDocToCache queues docId in the REMOVECACHE; when the cache fills
// up (or forceUpdate is set) the queued deletions are applied to the
// index. Passing docId == 0 flushes without queueing. The return value
// reports whether documents were actually removed from the index table
// during this call.
func (indexer *Indexer) RemoveDocToCache(docId uint64, forceUpdate bool) bool {
	if indexer.initialized == false {
		log.Fatal("The Indexer has not been initialized.")
	}

	indexer.removeCacheLock.Lock()
	if docId != 0 {
		indexer.tableLock.Lock()
		if docState, ok := indexer.tableLock.docsState[docId]; ok && docState == 0 {
			// In the index: queue for deletion and mark pending-delete.
			indexer.removeCacheLock.removeCache[indexer.removeCacheLock.removeCachePointer] = docId
			indexer.removeCacheLock.removeCachePointer++
			indexer.tableLock.docsState[docId] = 1
			indexer.numDocs--
		} else if ok && docState == 2 {
			// Deleting a document that is still waiting to be added.
			indexer.tableLock.docsState[docId] = 1
		} else if !ok {
			// Unknown document: it cannot be determined whether it sits in
			// addCache, so do nothing rather than corrupt the caches.
		}
		indexer.tableLock.Unlock()
	}

	if indexer.removeCacheLock.removeCachePointer > 0 &&
		(indexer.removeCacheLock.removeCachePointer >= indexer.initOptions.DocCacheSize ||
			forceUpdate) {
		removeCacheddocs := indexer.removeCacheLock.removeCache[:indexer.removeCacheLock.removeCachePointer]
		indexer.removeCacheLock.removeCachePointer = 0
		indexer.removeCacheLock.Unlock()
		sort.Sort(removeCacheddocs)
		indexer.RemoveDocs(&removeCacheddocs)
		return true
	}

	indexer.removeCacheLock.Unlock()
	return false
}
|
||||
|
||||
// RemoveDocs deletes every document listed in docs (sorted ascending)
// from the inverted index, compacting each posting list in place with a
// two-pointer sweep.
func (indexer *Indexer) RemoveDocs(docs *types.DocsId) {
	if indexer.initialized == false {
		log.Fatal("The Indexer has not been initialized.")
	}

	indexer.tableLock.Lock()
	defer indexer.tableLock.Unlock()

	// Update the total keyword length and drop per-document state.
	for _, docId := range *docs {
		indexer.totalTokenLen -= indexer.docTokenLens[docId]
		delete(indexer.docTokenLens, docId)
		delete(indexer.tableLock.docsState, docId)
	}

	for keyword, indices := range indexer.tableLock.table {
		indicesTop, indicesPointer := 0, 0
		// Skip the prefix of docs that cannot appear in this row.
		docsPointer := sort.Search(
			len(*docs), func(i int) bool { return (*docs)[i] >= indices.docIds[0] })
		// Two-pointer sweep: survivors are compacted toward the front,
		// matches are skipped.
		for docsPointer < len(*docs) && indicesPointer < indexer.getIndexLen(indices) {
			if indices.docIds[indicesPointer] < (*docs)[docsPointer] {
				if indicesTop != indicesPointer {
					switch indexer.initOptions.IndexType {
					case types.LocsIndex:
						indices.locations[indicesTop] = indices.locations[indicesPointer]
					case types.FrequenciesIndex:
						indices.frequencies[indicesTop] = indices.frequencies[indicesPointer]
					}

					indices.docIds[indicesTop] = indices.docIds[indicesPointer]
				}

				indicesTop++
				indicesPointer++
			} else if indices.docIds[indicesPointer] == (*docs)[docsPointer] {
				indicesPointer++
				docsPointer++
			} else {
				docsPointer++
			}
		}
		if indicesTop != indicesPointer {
			// Append the untouched tail after the compacted prefix.
			switch indexer.initOptions.IndexType {
			case types.LocsIndex:
				indices.locations = append(
					indices.locations[:indicesTop], indices.locations[indicesPointer:]...)
			case types.FrequenciesIndex:
				indices.frequencies = append(
					indices.frequencies[:indicesTop], indices.frequencies[indicesPointer:]...)
			}

			indices.docIds = append(
				indices.docIds[:indicesTop], indices.docIds[indicesPointer:]...)
		}

		if len(indices.docIds) == 0 {
			// Row emptied out: remove the keyword entirely.
			delete(indexer.tableLock.table, keyword)
		}
	}
}
|
||||
|
||||
// Lookup finds the documents containing ALL of the given search keys (an
// AND query over tokens plus labels). When docIds is non-nil the search
// is restricted to those documents. When countDocsOnly is true only
// numDocs is computed and docs stays empty. An optional Logic argument
// redirects the query to LogicLookup.
func (indexer *Indexer) Lookup(
	tokens []string, labels []string, docIds map[uint64]bool, countDocsOnly bool,
	logic ...types.Logic) (docs []types.IndexedDoc, numDocs int) {

	if indexer.initialized == false {
		log.Fatal("The Indexer has not been initialized.")
	}

	if indexer.numDocs == 0 {
		return
	}
	numDocs = 0

	// Merge keywords and labels into one search-key slice.
	keywords := make([]string, len(tokens)+len(labels))
	copy(keywords, tokens)
	copy(keywords[len(tokens):], labels)

	if len(logic) > 0 {
		// NOTE(review): && binds tighter than ||, so the len(keywords) > 0
		// guard only applies to the Must branch here — presumably the
		// intent was to guard all three flags; confirm before changing.
		if logic != nil && len(keywords) > 0 && logic[0].Must == true ||
			logic[0].Should == true || logic[0].NotIn == true {

			docs, numDocs = indexer.LogicLookup(
				docIds, countDocsOnly, keywords, logic[0])

			return
		}

		if logic != nil && (len(logic[0].LogicExpr.MustLabels) > 0 ||
			len(logic[0].LogicExpr.ShouldLabels) > 0) &&
			len(logic[0].LogicExpr.NotInLabels) >= 0 {

			docs, numDocs = indexer.LogicLookup(
				docIds, countDocsOnly, keywords, logic[0])

			return
		}
	}

	indexer.tableLock.RLock()
	defer indexer.tableLock.RUnlock()

	table := make([]*KeywordIndices, len(keywords))
	for i, keyword := range keywords {
		indices, found := indexer.tableLock.table[keyword]
		if !found {
			// A key with no posting list means the AND query is empty.
			return
		}
		// Otherwise collect its row.
		table[i] = indices
	}

	// Nothing to intersect.
	if len(table) == 0 {
		return
	}

	// Merge-intersect the rows, scanning from the back so documents with
	// larger DocIds are produced first.
	indexPointers := make([]int, len(table))
	for iTable := 0; iTable < len(table); iTable++ {
		indexPointers[iTable] = indexer.getIndexLen(table[iTable]) - 1
	}

	// Average document keyword length, used by BM25.
	avgDocLength := indexer.totalTokenLen / float32(indexer.numDocs)
	for ; indexPointers[0] >= 0; indexPointers[0]-- {
		// Use the first row as the baseline and look the same document up
		// in every other row.
		baseDocId := indexer.getDocId(table[0], indexPointers[0])
		if docIds != nil {
			if _, found := docIds[baseDocId]; !found {
				continue
			}
		}

		iTable := 1
		found := true
		for ; iTable < len(table); iTable++ {
			// Binary search beats naive sequential merging here; a linked
			// list could eventually avoid the write lock on inserts.
			// TODO: study other intersection algorithms for speed and
			// scalability.
			position, foundBaseDocId := indexer.searchIndex(table[iTable],
				0, indexPointers[iTable], baseDocId)

			if foundBaseDocId {
				indexPointers[iTable] = position
			} else {
				if position == 0 {
					// Every DocId in this row is larger than baseDocId,
					// so no further match is possible.
					return
				}

				// Continue with the next indexPointers[0].
				indexPointers[iTable] = position - 1
				found = false
				break
			}
		}

		if found {
			if docState, ok := indexer.tableLock.docsState[baseDocId]; !ok || docState != 0 {
				continue
			}
			indexedDoc := types.IndexedDoc{}

			// For LocsIndex, compute token proximity.
			if indexer.initOptions.IndexType == types.LocsIndex {
				// Count how many tokens carry location information.
				numTokensWithLocations := 0
				for i, t := range table[:len(tokens)] {
					if len(t.locations[indexPointers[i]]) > 0 {
						numTokensWithLocations++
					}
				}
				if numTokensWithLocations != len(tokens) {
					if !countDocsOnly {
						docs = append(docs, types.IndexedDoc{
							DocId: baseDocId,
						})
					}
					numDocs++
					// Breaking here would drop results when a keyword maps
					// to several documents and label keys are present, so
					// continue instead.
					continue
				}

				// Proximity of the search keys inside the document.
				tokenProximity, TokenLocs := computeTokenProximity(
					table[:len(tokens)], indexPointers, tokens)

				indexedDoc.TokenProximity = int32(tokenProximity)
				indexedDoc.TokenSnippetLocs = TokenLocs

				// Attach the raw token locations.
				indexedDoc.TokenLocs = make([][]int, len(tokens))
				for i, t := range table[:len(tokens)] {
					indexedDoc.TokenLocs[i] = t.locations[indexPointers[i]]
				}
			}

			// BM25 applies to both LocsIndex and FrequenciesIndex.
			if indexer.initOptions.IndexType == types.LocsIndex ||
				indexer.initOptions.IndexType == types.FrequenciesIndex {
				bm25 := float32(0)
				d := indexer.docTokenLens[baseDocId]
				for i, t := range table[:len(tokens)] {
					var frequency float32
					if indexer.initOptions.IndexType == types.LocsIndex {
						frequency = float32(len(t.locations[indexPointers[i]]))
					} else {
						frequency = t.frequencies[indexPointers[i]]
					}

					// BM25 with a smoothed idf.
					if len(t.docIds) > 0 && frequency > 0 &&
						indexer.initOptions.BM25Parameters != nil && avgDocLength != 0 {
						idf := float32(math.Log2(float64(indexer.numDocs)/float64(len(t.docIds)) + 1))
						k1 := indexer.initOptions.BM25Parameters.K1
						b := indexer.initOptions.BM25Parameters.B
						bm25 += idf * frequency * (k1 + 1) / (frequency + k1*(1-b+b*d/avgDocLength))
					}
				}
				indexedDoc.BM25 = float32(bm25)
			}

			indexedDoc.DocId = baseDocId
			if !countDocsOnly {
				docs = append(docs, indexedDoc)
			}
			numDocs++
		}
	}

	return
}
|
||||
|
||||
// searchIndex 二分法查找 indices 中某文档的索引项
|
||||
// 第一个返回参数为找到的位置或需要插入的位置
|
||||
// 第二个返回参数标明是否找到
|
||||
func (indexer *Indexer) searchIndex(indices *KeywordIndices,
|
||||
start int, end int, docId uint64) (int, bool) {
|
||||
// 特殊情况
|
||||
if indexer.getIndexLen(indices) == start {
|
||||
return start, false
|
||||
}
|
||||
if docId < indexer.getDocId(indices, start) {
|
||||
return start, false
|
||||
} else if docId == indexer.getDocId(indices, start) {
|
||||
return start, true
|
||||
}
|
||||
if docId > indexer.getDocId(indices, end) {
|
||||
return end + 1, false
|
||||
} else if docId == indexer.getDocId(indices, end) {
|
||||
return end, true
|
||||
}
|
||||
|
||||
// 二分
|
||||
var middle int
|
||||
for end-start > 1 {
|
||||
middle = (start + end) / 2
|
||||
if docId == indexer.getDocId(indices, middle) {
|
||||
return middle, true
|
||||
} else if docId > indexer.getDocId(indices, middle) {
|
||||
start = middle
|
||||
} else {
|
||||
end = middle
|
||||
}
|
||||
}
|
||||
|
||||
return end, false
|
||||
}
|
||||
|
||||
// computeTokenProximity computes how tightly the search keys sit
// together in the text.
//
// If the i-th key's first byte occurs at position P_i and has length
// L_i, the proximity is
//
//	ArgMin(Sum(Abs(P_(i+1) - P_i - L_i)))
//
// computed by dynamic programming over the candidate positions of each
// token in turn. The chosen P_i values are returned via TokenLocs.
func computeTokenProximity(table []*KeywordIndices,
	indexPointers []int, tokens []string) (
	minTokenProximity int, TokenLocs []int) {
	minTokenProximity = -1
	TokenLocs = make([]int, len(tokens))

	var (
		currentLocations, nextLocations []int
		currentMinValues, nextMinValues []int
		path                            [][]int
	)

	// path[i][j] records which position of token i-1 produced the best
	// value for position j of token i (used for backtracking).
	path = make([][]int, len(tokens))
	for i := 1; i < len(path); i++ {
		path[i] = make([]int, len(table[i].locations[indexPointers[i]]))
	}

	// Dynamic programming, one token at a time.
	currentLocations = table[0].locations[indexPointers[0]]
	currentMinValues = make([]int, len(currentLocations))
	for i := 1; i < len(tokens); i++ {
		nextLocations = table[i].locations[indexPointers[i]]
		nextMinValues = make([]int, len(nextLocations))
		for j := range nextMinValues {
			// -1 marks "unreachable".
			nextMinValues[j] = -1
		}

		var iNext int
		for iCurrent, currentLocation := range currentLocations {
			if currentMinValues[iCurrent] == -1 {
				continue
			}
			// Advance iNext to the last position left of currentLocation.
			for iNext+1 < len(nextLocations) &&
				nextLocations[iNext+1] < currentLocation {
				iNext++
			}

			// Try transitioning from position `from` of the previous
			// token to position `to` of the current token.
			update := func(from int, to int) {
				if to >= len(nextLocations) {
					return
				}
				value := currentMinValues[from] +
					utils.AbsInt(nextLocations[to]-currentLocations[from]-len(tokens[i-1]))

				if nextMinValues[to] == -1 || value < nextMinValues[to] {
					nextMinValues[to] = value
					path[i][to] = from
				}
			}

			// The optimal transition only involves the closest positions
			// on either side.
			update(iCurrent, iNext)
			update(iCurrent, iNext+1)
		}

		currentLocations = nextLocations
		currentMinValues = nextMinValues
	}

	// Pick the best final position.
	var cursor int
	for i, value := range currentMinValues {
		if value == -1 {
			continue
		}
		if minTokenProximity == -1 || value < minTokenProximity {
			minTokenProximity = value
			cursor = i
		}
	}

	// Walk the path backwards to recover the chosen positions.
	for i := len(tokens) - 1; i >= 0; i-- {
		if i != len(tokens)-1 {
			cursor = path[i+1][cursor]
		}
		TokenLocs[i] = table[i].locations[indexPointers[i]][cursor]
	}

	return
}
|
||||
|
||||
// LogicLookup runs a boolean (must / should / not-in) query over the
// index. LogicExpr holds the raw search keys; logic selects which
// boolean mode applies and may override LogicExpr with its own label
// lists (MustLabels / ShouldLabels / NotInLabels take precedence).
func (indexer *Indexer) LogicLookup(
	docIds map[uint64]bool, countDocsOnly bool, LogicExpr []string,
	logic types.Logic) (docs []types.IndexedDoc, numDocs int) {

	indexer.tableLock.RLock()
	defer indexer.tableLock.RUnlock()

	// // Validity check: NOT-only and fully-empty queries are rejected.
	// if Logic.Must == true && Logic.Should == true && Logic.NotIn == true {
	// 	return
	// }

	// MUST keys: when present, every one of them needs a posting list;
	// a single missing key means no document can match.
	MustTable := make([]*KeywordIndices, 0)

	if len(logic.LogicExpr.MustLabels) > 0 {
		LogicExpr = logic.LogicExpr.MustLabels
	}
	if logic.Must == true || len(logic.LogicExpr.MustLabels) > 0 {
		for _, keyword := range LogicExpr {
			indices, found := indexer.tableLock.table[keyword]
			if !found {
				return
			}

			MustTable = append(MustTable, indices)
		}
	}

	// SHOULD keys:
	// 1. at least one of them must have a posting list;
	// 2. the SHOULD group is ANDed with the MUST group.
	ShouldTable := make([]*KeywordIndices, 0)

	if len(logic.LogicExpr.ShouldLabels) > 0 {
		LogicExpr = logic.LogicExpr.ShouldLabels
	}

	if logic.Should == true || len(logic.LogicExpr.ShouldLabels) > 0 {
		for _, keyword := range LogicExpr {
			indices, found := indexer.tableLock.table[keyword]
			if found {
				ShouldTable = append(ShouldTable, indices)
			}
		}
		if len(ShouldTable) == 0 {
			// SHOULD keys exist but none of them has a posting list.
			return
		}
	}

	// NOT-IN keys: entirely optional, and keys without a posting list
	// are tolerated.
	NotInTable := make([]*KeywordIndices, 0)

	if len(logic.LogicExpr.NotInLabels) > 0 {
		LogicExpr = logic.LogicExpr.NotInLabels
	}
	if logic.NotIn == true || len(logic.LogicExpr.NotInLabels) > 0 {
		for _, keyword := range LogicExpr {
			indices, found := indexer.tableLock.table[keyword]
			if found {
				NotInTable = append(NotInTable, indices)
			}
		}
	}

	// Run the query.
	numDocs = 0
	if logic.Must == true || len(logic.LogicExpr.MustLabels) > 0 {
		// AND search, driven by the first MUST row (back to front so
		// larger DocIds come out first).
		for idx := indexer.getIndexLen(MustTable[0]) - 1; idx >= 0; idx-- {
			baseDocId := indexer.getDocId(MustTable[0], idx)
			if docIds != nil {
				_, found := docIds[baseDocId]
				if !found {
					continue
				}
			}

			mustFound := indexer.findInMustTable(MustTable[1:], baseDocId)
			shouldFound := indexer.findInShouldTable(ShouldTable, baseDocId)
			notInFound := indexer.findInNotInTable(NotInTable, baseDocId)

			if mustFound && shouldFound && !notInFound {
				indexedDoc := types.IndexedDoc{}
				indexedDoc.DocId = baseDocId
				if !countDocsOnly {
					docs = append(docs, indexedDoc)
				}
				numDocs++
			}
		}
	} else {
		// No MUST group: a SHOULD group must exist, so take the union.
		if logic.Should == true || len(logic.LogicExpr.ShouldLabels) > 0 {
			docs, numDocs = indexer.unionTable(ShouldTable, NotInTable, countDocsOnly)
		} else {
			uintDocIds := make([]uint64, 0)
			// NOT-only query: currently just returns the NOT-matching docs.
			for i := 0; i < len(NotInTable); i++ {
				for _, docid := range NotInTable[i].docIds {
					if indexer.findInNotInTable(NotInTable, docid) {
						uintDocIds = append(uintDocIds, docid)
					}
				}
			}

			StableDesc(uintDocIds)

			numDocs = 0
			for _, doc := range uintDocIds {
				indexedDoc := types.IndexedDoc{}
				indexedDoc.DocId = doc
				if !countDocsOnly {
					docs = append(docs, indexedDoc)
				}
				numDocs++
			}
		}

		// fmt.Println(docs, numDocs)
	}

	return
}
|
||||
|
||||
// 在逻辑与反向表中对docid进行查找, 若每个反向表都找到,
|
||||
// 则返回 true, 有一个找不到则返回 false
|
||||
func (indexer *Indexer) findInMustTable(table []*KeywordIndices, docId uint64) bool {
|
||||
for i := 0; i < len(table); i++ {
|
||||
_, foundDocId := indexer.searchIndex(table[i],
|
||||
0, indexer.getIndexLen(table[i])-1, docId)
|
||||
if !foundDocId {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// 在逻辑或反向表中对 docid 进行查找, 若有一个找到则返回 true,
|
||||
// 都找不到则返回 false
|
||||
// 如果 table 为空, 则返回 true
|
||||
func (indexer *Indexer) findInShouldTable(table []*KeywordIndices, docId uint64) bool {
|
||||
for i := 0; i < len(table); i++ {
|
||||
_, foundDocId := indexer.searchIndex(table[i],
|
||||
0, indexer.getIndexLen(table[i])-1, docId)
|
||||
if foundDocId {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
if len(table) == 0 {
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// findInNotInTable 在逻辑非反向表中对 docid 进行查找,
|
||||
// 若有一个找到则返回 true, 都找不到则返回 false
|
||||
// 如果 table 为空, 则返回 false
|
||||
func (indexer *Indexer) findInNotInTable(table []*KeywordIndices, docId uint64) bool {
|
||||
for i := 0; i < len(table); i++ {
|
||||
_, foundDocId := indexer.searchIndex(table[i],
|
||||
0, indexer.getIndexLen(table[i])-1, docId)
|
||||
if foundDocId {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// unionTable 如果不存在与逻辑检索, 则需要对逻辑或反向表求并集
|
||||
// 先求差集再求并集, 可以减小内存占用
|
||||
// docid 要保序
|
||||
func (indexer *Indexer) unionTable(table []*KeywordIndices,
|
||||
notInTable []*KeywordIndices, countDocsOnly bool) (
|
||||
docs []types.IndexedDoc, numDocs int) {
|
||||
docIds := make([]uint64, 0)
|
||||
// 求并集
|
||||
for i := 0; i < len(table); i++ {
|
||||
for _, docid := range table[i].docIds {
|
||||
if !indexer.findInNotInTable(notInTable, docid) {
|
||||
found := false
|
||||
for _, v := range docIds {
|
||||
if v == docid {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
docIds = append(docIds, docid)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// 排序
|
||||
// sortUint64.StableDesc(docIds)
|
||||
StableDesc(docIds)
|
||||
|
||||
numDocs = 0
|
||||
for _, doc := range docIds {
|
||||
indexedDoc := types.IndexedDoc{}
|
||||
indexedDoc.DocId = doc
|
||||
if !countDocsOnly {
|
||||
docs = append(docs, indexedDoc)
|
||||
}
|
||||
numDocs++
|
||||
}
|
||||
|
||||
return
|
||||
}
|
243
vendor/github.com/go-ego/riot/core/ranker.go
generated
vendored
Normal file
243
vendor/github.com/go-ego/riot/core/ranker.go
generated
vendored
Normal file
@ -0,0 +1,243 @@
|
||||
// Copyright 2013 Hui Chen
|
||||
// Copyright 2016 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package core
|
||||
|
||||
import (
|
||||
// "fmt"
|
||||
"log"
|
||||
"sort"
|
||||
"sync"
|
||||
|
||||
"github.com/go-ego/riot/types"
|
||||
"github.com/go-ego/riot/utils"
|
||||
)
|
||||
|
||||
// Ranker scores and sorts the documents returned by the indexer.
type Ranker struct {
	// idOnly: when true, only doc ids and fields are tracked; the
	// content/attri maps are never allocated.
	idOnly bool

	// Per-document ranking state, guarded by the embedded RWMutex.
	lock struct {
		sync.RWMutex
		fields map[uint64]interface{}
		docs   map[uint64]bool
		// content and attri are only populated when idOnly is false.
		content map[uint64]string
		attri   map[uint64]interface{}
	}

	initialized bool
}
|
||||
|
||||
// Init init ranker
|
||||
func (ranker *Ranker) Init(onlyID ...bool) {
|
||||
if ranker.initialized == true {
|
||||
log.Fatal("The Ranker can not be initialized twice.")
|
||||
}
|
||||
ranker.initialized = true
|
||||
|
||||
ranker.lock.fields = make(map[uint64]interface{})
|
||||
ranker.lock.docs = make(map[uint64]bool)
|
||||
|
||||
if len(onlyID) > 0 {
|
||||
ranker.idOnly = onlyID[0]
|
||||
}
|
||||
|
||||
if !ranker.idOnly {
|
||||
// new
|
||||
ranker.lock.content = make(map[uint64]string)
|
||||
ranker.lock.attri = make(map[uint64]interface{})
|
||||
}
|
||||
}
|
||||
|
||||
// AddDoc add doc
|
||||
// 给某个文档添加评分字段
|
||||
func (ranker *Ranker) AddDoc(
|
||||
// docId uint64, fields interface{}, content string, attri interface{}) {
|
||||
docId uint64, fields interface{}, content ...interface{}) {
|
||||
if ranker.initialized == false {
|
||||
log.Fatal("The Ranker has not been initialized.")
|
||||
}
|
||||
|
||||
ranker.lock.Lock()
|
||||
ranker.lock.fields[docId] = fields
|
||||
ranker.lock.docs[docId] = true
|
||||
|
||||
if !ranker.idOnly {
|
||||
// new
|
||||
if len(content) > 0 {
|
||||
ranker.lock.content[docId] = content[0].(string)
|
||||
}
|
||||
|
||||
if len(content) > 1 {
|
||||
ranker.lock.attri[docId] = content[1]
|
||||
// ranker.lock.attri[docId] = attri
|
||||
}
|
||||
}
|
||||
|
||||
ranker.lock.Unlock()
|
||||
}
|
||||
|
||||
// RemoveDoc 删除某个文档的评分字段
|
||||
func (ranker *Ranker) RemoveDoc(docId uint64) {
|
||||
if ranker.initialized == false {
|
||||
log.Fatal("The Ranker has not been initialized.")
|
||||
}
|
||||
|
||||
ranker.lock.Lock()
|
||||
delete(ranker.lock.fields, docId)
|
||||
delete(ranker.lock.docs, docId)
|
||||
|
||||
if !ranker.idOnly {
|
||||
// new
|
||||
delete(ranker.lock.content, docId)
|
||||
delete(ranker.lock.attri, docId)
|
||||
}
|
||||
|
||||
ranker.lock.Unlock()
|
||||
}
|
||||
|
||||
// RankDocId rank docs by types.ScoredIDs
|
||||
func (ranker *Ranker) RankDocId(docs []types.IndexedDoc,
|
||||
options types.RankOpts, countDocsOnly bool) (types.ScoredIDs, int) {
|
||||
var outputDocs types.ScoredIDs
|
||||
numDocs := 0
|
||||
|
||||
for _, d := range docs {
|
||||
ranker.lock.RLock()
|
||||
// 判断 doc 是否存在
|
||||
if _, ok := ranker.lock.docs[d.DocId]; ok {
|
||||
|
||||
fs := ranker.lock.fields[d.DocId]
|
||||
|
||||
ranker.lock.RUnlock()
|
||||
// 计算评分并剔除没有分值的文档
|
||||
scores := options.ScoringCriteria.Score(d, fs)
|
||||
if len(scores) > 0 {
|
||||
if !countDocsOnly {
|
||||
outputDocs = append(outputDocs, types.ScoredID{
|
||||
DocId: d.DocId,
|
||||
Scores: scores,
|
||||
TokenSnippetLocs: d.TokenSnippetLocs,
|
||||
TokenLocs: d.TokenLocs})
|
||||
}
|
||||
numDocs++
|
||||
}
|
||||
} else {
|
||||
ranker.lock.RUnlock()
|
||||
}
|
||||
}
|
||||
|
||||
// 排序
|
||||
if !countDocsOnly {
|
||||
if options.ReverseOrder {
|
||||
sort.Sort(sort.Reverse(outputDocs))
|
||||
} else {
|
||||
sort.Sort(outputDocs)
|
||||
}
|
||||
// 当用户要求只返回部分结果时返回部分结果
|
||||
var start, end int
|
||||
if options.MaxOutputs != 0 {
|
||||
start = utils.MinInt(options.OutputOffset, len(outputDocs))
|
||||
end = utils.MinInt(options.OutputOffset+options.MaxOutputs, len(outputDocs))
|
||||
} else {
|
||||
start = utils.MinInt(options.OutputOffset, len(outputDocs))
|
||||
end = len(outputDocs)
|
||||
}
|
||||
return outputDocs[start:end], numDocs
|
||||
}
|
||||
|
||||
return outputDocs, numDocs
|
||||
}
|
||||
|
||||
// RankDocs rank docs by types.ScoredDocs
|
||||
func (ranker *Ranker) RankDocs(docs []types.IndexedDoc,
|
||||
options types.RankOpts, countDocsOnly bool) (types.ScoredDocs, int) {
|
||||
var outputDocs types.ScoredDocs
|
||||
numDocs := 0
|
||||
|
||||
for _, d := range docs {
|
||||
ranker.lock.RLock()
|
||||
// 判断 doc 是否存在
|
||||
if _, ok := ranker.lock.docs[d.DocId]; ok {
|
||||
|
||||
fs := ranker.lock.fields[d.DocId]
|
||||
content := ranker.lock.content[d.DocId]
|
||||
attri := ranker.lock.attri[d.DocId]
|
||||
|
||||
ranker.lock.RUnlock()
|
||||
// 计算评分并剔除没有分值的文档
|
||||
scores := options.ScoringCriteria.Score(d, fs)
|
||||
if len(scores) > 0 {
|
||||
if !countDocsOnly {
|
||||
outputDocs = append(outputDocs, types.ScoredDoc{
|
||||
DocId: d.DocId,
|
||||
// new
|
||||
Fields: fs,
|
||||
Content: content,
|
||||
Attri: attri,
|
||||
//
|
||||
Scores: scores,
|
||||
TokenSnippetLocs: d.TokenSnippetLocs,
|
||||
TokenLocs: d.TokenLocs})
|
||||
}
|
||||
numDocs++
|
||||
}
|
||||
} else {
|
||||
ranker.lock.RUnlock()
|
||||
}
|
||||
}
|
||||
|
||||
// 排序
|
||||
if !countDocsOnly {
|
||||
if options.ReverseOrder {
|
||||
sort.Sort(sort.Reverse(outputDocs))
|
||||
} else {
|
||||
sort.Sort(outputDocs)
|
||||
}
|
||||
// 当用户要求只返回部分结果时返回部分结果
|
||||
var start, end int
|
||||
if options.MaxOutputs != 0 {
|
||||
start = utils.MinInt(options.OutputOffset, len(outputDocs))
|
||||
end = utils.MinInt(options.OutputOffset+options.MaxOutputs, len(outputDocs))
|
||||
} else {
|
||||
start = utils.MinInt(options.OutputOffset, len(outputDocs))
|
||||
end = len(outputDocs)
|
||||
}
|
||||
return outputDocs[start:end], numDocs
|
||||
}
|
||||
|
||||
return outputDocs, numDocs
|
||||
}
|
||||
|
||||
// Rank rank docs
|
||||
// 给文档评分并排序
|
||||
func (ranker *Ranker) Rank(docs []types.IndexedDoc,
|
||||
options types.RankOpts, countDocsOnly bool) (
|
||||
interface{}, int) {
|
||||
|
||||
if ranker.initialized == false {
|
||||
log.Fatal("The Ranker has not been initialized.")
|
||||
}
|
||||
|
||||
// 对每个文档评分
|
||||
if ranker.idOnly {
|
||||
outputDocs, numDocs := ranker.RankDocId(docs, options, countDocsOnly)
|
||||
return outputDocs, numDocs
|
||||
}
|
||||
|
||||
outputDocs, numDocs := ranker.RankDocs(docs, options, countDocsOnly)
|
||||
return outputDocs, numDocs
|
||||
}
|
44
vendor/github.com/go-ego/riot/core/test_utils.go
generated
vendored
Normal file
44
vendor/github.com/go-ego/riot/core/test_utils.go
generated
vendored
Normal file
@ -0,0 +1,44 @@
|
||||
package core
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-ego/riot/types"
|
||||
)
|
||||
|
||||
// indicesToString renders the document IDs indexed under token as a
// space-separated string; empty when the token is absent from the indexer
// table. Test helper only.
func indicesToString(indexer *Indexer, token string) (output string) {
	if indices, ok := indexer.tableLock.table[token]; ok {
		for i := 0; i < indexer.getIndexLen(indices); i++ {
			output += fmt.Sprintf("%d ",
				indexer.getDocId(indices, i))
		}
	}
	return
}
|
||||
|
||||
// indexedDocsToString renders docs as "[DocId TokenProximity TokenSnippetLocs] "
// triples. numDocs is unused here (kept for signature compatibility with
// callers). Test helper only.
func indexedDocsToString(docs []types.IndexedDoc, numDocs int) (output string) {
	for _, doc := range docs {
		output += fmt.Sprintf("[%d %d %v] ",
			doc.DocId, doc.TokenProximity, doc.TokenSnippetLocs)
	}
	return
}
|
||||
|
||||
// scoredDocsToString renders each doc as "[DocId [s1 s2 ...]] ", with each
// score scaled by 1000 and truncated to an int. Test helper only.
func scoredDocsToString(docs []types.ScoredDoc) (output string) {
	for _, doc := range docs {
		output += fmt.Sprintf("[%d [", doc.DocId)
		for _, score := range doc.Scores {
			output += fmt.Sprintf("%d ", int(score*1000))
		}
		output += "]] "
	}
	return
}
|
||||
|
||||
// indexedDocIdsToString renders the document IDs as "[id] " entries.
// numDocs is unused here (kept for signature compatibility). Test helper only.
func indexedDocIdsToString(docs []types.IndexedDoc, numDocs int) (output string) {
	for _, doc := range docs {
		output += fmt.Sprintf("[%d] ",
			doc.DocId)
	}
	return
}
|
496
vendor/github.com/go-ego/riot/core/uint64.go
generated
vendored
Normal file
496
vendor/github.com/go-ego/riot/core/uint64.go
generated
vendored
Normal file
@ -0,0 +1,496 @@
|
||||
package core
|
||||
|
||||
// ================= COMMON =================
|
||||
|
||||
// min returns the smaller of two ints.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
|
||||
|
||||
// ------------- ASCENDING -------------
|
||||
|
||||
// heapSortAsc sorts data[a:b] in ascending order via heap sort: build a
// max-heap over the range, then repeatedly swap the root with the last
// heap element and re-sift.
func heapSortAsc(data []uint64, a, b int) {
	first := a
	lo := 0
	hi := b - a
	// Build the max-heap (heap indices are relative to first).
	for i := (hi - 1) / 2; i >= 0; i-- {
		siftDownAsc(data, i, hi, first)
	}
	// Pop the maximum to the end of the range, shrinking the heap.
	for i := hi - 1; i >= 0; i-- {
		data[first], data[first+i] = data[first+i], data[first]
		siftDownAsc(data, lo, i, first)
	}
}
|
||||
|
||||
// insertionSortAsc sorts data[a:b] in ascending order using insertion sort.
func insertionSortAsc(data []uint64, a, b int) {
	for i := a + 1; i < b; i++ {
		// Bubble data[i] left until it meets a smaller-or-equal element.
		for j := i; j > a; j-- {
			if data[j-1] <= data[j] {
				break
			}
			data[j], data[j-1] = data[j-1], data[j]
		}
	}
}
|
||||
|
||||
// siftDownAsc restores the max-heap property for the subtree rooted at lo,
// within a heap of hi elements whose element 0 lives at data[first].
func siftDownAsc(data []uint64, lo, hi, first int) {
	root := lo
	for {
		child := 2*root + 1
		if child >= hi {
			break
		}
		// Pick the larger of the two children.
		if child+1 < hi && data[first+child] < data[first+child+1] {
			child++
		}
		// Heap property already holds: done.
		if data[first+root] >= data[first+child] {
			return
		}
		data[first+root], data[first+child] = data[first+child], data[first+root]
		root = child
	}
}
|
||||
|
||||
// medianOfThreeAsc orders the elements at indices m1, m0, m2 so that
// data[m0] <= data[m1] <= data[m2]; data[m1] ends up holding the median
// of the three values.
func medianOfThreeAsc(data []uint64, m1, m0, m2 int) {
	// Bubble sort on three elements.
	if data[m0] > data[m1] {
		data[m0], data[m1] = data[m1], data[m0]
	}
	if data[m1] > data[m2] {
		data[m1], data[m2] = data[m2], data[m1]
	}
	if data[m0] > data[m1] {
		data[m0], data[m1] = data[m1], data[m0]
	}
}
|
||||
|
||||
// swapRangeAsc exchanges the n elements starting at index a with the n
// elements starting at index b.
func swapRangeAsc(data []uint64, a, b, n int) {
	for ; n > 0; n-- {
		data[a], data[b] = data[b], data[a]
		a++
		b++
	}
}
|
||||
|
||||
// doPivotAsc partitions data[lo:hi] around a median-of-three pivot (for
// ranges longer than 40, a "ninther": median of three medians of three).
// On return the range is split into data[lo:midlo] < pivot,
// data[midlo:midhi] == pivot, data[midhi:hi] > pivot — the three-way
// partition used by Go's sort package, from which this appears adapted.
func doPivotAsc(data []uint64, lo, hi int) (midlo, midhi int) {
	m := lo + (hi-lo)/2
	if hi-lo > 40 {
		// Tukey ninther pivot selection for large ranges.
		s := (hi - lo) / 8
		medianOfThreeAsc(data, lo, lo+s, lo+2*s)
		medianOfThreeAsc(data, m, m-s, m+s)
		medianOfThreeAsc(data, hi-1, hi-1-s, hi-1-2*s)
	}
	medianOfThreeAsc(data, lo, m, hi-1)

	// Invariant, with the pivot value at data[pivot]:
	//   data[lo:a]  < pivot
	//   data[a:b]  == pivot
	//   data[c:d]  == pivot
	//   data[d:hi]  > pivot
	pivot := lo
	a, b, c, d := lo+1, lo+1, hi, hi
	for {
		for b < c {
			if data[b] < data[pivot] {
				b++
			} else if data[pivot] >= data[b] { // here: data[b] == pivot
				data[a], data[b] = data[b], data[a]
				a++
				b++
			} else {
				break
			}
		}
		for b < c {
			if data[pivot] < data[c-1] {
				c--
			} else if data[c-1] >= data[pivot] { // here: data[c-1] == pivot
				data[c-1], data[d-1] = data[d-1], data[c-1]
				c--
				d--
			} else {
				break
			}
		}
		if b >= c {
			break
		}
		// data[b] > pivot and data[c-1] < pivot: exchange them.
		data[b], data[c-1] = data[c-1], data[b]
		b++
		c--
	}

	// Move the pivot-equal runs from the edges into the middle.
	n := min(b-a, a-lo)
	swapRangeAsc(data, lo, b-n, n)

	n = min(hi-d, d-c)
	swapRangeAsc(data, c, hi-n, n)

	return lo + b - a, hi - (d - c)
}
|
||||
|
||||
// quickSortAsc sorts data[a:b] ascending with introsort-style quicksort:
// once maxDepth reaches zero it degrades to heap sort, it recurses into
// the smaller partition (looping on the larger one to bound stack depth),
// and finishes ranges of 7 or fewer elements with insertion sort.
func quickSortAsc(data []uint64, a, b, maxDepth int) {
	var mlo, mhi int
	for b-a > 7 {
		if maxDepth == 0 {
			// Recursion budget exhausted: guaranteed O(n log n) fallback.
			heapSortAsc(data, a, b)
			return
		}
		maxDepth--
		mlo, mhi = doPivotAsc(data, a, b)
		// Recurse on the smaller side, loop on the larger.
		if mlo-a < b-mhi {
			quickSortAsc(data, a, mlo, maxDepth)
			a = mhi
		} else {
			quickSortAsc(data, mhi, b, maxDepth)
			b = mlo
		}
	}
	if b-a > 1 {
		insertionSortAsc(data, a, b)
	}
}
|
||||
|
||||
// Asc sorts data in ascending order (unstable).
func Asc(data []uint64) {
	// Recursion budget: twice the number of bits needed to represent
	// len(data); quickSortAsc falls back to heap sort beyond it.
	maxDepth := 0
	for i := len(data); i > 0; i >>= 1 {
		maxDepth++
	}
	maxDepth *= 2
	quickSortAsc(data, 0, len(data), maxDepth)
}
|
||||
|
||||
// IsSortedAsc reports whether data is sorted in ascending order.
// Empty and single-element slices are trivially sorted.
func IsSortedAsc(data []uint64) bool {
	for i := 1; i < len(data); i++ {
		if data[i-1] > data[i] {
			return false
		}
	}
	return true
}
|
||||
|
||||
// StableAsc sorts data in ascending order using a stable algorithm:
// insertion-sort fixed-size blocks, then merge adjacent sorted blocks
// bottom-up with an in-place symmetric merge, doubling the block size
// each pass.
func StableAsc(data []uint64) {
	n := len(data)
	blockSize := 20
	// Phase 1: insertion-sort each block of blockSize elements
	// (plus the final partial block).
	a, b := 0, blockSize
	for b <= n {
		insertionSortAsc(data, a, b)
		a = b
		b += blockSize
	}
	insertionSortAsc(data, a, n)

	// Phase 2: merge pairs of adjacent sorted blocks until one block
	// covers the whole slice.
	for blockSize < n {
		a, b = 0, 2*blockSize
		for b <= n {
			symMergeAsc(data, a, a+blockSize, b)
			a = b
			b += 2 * blockSize
		}
		symMergeAsc(data, a, a+blockSize, n)
		blockSize *= 2
	}
}
|
||||
|
||||
// symMergeAsc merges the two sorted sub-ranges data[a:m] and data[m:b] in
// place: binary-search a split point, rotate the middle, and recurse on
// both halves (SymMerge-style; appears adapted from Go's sort.symMerge).
func symMergeAsc(data []uint64, a, m, b int) {
	if a >= m || m >= b {
		// One of the halves is empty: nothing to merge.
		return
	}
	mid := a + (b-a)/2
	n := mid + m
	var start, c, r, p int
	// Binary search for the rotation point start; the searched window
	// depends on which half straddles mid.
	if m > mid {
		start = n - b
		r, p = mid, n-1
		for start < r {
			c = start + (r-start)/2
			if data[p-c] >= data[c] {
				start = c + 1
			} else {
				r = c
			}
		}
	} else {
		start = a
		r, p = m, n-1
		for start < r {
			c = start + (r-start)/2
			if data[p-c] >= data[c] {
				start = c + 1
			} else {
				r = c
			}
		}
	}
	end := n - start
	// Rotate the middle section, then merge each half recursively.
	rotateAsc(data, start, m, end)
	symMergeAsc(data, a, start, mid)
	symMergeAsc(data, mid, end, b)
}
|
||||
|
||||
// rotateAsc exchanges the two consecutive runs data[a:m] and data[m:b] in
// place by repeatedly block-swapping the smaller side across the boundary
// (no temporary buffer).
func rotateAsc(data []uint64, a, m, b int) {
	i := m - a
	if i == 0 {
		// Left run empty: nothing to rotate.
		return
	}
	j := b - m
	if j == 0 {
		// Right run empty: nothing to rotate.
		return
	}
	// Equal-length runs: a single block swap suffices.
	if i == j {
		swapRangeAsc(data, a, m, i)
		return
	}
	p := a + i
	// Swap the smaller side across the boundary p, shrinking the
	// problem until both sides have equal length.
	for i != j {
		if i > j {
			swapRangeAsc(data, p-i, p, j)
			i -= j
		} else {
			swapRangeAsc(data, p-i, p+j-i, i)
			j -= i
		}
	}
	swapRangeAsc(data, p-i, p, i)
}
|
||||
|
||||
// ------------- DESCENDING -------------
|
||||
|
||||
// heapSortDesc sorts data[a:b] in descending order; mirror of heapSortAsc
// with a min-heap, so the smallest element is popped to the end each step.
func heapSortDesc(data []uint64, a, b int) {
	first := a
	lo := 0
	hi := b - a
	// Build the min-heap (heap indices are relative to first).
	for i := (hi - 1) / 2; i >= 0; i-- {
		siftDownDesc(data, i, hi, first)
	}
	// Pop the minimum to the end of the range, shrinking the heap.
	for i := hi - 1; i >= 0; i-- {
		data[first], data[first+i] = data[first+i], data[first]
		siftDownDesc(data, lo, i, first)
	}
}
|
||||
|
||||
// insertionSortDesc sorts data[a:b] in descending order using insertion sort.
func insertionSortDesc(data []uint64, a, b int) {
	for i := a + 1; i < b; i++ {
		// Bubble data[i] left until it meets a larger-or-equal element.
		for j := i; j > a; j-- {
			if data[j-1] >= data[j] {
				break
			}
			data[j], data[j-1] = data[j-1], data[j]
		}
	}
}
|
||||
|
||||
// siftDownDesc restores the min-heap property for the subtree rooted at lo,
// within a heap of hi elements whose element 0 lives at data[first]
// (mirror of siftDownAsc with comparisons reversed).
func siftDownDesc(data []uint64, lo, hi, first int) {
	root := lo
	for {
		child := 2*root + 1
		if child >= hi {
			break
		}
		// Pick the smaller of the two children.
		if child+1 < hi && data[first+child] > data[first+child+1] {
			child++
		}
		// Heap property already holds: done.
		if data[first+root] <= data[first+child] {
			return
		}
		data[first+root], data[first+child] = data[first+child], data[first+root]
		root = child
	}
}
|
||||
|
||||
// medianOfThreeDesc orders the elements at indices m1, m0, m2 so that
// data[m0] >= data[m1] >= data[m2] (mirror of medianOfThreeAsc).
func medianOfThreeDesc(data []uint64, m1, m0, m2 int) {
	// bubble sort on 3 elements
	if data[m1] > data[m0] {
		data[m1], data[m0] = data[m0], data[m1]
	}
	if data[m2] > data[m1] {
		data[m2], data[m1] = data[m1], data[m2]
	}
	if data[m1] > data[m0] {
		data[m1], data[m0] = data[m0], data[m1]
	}
}
|
||||
|
||||
// swapRangeDesc exchanges the n elements starting at index a with the n
// elements starting at index b (identical to swapRangeAsc; the two sort
// directions keep separate copies of every helper).
func swapRangeDesc(data []uint64, a, b, n int) {
	for i := 0; i < n; i++ {
		data[a], data[b] = data[b], data[a]
		a++
		b++
	}
}
|
||||
|
||||
// doPivotDesc partitions data[lo:hi] for descending order; mirror of
// doPivotAsc with every comparison reversed. On return,
// data[lo:midlo] > pivot, data[midlo:midhi] == pivot, data[midhi:hi] < pivot.
func doPivotDesc(data []uint64, lo, hi int) (midlo, midhi int) {
	m := lo + (hi-lo)/2
	if hi-lo > 40 {
		// Tukey ninther pivot selection for large ranges.
		s := (hi - lo) / 8
		medianOfThreeDesc(data, lo, lo+s, lo+2*s)
		medianOfThreeDesc(data, m, m-s, m+s)
		medianOfThreeDesc(data, hi-1, hi-1-s, hi-1-2*s)
	}
	medianOfThreeDesc(data, lo, m, hi-1)

	// Invariant, with the pivot value at data[pivot]:
	//   data[lo:a]  > pivot
	//   data[a:b]  == pivot
	//   data[c:d]  == pivot
	//   data[d:hi]  < pivot
	pivot := lo
	a, b, c, d := lo+1, lo+1, hi, hi
	for {
		for b < c {
			if data[b] > data[pivot] {
				b++
			} else if data[pivot] <= data[b] { // here: data[b] == pivot
				data[a], data[b] = data[b], data[a]
				a++
				b++
			} else {
				break
			}
		}
		for b < c {
			if data[pivot] > data[c-1] {
				c--
			} else if data[c-1] <= data[pivot] { // here: data[c-1] == pivot
				data[c-1], data[d-1] = data[d-1], data[c-1]
				c--
				d--
			} else {
				break
			}
		}
		if b >= c {
			break
		}
		// data[b] < pivot and data[c-1] > pivot: exchange them.
		data[b], data[c-1] = data[c-1], data[b]
		b++
		c--
	}

	// Move the pivot-equal runs from the edges into the middle.
	n := min(b-a, a-lo)
	swapRangeDesc(data, lo, b-n, n)

	n = min(hi-d, d-c)
	swapRangeDesc(data, c, hi-n, n)

	return lo + b - a, hi - (d - c)
}
|
||||
|
||||
// quickSortDesc sorts data[a:b] descending; mirror of quickSortAsc
// (introsort: heap-sort fallback at maxDepth == 0, recurse into the
// smaller partition, insertion sort for ranges of 7 or fewer elements).
func quickSortDesc(data []uint64, a, b, maxDepth int) {
	var mlo, mhi int
	for b-a > 7 {
		if maxDepth == 0 {
			// Recursion budget exhausted: guaranteed O(n log n) fallback.
			heapSortDesc(data, a, b)
			return
		}
		maxDepth--
		mlo, mhi = doPivotDesc(data, a, b)
		// Recurse on the smaller side, loop on the larger.
		if mlo-a < b-mhi {
			quickSortDesc(data, a, mlo, maxDepth)
			a = mhi
		} else {
			quickSortDesc(data, mhi, b, maxDepth)
			b = mlo
		}
	}
	if b-a > 1 {
		insertionSortDesc(data, a, b)
	}
}
|
||||
|
||||
// Desc sorts data in descending order (unstable).
func Desc(data []uint64) {
	// Recursion budget: twice the number of bits needed to represent
	// len(data); quickSortDesc falls back to heap sort beyond it.
	maxDepth := 0
	for i := len(data); i > 0; i >>= 1 {
		maxDepth++
	}
	maxDepth *= 2
	quickSortDesc(data, 0, len(data), maxDepth)
}
|
||||
|
||||
// IsSortedDesc reports whether data is sorted in descending order.
// Empty and single-element slices are trivially sorted.
func IsSortedDesc(data []uint64) bool {
	for i := 1; i < len(data); i++ {
		if data[i-1] < data[i] {
			return false
		}
	}
	return true
}
|
||||
|
||||
// StableDesc sorts data in descending order using a stable algorithm;
// mirror of StableAsc: insertion-sort fixed-size blocks, then merge
// adjacent sorted blocks bottom-up with an in-place symmetric merge.
func StableDesc(data []uint64) {
	n := len(data)
	blockSize := 20
	// Phase 1: insertion-sort each block of blockSize elements
	// (plus the final partial block).
	a, b := 0, blockSize
	for b <= n {
		insertionSortDesc(data, a, b)
		a = b
		b += blockSize
	}
	insertionSortDesc(data, a, n)

	// Phase 2: merge pairs of adjacent sorted blocks until one block
	// covers the whole slice.
	for blockSize < n {
		a, b = 0, 2*blockSize
		for b <= n {
			symMergeDesc(data, a, a+blockSize, b)
			a = b
			b += 2 * blockSize
		}
		symMergeDesc(data, a, a+blockSize, n)
		blockSize *= 2
	}
}
|
||||
|
||||
// symMergeDesc merges the two descending-sorted sub-ranges data[a:m] and
// data[m:b] in place; mirror of symMergeAsc with the binary-search
// comparison reversed.
func symMergeDesc(data []uint64, a, m, b int) {
	if a >= m || m >= b {
		// One of the halves is empty: nothing to merge.
		return
	}
	mid := a + (b-a)/2
	n := mid + m
	var start, c, r, p int
	// Binary search for the rotation point start; the searched window
	// depends on which half straddles mid.
	if m > mid {
		start = n - b
		r, p = mid, n-1
		for start < r {
			c = start + (r-start)/2
			if data[p-c] < data[c] {
				start = c + 1
			} else {
				r = c
			}
		}
	} else {
		start = a
		r, p = m, n-1
		for start < r {
			c = start + (r-start)/2
			if data[p-c] < data[c] {
				start = c + 1
			} else {
				r = c
			}
		}
	}
	end := n - start
	// Rotate the middle section, then merge each half recursively.
	rotateDesc(data, start, m, end)
	symMergeDesc(data, a, start, mid)
	symMergeDesc(data, mid, end, b)
}
|
||||
|
||||
// rotateDesc exchanges the two consecutive runs data[a:m] and data[m:b] in
// place (identical algorithm to rotateAsc, using swapRangeDesc).
func rotateDesc(data []uint64, a, m, b int) {
	i := m - a
	if i == 0 {
		// Left run empty: nothing to rotate.
		return
	}
	j := b - m
	if j == 0 {
		// Right run empty: nothing to rotate.
		return
	}
	// Equal-length runs: a single block swap suffices.
	if i == j {
		swapRangeDesc(data, a, m, i)
		return
	}
	p := a + i
	// Swap the smaller side across the boundary p, shrinking the
	// problem until both sides have equal length.
	for i != j {
		if i > j {
			swapRangeDesc(data, p-i, p, j)
			i -= j
		} else {
			swapRangeDesc(data, p-i, p+j-i, i)
			j -= i
		}
	}
	swapRangeDesc(data, p-i, p, i)
}
|
31
vendor/github.com/go-ego/riot/counters.go
generated
vendored
Normal file
31
vendor/github.com/go-ego/riot/counters.go
generated
vendored
Normal file
@ -0,0 +1,31 @@
|
||||
// Copyright 2013 Hui Chen
|
||||
// Copyright 2016 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package riot
|
||||
|
||||
// NumTokenIndexAdded returns the number of token-index entries added so far.
func (engine *Engine) NumTokenIndexAdded() uint64 {
	return engine.numTokenIndexAdded
}

// NumDocsIndexed returns the number of documents indexed so far.
func (engine *Engine) NumDocsIndexed() uint64 {
	return engine.numDocsIndexed
}

// NumDocsRemoved returns the number of documents removed so far.
func (engine *Engine) NumDocsRemoved() uint64 {
	return engine.numDocsRemoved
}
|
790
vendor/github.com/go-ego/riot/engine.go
generated
vendored
Normal file
790
vendor/github.com/go-ego/riot/engine.go
generated
vendored
Normal file
@ -0,0 +1,790 @@
|
||||
// Copyright 2013 Hui Chen
|
||||
// Copyright 2016 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
/*
|
||||
|
||||
Package riot is riot engine
|
||||
*/
|
||||
package riot
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"runtime"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
// "reflect"
|
||||
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/go-ego/riot/core"
|
||||
"github.com/go-ego/riot/store"
|
||||
"github.com/go-ego/riot/types"
|
||||
"github.com/go-ego/riot/utils"
|
||||
|
||||
"github.com/go-ego/gse"
|
||||
"github.com/go-ego/murmur"
|
||||
"github.com/shirou/gopsutil/mem"
|
||||
)
|
||||
|
||||
const (
	// Version is the riot version string.
	Version string = "v0.10.0.425, Danube River!"

	// NumNanosecondsInAMillisecond is the number of nanoseconds in a millisecond.
	NumNanosecondsInAMillisecond = 1000000
	// StoreFilePrefix is the filename prefix of the persistent store shards.
	StoreFilePrefix = "riot"

	// DefaultPath is the default persistent-store directory.
	DefaultPath = "./riot-index"
)
|
||||
|
||||
// GetVersion returns the riot version string.
func GetVersion() string {
	return Version
}
|
||||
|
||||
// Engine is the riot search engine: the segmenter, the sharded indexers
// and rankers, the channels that connect them, and the optional
// persistent store.
type Engine struct {
	loc sync.RWMutex

	// Counters tracking how many documents were indexed, removed, etc.
	numDocsIndexed       uint64
	numDocsRemoved       uint64
	numDocsForceUpdated  uint64
	numIndexingReqs      uint64
	numRemovingReqs      uint64
	numForceUpdatingReqs uint64
	numTokenIndexAdded   uint64
	numDocsStored        uint64

	// Options recorded at initialization time.
	initOptions types.EngineOpts
	initialized bool

	indexers  []core.Indexer
	rankers   []core.Ranker
	segmenter gse.Segmenter
	// loaded is true once a segmenter dictionary has been loaded
	// (either via Init or WithGse).
	loaded     bool
	stopTokens StopTokens
	dbs        []store.Store

	// Channels feeding the segmenter and the per-shard indexers.
	segmenterChan         chan segmenterReq
	indexerAddDocChans    []chan indexerAddDocReq
	indexerRemoveDocChans []chan indexerRemoveDocReq
	rankerAddDocChans     []chan rankerAddDocReq

	// Channels feeding the per-shard lookups and rankers.
	indexerLookupChans   []chan indexerLookupReq
	rankerRankChans      []chan rankerRankReq
	rankerRemoveDocChans []chan rankerRemoveDocReq

	// Channels used by the persistent store.
	storeIndexDocChans []chan storeIndexDocReq
	storeInitChan      chan bool
}
|
||||
|
||||
// Indexer creates the indexer channels: one add, one remove and one lookup
// channel per shard, each buffered to options.IndexerBufLen.
func (engine *Engine) Indexer(options types.EngineOpts) {
	engine.indexerAddDocChans = make(
		[]chan indexerAddDocReq, options.NumShards)

	engine.indexerRemoveDocChans = make(
		[]chan indexerRemoveDocReq, options.NumShards)

	engine.indexerLookupChans = make(
		[]chan indexerLookupReq, options.NumShards)

	for shard := 0; shard < options.NumShards; shard++ {
		engine.indexerAddDocChans[shard] = make(
			chan indexerAddDocReq, options.IndexerBufLen)

		engine.indexerRemoveDocChans[shard] = make(
			chan indexerRemoveDocReq, options.IndexerBufLen)

		engine.indexerLookupChans[shard] = make(
			chan indexerLookupReq, options.IndexerBufLen)
	}
}
|
||||
|
||||
// Ranker creates the ranker channels: one add, one rank and one remove
// channel per shard, each buffered to options.RankerBufLen.
func (engine *Engine) Ranker(options types.EngineOpts) {
	engine.rankerAddDocChans = make(
		[]chan rankerAddDocReq, options.NumShards)

	engine.rankerRankChans = make(
		[]chan rankerRankReq, options.NumShards)

	engine.rankerRemoveDocChans = make(
		[]chan rankerRemoveDocReq, options.NumShards)

	for shard := 0; shard < options.NumShards; shard++ {
		engine.rankerAddDocChans[shard] = make(
			chan rankerAddDocReq, options.RankerBufLen)

		engine.rankerRankChans[shard] = make(
			chan rankerRankReq, options.RankerBufLen)

		engine.rankerRemoveDocChans[shard] = make(
			chan rankerRemoveDocReq, options.RankerBufLen)
	}
}
|
||||
|
||||
// InitStore creates the persistent-store channels: one unbuffered document
// channel per store shard, plus the buffered channel on which the restore
// workers signal completion.
func (engine *Engine) InitStore() {
	engine.storeIndexDocChans = make(
		[]chan storeIndexDocReq, engine.initOptions.StoreShards)

	for shard := 0; shard < engine.initOptions.StoreShards; shard++ {
		engine.storeIndexDocChans[shard] = make(
			chan storeIndexDocReq)
	}
	engine.storeInitChan = make(
		chan bool, engine.initOptions.StoreShards)
}
|
||||
|
||||
// CheckMem check the memory when the memory is larger
|
||||
// than 99.99% using the store
|
||||
func (engine *Engine) CheckMem() {
|
||||
// Todo test
|
||||
if !engine.initOptions.UseStore {
|
||||
log.Println("Check virtualMemory...")
|
||||
|
||||
vmem, _ := mem.VirtualMemory()
|
||||
log.Printf("Total: %v, Free: %v, UsedPercent: %f%%\n",
|
||||
vmem.Total, vmem.Free, vmem.UsedPercent)
|
||||
|
||||
useMem := fmt.Sprintf("%.2f", vmem.UsedPercent)
|
||||
if useMem == "99.99" {
|
||||
engine.initOptions.UseStore = true
|
||||
engine.initOptions.StoreFolder = DefaultPath
|
||||
// os.MkdirAll(DefaultPath, 0777)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Store opens (or creates) the persistent store shards, replays the stored
// documents into the engine, waits until every replayed indexing request
// has been applied, then reopens the shards and starts the workers that
// persist newly indexed documents.
func (engine *Engine) Store() {
	// if engine.initOptions.UseStore {
	err := os.MkdirAll(engine.initOptions.StoreFolder, 0700)
	if err != nil {
		log.Fatalf("Can not create directory: %s ; %v",
			engine.initOptions.StoreFolder, err)
	}

	// Open or create one database per store shard.
	engine.dbs = make([]store.Store, engine.initOptions.StoreShards)
	for shard := 0; shard < engine.initOptions.StoreShards; shard++ {
		dbPath := engine.initOptions.StoreFolder + "/" +
			StoreFilePrefix + "." + strconv.Itoa(shard)

		db, err := store.OpenStore(dbPath, engine.initOptions.StoreEngine)
		if db == nil || err != nil {
			log.Fatal("Unable to open database ", dbPath, ": ", err)
		}
		engine.dbs[shard] = db
	}

	// Restore persisted documents, one worker per shard.
	for shard := 0; shard < engine.initOptions.StoreShards; shard++ {
		go engine.storeInitWorker(shard)
	}

	// Wait for every restore worker to signal completion.
	for shard := 0; shard < engine.initOptions.StoreShards; shard++ {
		<-engine.storeInitChan
	}

	// Busy-wait (yielding the processor) until every replayed indexing
	// request has actually been indexed.
	for {
		runtime.Gosched()

		engine.loc.RLock()
		numDoced := engine.numIndexingReqs == engine.numDocsIndexed
		engine.loc.RUnlock()

		if numDoced {
			break
		}

	}

	// Close and reopen every database after the restore phase.
	for shard := 0; shard < engine.initOptions.StoreShards; shard++ {
		engine.dbs[shard].Close()
		dbPath := engine.initOptions.StoreFolder + "/" +
			StoreFilePrefix + "." + strconv.Itoa(shard)

		db, err := store.OpenStore(dbPath, engine.initOptions.StoreEngine)
		if db == nil || err != nil {
			log.Fatal("Unable to open database ", dbPath, ": ", err)
		}
		engine.dbs[shard] = db
	}

	// Start the workers that persist newly indexed documents.
	for shard := 0; shard < engine.initOptions.StoreShards; shard++ {
		go engine.storeIndexDocWorker(shard)
	}
	// }
}
|
||||
|
||||
// WithGse uses a user-defined segmenter. If the provided segmenter already
// has its dictionary loaded, the `opt.GseDict` option will be ignored.
// Must be called before Init; calling it afterwards aborts the process.
func (engine *Engine) WithGse(segmenter gse.Segmenter) *Engine {
	if engine.initialized {
		log.Fatal(`Do not re-initialize the engine,
		WithGse should call before initialize the engine.`)
	}

	engine.segmenter = segmenter
	engine.loaded = true
	return engine
}
|
||||
|
||||
// Init initializes the engine with the given options. It must be called
// exactly once: it loads the segmenter dictionary and stop tokens (unless
// gse is disabled), creates the per-shard indexers/rankers and their
// channels, and starts every worker goroutine (plus the persistent store
// when enabled).
func (engine *Engine) Init(options types.EngineOpts) {
	// GOMAXPROCS tuning was considered but left at the runtime default.
	// runtime.GOMAXPROCS(runtime.NumCPU())
	// runtime.GOMAXPROCS(128)

	// Refuse double initialization.
	if engine.initialized {
		log.Fatal("Do not re-initialize the engine.")
	}

	// Fall back to the default ("zh") dictionary when none is configured
	// and no dictionary was pre-loaded via WithGse.
	if options.GseDict == "" && !options.NotUseGse && !engine.loaded {
		log.Printf("Dictionary file path is empty, load the default dictionary file.")
		options.GseDict = "zh"
	}

	if options.UseStore == true && options.StoreFolder == "" {
		log.Printf("Store file path is empty, use default folder path.")
		options.StoreFolder = DefaultPath
		// os.MkdirAll(DefaultPath, 0777)
	}

	options.Init()
	engine.initOptions = options
	engine.initialized = true

	if !options.NotUseGse {
		if !engine.loaded {
			// Load the segmenter dictionary.
			engine.segmenter.LoadDict(options.GseDict)
			engine.loaded = true
		}

		// Initialize the stop-token set.
		engine.stopTokens.Init(options.StopTokenFile)
	}

	// Create one indexer and one ranker per shard.
	for shard := 0; shard < options.NumShards; shard++ {
		engine.indexers = append(engine.indexers, core.Indexer{})
		engine.indexers[shard].Init(*options.IndexerOpts)

		engine.rankers = append(engine.rankers, core.Ranker{})
		engine.rankers[shard].Init(options.IDOnly)
	}

	// Channel feeding the segmenter workers.
	engine.segmenterChan = make(
		chan segmenterReq, options.NumGseThreads)

	// Create the indexer channels.
	engine.Indexer(options)

	// Create the ranker channels.
	engine.Ranker(options)

	// Possibly force the store on when memory is nearly exhausted.
	// engine.CheckMem(engine.initOptions.UseStore)
	engine.CheckMem()

	// Create the persistent-store channels.
	if engine.initOptions.UseStore {
		engine.InitStore()
	}

	// Start the segmenter workers.
	for iThread := 0; iThread < options.NumGseThreads; iThread++ {
		go engine.segmenterWorker()
	}

	// Start the indexer and ranker workers for every shard.
	for shard := 0; shard < options.NumShards; shard++ {
		go engine.indexerAddDocWorker(shard)
		go engine.indexerRemoveDocWorker(shard)
		go engine.rankerAddDocWorker(shard)
		go engine.rankerRemoveDocWorker(shard)

		for i := 0; i < options.NumIndexerThreadsPerShard; i++ {
			go engine.indexerLookupWorker(shard)
		}
		for i := 0; i < options.NumRankerThreadsPerShard; i++ {
			go engine.rankerRankWorker(shard)
		}
	}

	// Start the persistent store (also restores persisted documents).
	if engine.initOptions.UseStore {
		engine.Store()
	}

	atomic.AddUint64(&engine.numDocsStored, engine.numIndexingReqs)
}
|
||||
|
||||
// IndexDoc adds the document to the index (delegates to Index).
//
// Parameters:
//   docId must be unique; docId == 0 marks an invalid document used only to
//   force an index flush, valid documents are [1, +inf).
//   data: see the DocIndexData comments.
//   forceUpdate: when true, flush the cache as soon as possible instead of
//   waiting for it to fill.
//
// Notes:
//  1. This function is thread-safe; call it concurrently to speed up indexing.
//  2. It is asynchronous: the document may not be indexed yet when the call
//     returns, so an immediate Search may miss it. Call FlushIndex to force
//     a flush.
func (engine *Engine) IndexDoc(docId uint64, data types.DocData,
	forceUpdate ...bool) {
	engine.Index(docId, data, forceUpdate...)
}
|
||||
|
||||
// Index adds the document to the index.
// See IndexDoc for the parameter semantics; this is the underlying entry point.
func (engine *Engine) Index(docId uint64, data types.DocData,
	forceUpdate ...bool) {

	var force bool
	if len(forceUpdate) > 0 {
		force = forceUpdate[0]
	}

	// if engine.HasDoc(docId) {
	// 	engine.RemoveDoc(docId)
	// }

	// Hand the document to the segmenter pipeline first.
	engine.internalIndexDoc(docId, data, force)

	// Pick the persistent-store shard for this docId.
	hash := murmur.Sum32(fmt.Sprintf("%d", docId)) %
		uint32(engine.initOptions.StoreShards)

	// docId == 0 is the force-flush sentinel and is never persisted.
	if engine.initOptions.UseStore && docId != 0 {
		engine.storeIndexDocChans[hash] <- storeIndexDocReq{
			docId: docId, data: data}
	}
}
|
||||
|
||||
// internalIndexDoc validates engine state, updates the request counters that
// Flush polls, and forwards the document to the segmenter channel.
func (engine *Engine) internalIndexDoc(docId uint64, data types.DocData,
	forceUpdate bool) {

	if !engine.initialized {
		log.Fatal("The engine must be initialized first.")
	}

	// docId == 0 is the force-flush sentinel, not a real indexing request.
	if docId != 0 {
		atomic.AddUint64(&engine.numIndexingReqs, 1)
	}
	if forceUpdate {
		atomic.AddUint64(&engine.numForceUpdatingReqs, 1)
	}

	// Hash of docId plus content; used downstream to choose a shard for
	// this request.
	hash := murmur.Sum32(fmt.Sprintf("%d%s", docId, data.Content))
	engine.segmenterChan <- segmenterReq{
		docId: docId, hash: hash, data: data, forceUpdate: forceUpdate}
}
|
||||
|
||||
// RemoveDoc removes the document from the index.
//
// Parameters:
//   docId       unique document identifier; docId == 0 marks an invalid
//               document (used to force-flush the index), [1, +oo) are
//               valid documents
//   forceUpdate whether to force-flush the cache; when true the document is
//               removed as soon as possible, otherwise removal waits until
//               the cache is full and happens in one batch
//
// Notes:
//   1. This function is thread safe; call it concurrently to speed up indexing.
//   2. The call is asynchronous: the removal may not have happened yet when
//      the function returns, so an immediate Search may still find the
//      document. Call FlushIndex to force-flush the index.
func (engine *Engine) RemoveDoc(docId uint64, forceUpdate ...bool) {
	var force bool
	if len(forceUpdate) > 0 {
		force = forceUpdate[0]
	}

	if !engine.initialized {
		log.Fatal("The engine must be initialized first.")
	}

	if docId != 0 {
		atomic.AddUint64(&engine.numRemovingReqs, 1)
	}
	if force {
		atomic.AddUint64(&engine.numForceUpdatingReqs, 1)
	}
	for shard := 0; shard < engine.initOptions.NumShards; shard++ {
		engine.indexerRemoveDocChans[shard] <- indexerRemoveDocReq{
			docId: docId, forceUpdate: force}

		// The rankers never receive the force-flush sentinel.
		if docId == 0 {
			continue
		}
		engine.rankerRemoveDocChans[shard] <- rankerRemoveDocReq{docId: docId}
	}

	if engine.initOptions.UseStore && docId != 0 {
		// Remove the document from the persistent store as well.
		hash := murmur.Sum32(fmt.Sprintf("%d", docId)) %
			uint32(engine.initOptions.StoreShards)

		go engine.storeRemoveDocWorker(docId, hash)
	}
}
|
||||
|
||||
// // 获取文本的分词结果
|
||||
// func (engine *Engine) Tokens(text []byte) (tokens []string) {
|
||||
// querySegments := engine.segmenter.Segment(text)
|
||||
// for _, s := range querySegments {
|
||||
// token := s.Token().Text()
|
||||
// if !engine.stopTokens.IsStopToken(token) {
|
||||
// tokens = append(tokens, token)
|
||||
// }
|
||||
// }
|
||||
// return tokens
|
||||
// }
|
||||
|
||||
// Segment get the word segmentation result of the text
|
||||
// 获取文本的分词结果, 只分词与过滤弃用词
|
||||
func (engine *Engine) Segment(content string) (keywords []string) {
|
||||
segments := engine.segmenter.ModeSegment([]byte(content),
|
||||
engine.initOptions.GseMode)
|
||||
|
||||
for _, segment := range segments {
|
||||
token := segment.Token().Text()
|
||||
if !engine.stopTokens.IsStopToken(token) {
|
||||
keywords = append(keywords, token)
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// Tokens get the engine tokens
|
||||
func (engine *Engine) Tokens(request types.SearchReq) (tokens []string) {
|
||||
// 收集关键词
|
||||
// tokens := []string{}
|
||||
if request.Text != "" {
|
||||
request.Text = strings.ToLower(request.Text)
|
||||
if engine.initOptions.NotUseGse {
|
||||
tokens = strings.Split(request.Text, " ")
|
||||
} else {
|
||||
// querySegments := engine.segmenter.Segment([]byte(request.Text))
|
||||
// tokens = engine.Tokens([]byte(request.Text))
|
||||
tokens = engine.Segment(request.Text)
|
||||
}
|
||||
|
||||
// 叠加 tokens
|
||||
for _, t := range request.Tokens {
|
||||
tokens = append(tokens, t)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
for _, t := range request.Tokens {
|
||||
tokens = append(tokens, t)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// RankId rank docs by types.ScoredIDs
|
||||
func (engine *Engine) RankId(request types.SearchReq, RankOpts types.RankOpts,
|
||||
tokens []string, rankerReturnChan chan rankerReturnReq) (
|
||||
output types.SearchResp) {
|
||||
// 从通信通道读取排序器的输出
|
||||
numDocs := 0
|
||||
var rankOutput types.ScoredIDs
|
||||
// var rankOutput interface{}
|
||||
|
||||
//**********/ begin
|
||||
timeout := request.Timeout
|
||||
isTimeout := false
|
||||
if timeout <= 0 {
|
||||
// 不设置超时
|
||||
for shard := 0; shard < engine.initOptions.NumShards; shard++ {
|
||||
rankerOutput := <-rankerReturnChan
|
||||
if !request.CountDocsOnly {
|
||||
if rankerOutput.docs != nil {
|
||||
for _, doc := range rankerOutput.docs.(types.ScoredIDs) {
|
||||
rankOutput = append(rankOutput, doc)
|
||||
}
|
||||
}
|
||||
}
|
||||
numDocs += rankerOutput.numDocs
|
||||
}
|
||||
} else {
|
||||
// 设置超时
|
||||
deadline := time.Now().Add(time.Nanosecond *
|
||||
time.Duration(NumNanosecondsInAMillisecond*request.Timeout))
|
||||
|
||||
for shard := 0; shard < engine.initOptions.NumShards; shard++ {
|
||||
select {
|
||||
case rankerOutput := <-rankerReturnChan:
|
||||
if !request.CountDocsOnly {
|
||||
if rankerOutput.docs != nil {
|
||||
for _, doc := range rankerOutput.docs.(types.ScoredIDs) {
|
||||
rankOutput = append(rankOutput, doc)
|
||||
}
|
||||
}
|
||||
}
|
||||
numDocs += rankerOutput.numDocs
|
||||
case <-time.After(deadline.Sub(time.Now())):
|
||||
isTimeout = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 再排序
|
||||
if !request.CountDocsOnly && !request.Orderless {
|
||||
if RankOpts.ReverseOrder {
|
||||
sort.Sort(sort.Reverse(rankOutput))
|
||||
} else {
|
||||
sort.Sort(rankOutput)
|
||||
}
|
||||
}
|
||||
|
||||
// 准备输出
|
||||
output.Tokens = tokens
|
||||
// 仅当 CountDocsOnly 为 false 时才充填 output.Docs
|
||||
if !request.CountDocsOnly {
|
||||
if request.Orderless {
|
||||
// 无序状态无需对 Offset 截断
|
||||
output.Docs = rankOutput
|
||||
} else {
|
||||
var start, end int
|
||||
if RankOpts.MaxOutputs == 0 {
|
||||
start = utils.MinInt(RankOpts.OutputOffset, len(rankOutput))
|
||||
end = len(rankOutput)
|
||||
} else {
|
||||
start = utils.MinInt(RankOpts.OutputOffset, len(rankOutput))
|
||||
end = utils.MinInt(start+RankOpts.MaxOutputs, len(rankOutput))
|
||||
}
|
||||
output.Docs = rankOutput[start:end]
|
||||
}
|
||||
}
|
||||
|
||||
output.NumDocs = numDocs
|
||||
output.Timeout = isTimeout
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// Ranks rank docs by types.ScoredDocs
|
||||
func (engine *Engine) Ranks(request types.SearchReq, RankOpts types.RankOpts,
|
||||
tokens []string, rankerReturnChan chan rankerReturnReq) (
|
||||
output types.SearchResp) {
|
||||
// 从通信通道读取排序器的输出
|
||||
numDocs := 0
|
||||
rankOutput := types.ScoredDocs{}
|
||||
|
||||
//**********/ begin
|
||||
timeout := request.Timeout
|
||||
isTimeout := false
|
||||
if timeout <= 0 {
|
||||
// 不设置超时
|
||||
for shard := 0; shard < engine.initOptions.NumShards; shard++ {
|
||||
rankerOutput := <-rankerReturnChan
|
||||
if !request.CountDocsOnly {
|
||||
if rankerOutput.docs != nil {
|
||||
for _, doc := range rankerOutput.docs.(types.ScoredDocs) {
|
||||
rankOutput = append(rankOutput, doc)
|
||||
}
|
||||
}
|
||||
}
|
||||
numDocs += rankerOutput.numDocs
|
||||
}
|
||||
} else {
|
||||
// 设置超时
|
||||
deadline := time.Now().Add(time.Nanosecond *
|
||||
time.Duration(NumNanosecondsInAMillisecond*request.Timeout))
|
||||
|
||||
for shard := 0; shard < engine.initOptions.NumShards; shard++ {
|
||||
select {
|
||||
case rankerOutput := <-rankerReturnChan:
|
||||
if !request.CountDocsOnly {
|
||||
if rankerOutput.docs != nil {
|
||||
for _, doc := range rankerOutput.docs.(types.ScoredDocs) {
|
||||
rankOutput = append(rankOutput, doc)
|
||||
}
|
||||
}
|
||||
}
|
||||
numDocs += rankerOutput.numDocs
|
||||
case <-time.After(deadline.Sub(time.Now())):
|
||||
isTimeout = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 再排序
|
||||
if !request.CountDocsOnly && !request.Orderless {
|
||||
if RankOpts.ReverseOrder {
|
||||
sort.Sort(sort.Reverse(rankOutput))
|
||||
} else {
|
||||
sort.Sort(rankOutput)
|
||||
}
|
||||
}
|
||||
|
||||
// 准备输出
|
||||
output.Tokens = tokens
|
||||
// 仅当 CountDocsOnly 为 false 时才充填 output.Docs
|
||||
if !request.CountDocsOnly {
|
||||
if request.Orderless {
|
||||
// 无序状态无需对 Offset 截断
|
||||
output.Docs = rankOutput
|
||||
} else {
|
||||
var start, end int
|
||||
if RankOpts.MaxOutputs == 0 {
|
||||
start = utils.MinInt(RankOpts.OutputOffset, len(rankOutput))
|
||||
end = len(rankOutput)
|
||||
} else {
|
||||
start = utils.MinInt(RankOpts.OutputOffset, len(rankOutput))
|
||||
end = utils.MinInt(start+RankOpts.MaxOutputs, len(rankOutput))
|
||||
}
|
||||
output.Docs = rankOutput[start:end]
|
||||
}
|
||||
}
|
||||
|
||||
output.NumDocs = numDocs
|
||||
output.Timeout = isTimeout
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// Search finds the documents that satisfy the search criteria.
// This function is thread safe.
func (engine *Engine) Search(request types.SearchReq) (output types.SearchResp) {
	if !engine.initialized {
		log.Fatal("The engine must be initialized first.")
	}

	tokens := engine.Tokens(request)

	// Resolve rank options, falling back to the engine defaults.
	var RankOpts types.RankOpts
	if request.RankOpts == nil {
		RankOpts = *engine.initOptions.DefaultRankOpts
	} else {
		RankOpts = *request.RankOpts
	}
	if RankOpts.ScoringCriteria == nil {
		RankOpts.ScoringCriteria = engine.initOptions.DefaultRankOpts.ScoringCriteria
	}

	// Channel on which the rankers return their output.
	rankerReturnChan := make(
		chan rankerReturnReq, engine.initOptions.NumShards)

	// Build the lookup request.
	lookupRequest := indexerLookupReq{
		countDocsOnly:    request.CountDocsOnly,
		tokens:           tokens,
		labels:           request.Labels,
		docIds:           request.DocIds,
		options:          RankOpts,
		rankerReturnChan: rankerReturnChan,
		orderless:        request.Orderless,
		logic:            request.Logic,
	}

	// Fan the lookup request out to every indexer shard.
	for shard := 0; shard < engine.initOptions.NumShards; shard++ {
		engine.indexerLookupChans[shard] <- lookupRequest
	}

	if engine.initOptions.IDOnly {
		output = engine.RankId(request, RankOpts, tokens, rankerReturnChan)
		return
	}

	output = engine.Ranks(request, RankOpts, tokens, rankerReturnChan)
	return
}
|
||||
|
||||
// Flush blocks until all pending index additions and removals have been
// applied (busy-waits on the engine counters, yielding between polls).
func (engine *Engine) Flush() {
	for {
		runtime.Gosched()

		engine.loc.RLock()
		inxd := engine.numIndexingReqs == engine.numDocsIndexed
		rmd := engine.numRemovingReqs*uint64(engine.initOptions.NumShards) ==
			engine.numDocsRemoved
		stored := !engine.initOptions.UseStore || engine.numIndexingReqs ==
			engine.numDocsStored
		engine.loc.RUnlock()

		if inxd && rmd && stored {
			// All requests already queued in the channels have been executed.
			break
		}
	}

	// Send a force-update sentinel, guaranteed to be the last request,
	// then wait until every shard has processed it.
	engine.IndexDoc(0, types.DocData{}, true)
	for {
		runtime.Gosched()

		engine.loc.RLock()
		forced := engine.numForceUpdatingReqs*uint64(engine.initOptions.NumShards) ==
			engine.numDocsForceUpdated
		engine.loc.RUnlock()

		if forced {
			return
		}

	}
}
|
||||
|
||||
// FlushIndex blocks until all indexes are added; it is an alias for Flush.
func (engine *Engine) FlushIndex() {
	engine.Flush()
}
|
||||
|
||||
// Close flushes all pending index operations and then closes the engine,
// including any persistent-store databases.
func (engine *Engine) Close() {
	engine.Flush()
	if engine.initOptions.UseStore {
		for _, db := range engine.dbs {
			db.Close()
		}
	}
}
|
||||
|
||||
// 从文本hash得到要分配到的 shard
|
||||
func (engine *Engine) getShard(hash uint32) int {
|
||||
return int(hash - hash/uint32(engine.initOptions.NumShards)*
|
||||
uint32(engine.initOptions.NumShards))
|
||||
}
|
166
vendor/github.com/go-ego/riot/indexer_worker.go
generated
vendored
Normal file
166
vendor/github.com/go-ego/riot/indexer_worker.go
generated
vendored
Normal file
@ -0,0 +1,166 @@
|
||||
// Copyright 2013 Hui Chen
|
||||
// Copyright 2016 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package riot
|
||||
|
||||
import (
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/go-ego/riot/types"
|
||||
)
|
||||
|
||||
// indexerAddDocReq asks an indexer shard to (batch-)add one document.
type indexerAddDocReq struct {
	doc         *types.DocIndex
	forceUpdate bool
}

// indexerLookupReq is a lookup request fanned out to every indexer shard;
// answers travel back on rankerReturnChan.
type indexerLookupReq struct {
	countDocsOnly    bool
	tokens           []string
	labels           []string
	docIds           map[uint64]bool
	options          types.RankOpts
	rankerReturnChan chan rankerReturnReq
	orderless        bool
	logic            types.Logic
}

// indexerRemoveDocReq asks an indexer shard to (batch-)remove one document.
type indexerRemoveDocReq struct {
	docId       uint64
	forceUpdate bool
}
|
||||
|
||||
// indexerAddDocWorker drains the add-document channel of one shard, feeds the
// documents into that shard's indexer cache and updates the counters that
// Flush polls.
func (engine *Engine) indexerAddDocWorker(shard int) {
	for {
		request := <-engine.indexerAddDocChans[shard]
		engine.indexers[shard].AddDocToCache(request.doc, request.forceUpdate)
		if request.doc != nil {
			atomic.AddUint64(&engine.numTokenIndexAdded,
				uint64(len(request.doc.Keywords)))

			// NOTE(review): the lock around an atomic add looks redundant,
			// but Flush reads several counters under the same lock — keep it.
			engine.loc.Lock()
			atomic.AddUint64(&engine.numDocsIndexed, 1)
			// engine.numDocsIndexed++
			engine.loc.Unlock()
		}
		if request.forceUpdate {
			engine.loc.Lock()
			atomic.AddUint64(&engine.numDocsForceUpdated, 1)
			engine.loc.Unlock()
		}
	}
}
|
||||
|
||||
// indexerRemoveDocWorker drains the remove-document channel of one shard,
// applies the removals to that shard's indexer cache and updates the
// counters that Flush polls.
func (engine *Engine) indexerRemoveDocWorker(shard int) {
	for {
		request := <-engine.indexerRemoveDocChans[shard]
		engine.indexers[shard].RemoveDocToCache(request.docId, request.forceUpdate)
		if request.docId != 0 {
			engine.loc.Lock()
			atomic.AddUint64(&engine.numDocsRemoved, 1)
			engine.loc.Unlock()
		}
		if request.forceUpdate {
			engine.loc.Lock()
			atomic.AddUint64(&engine.numDocsForceUpdated, 1)
			engine.loc.Unlock()
		}
	}
}
|
||||
|
||||
func (engine *Engine) orderLess(
|
||||
request indexerLookupReq, docs []types.IndexedDoc) {
|
||||
|
||||
if engine.initOptions.IDOnly {
|
||||
var outputDocs []types.ScoredID
|
||||
// var outputDocs types.ScoredIDs
|
||||
for _, d := range docs {
|
||||
outputDocs = append(outputDocs, types.ScoredID{
|
||||
DocId: d.DocId,
|
||||
TokenSnippetLocs: d.TokenSnippetLocs,
|
||||
TokenLocs: d.TokenLocs})
|
||||
}
|
||||
|
||||
request.rankerReturnChan <- rankerReturnReq{
|
||||
docs: types.ScoredIDs(outputDocs),
|
||||
numDocs: len(outputDocs),
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
var outputDocs []types.ScoredDoc
|
||||
// var outputDocs types.ScoredDocs
|
||||
for _, d := range docs {
|
||||
outputDocs = append(outputDocs, types.ScoredDoc{
|
||||
DocId: d.DocId,
|
||||
TokenSnippetLocs: d.TokenSnippetLocs,
|
||||
TokenLocs: d.TokenLocs})
|
||||
}
|
||||
|
||||
request.rankerReturnChan <- rankerReturnReq{
|
||||
docs: types.ScoredDocs(outputDocs),
|
||||
numDocs: len(outputDocs),
|
||||
}
|
||||
}
|
||||
|
||||
func (engine *Engine) indexerLookupWorker(shard int) {
|
||||
for {
|
||||
request := <-engine.indexerLookupChans[shard]
|
||||
|
||||
var (
|
||||
docs []types.IndexedDoc
|
||||
numDocs int
|
||||
)
|
||||
if request.docIds == nil {
|
||||
docs, numDocs = engine.indexers[shard].Lookup(
|
||||
request.tokens, request.labels,
|
||||
nil, request.countDocsOnly, request.logic)
|
||||
// docs, numDocs = engine.indexers[shard].Lookup(request.tokens,
|
||||
// request.labels, nil, request.countDocsOnly)
|
||||
} else {
|
||||
docs, numDocs = engine.indexers[shard].Lookup(
|
||||
request.tokens, request.labels,
|
||||
request.docIds, request.countDocsOnly, request.logic)
|
||||
// docs, numDocs = engine.indexers[shard].Lookup(request.tokens,
|
||||
// request.labels, request.docIds, request.countDocsOnly)
|
||||
}
|
||||
|
||||
if request.countDocsOnly {
|
||||
request.rankerReturnChan <- rankerReturnReq{numDocs: numDocs}
|
||||
continue
|
||||
}
|
||||
|
||||
if len(docs) == 0 {
|
||||
request.rankerReturnChan <- rankerReturnReq{}
|
||||
continue
|
||||
}
|
||||
|
||||
if request.orderless {
|
||||
// var outputDocs interface{}
|
||||
engine.orderLess(request, docs)
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
rankerRequest := rankerRankReq{
|
||||
countDocsOnly: request.countDocsOnly,
|
||||
docs: docs,
|
||||
options: request.options,
|
||||
rankerReturnChan: request.rankerReturnChan,
|
||||
}
|
||||
engine.rankerRankChans[shard] <- rankerRequest
|
||||
}
|
||||
}
|
149
vendor/github.com/go-ego/riot/info.go
generated
vendored
Normal file
149
vendor/github.com/go-ego/riot/info.go
generated
vendored
Normal file
@ -0,0 +1,149 @@
|
||||
// Copyright 2016 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package riot
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/go-vgo/gt/info"
|
||||
)
|
||||
|
||||
var (
	lck sync.RWMutex

	// InitMemUsed is the memory in use when the package was initialized.
	InitMemUsed uint64
	// InitDiskUsed is the disk space in use when the package was initialized.
	InitDiskUsed uint64
)

// init records the baseline memory and disk usage so that UsedMem/UsedDisk
// can report riot's own consumption. Probe errors are deliberately ignored;
// the baselines simply stay zero if a probe fails.
func init() {
	lck.Lock()
	InitMemUsed, _ = MemUsed()
	InitDiskUsed, _ = DiskUsed()
	lck.Unlock()
}
|
||||
|
||||
// MemPercent returns the amount of used memory in percent.
func MemPercent() (string, error) {
	return info.MemPercent()
}

// MemUsed returns the amount of used memory in bytes.
func MemUsed() (uint64, error) {
	return info.MemUsed()
}

// UsedMem returns the amount of memory riot itself has used (in bytes)
// since the package init() recorded the InitMemUsed baseline.
func (engine *Engine) UsedMem() (uint64, error) {
	memUsed, err := MemUsed()
	if err != nil {
		return 0, err
	}

	return memUsed - InitMemUsed, err
}

// MemTotal returns the amount of total memory in bytes.
func MemTotal() (uint64, error) {
	return info.MemTotal()
}

// MemFree returns the amount of free memory in bytes.
func MemFree() (uint64, error) {
	return info.MemFree()
}
|
||||
|
||||
// ToKB converts a byte count to whole kibibytes (floor division by 1024).
func ToKB(data uint64) uint64 {
	return data >> 10
}

// ToMB converts a byte count to whole mebibytes.
func ToMB(data uint64) uint64 {
	return data >> 20
}

// ToGB converts a byte count to whole gibibytes.
func ToGB(data uint64) uint64 {
	return data >> 30
}
|
||||
|
||||
// Disk init the disk
// func Disk(pt ...bool) ([]*disk.UsageStat, error) {
// 	return info.Disk(pt...)
// }

// DiskPercent returns the amount of used disk in percent.
func DiskPercent() (string, error) {
	return info.DiskPercent()
}

// DiskUsed returns the amount of used disk space in bytes.
func DiskUsed() (uint64, error) {
	return info.DiskUsed()
}

// UsedDisk returns the amount of disk space riot itself has used (in bytes)
// since the package init() recorded the InitDiskUsed baseline.
func (engine *Engine) UsedDisk() (uint64, error) {
	diskUsed, err := DiskUsed()
	if err != nil {
		return 0, err
	}

	return diskUsed - InitDiskUsed, err
}

// DiskTotal returns the amount of total disk space in bytes.
func DiskTotal() (uint64, error) {
	return info.DiskTotal()
}

// DiskFree returns the amount of free disk space in bytes.
func DiskFree() (uint64, error) {
	return info.DiskFree()
}

// CPUInfo returns the cpu info.
func CPUInfo(args ...int) (string, error) {
	return info.CPUInfo(args...)
}

// CPUPercent returns the cpu usage in percent.
func CPUPercent() ([]float64, error) {
	return info.CPUPercent()
}

// Uptime returns the system uptime in seconds.
func Uptime() (uptime uint64, err error) {
	return info.Uptime()
}

// PlatformInfo fetches system platform information.
func PlatformInfo() (platform, family, osVersion string, err error) {
	return info.PlatformInfo()
}

// Platform returns the platform name and OS version.
func Platform() (string, error) {
	return info.Platform()
}

// KernelVer returns the kernel version as a string.
func KernelVer() (string, error) {
	return info.KernelVer()
}
|
82
vendor/github.com/go-ego/riot/ranker_worker.go
generated
vendored
Normal file
82
vendor/github.com/go-ego/riot/ranker_worker.go
generated
vendored
Normal file
@ -0,0 +1,82 @@
|
||||
// Copyright 2013 Hui Chen
|
||||
// Copyright 2016 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package riot
|
||||
|
||||
import (
|
||||
"github.com/go-ego/riot/types"
|
||||
)
|
||||
|
||||
// rankerAddDocReq asks a ranker shard to register one document.
type rankerAddDocReq struct {
	docId  uint64
	fields interface{}
	// content is the raw document content (newer API addition).
	content string
	// attri carries custom document attributes (newer API addition).
	attri interface{}
}

// rankerRankReq asks a ranker shard to score a batch of matched docs.
type rankerRankReq struct {
	docs             []types.IndexedDoc
	options          types.RankOpts
	rankerReturnChan chan rankerReturnReq
	countDocsOnly    bool
}

// rankerReturnReq is a ranker's answer: docs holds either types.ScoredDocs
// or types.ScoredIDs (IDOnly mode), plus the matched-document count.
type rankerReturnReq struct {
	// docs types.ScoredDocs
	docs    interface{}
	numDocs int
}

// rankerRemoveDocReq asks a ranker shard to forget one document.
type rankerRemoveDocReq struct {
	docId uint64
}
|
||||
|
||||
func (engine *Engine) rankerAddDocWorker(shard int) {
|
||||
for {
|
||||
request := <-engine.rankerAddDocChans[shard]
|
||||
if engine.initOptions.IDOnly {
|
||||
engine.rankers[shard].AddDoc(request.docId, request.fields)
|
||||
return
|
||||
}
|
||||
// } else {
|
||||
engine.rankers[shard].AddDoc(request.docId, request.fields,
|
||||
request.content, request.attri)
|
||||
// }
|
||||
}
|
||||
}
|
||||
|
||||
// rankerRankWorker serves rank requests for one shard: it folds the output
// offset into MaxOutputs (pagination is re-applied later by RankId/Ranks),
// scores the docs and sends the result back on the request's return channel.
func (engine *Engine) rankerRankWorker(shard int) {
	for {
		request := <-engine.rankerRankChans[shard]
		if request.options.MaxOutputs != 0 {
			request.options.MaxOutputs += request.options.OutputOffset
		}
		request.options.OutputOffset = 0
		outputDocs, numDocs := engine.rankers[shard].Rank(request.docs,
			request.options, request.countDocsOnly)

		request.rankerReturnChan <- rankerReturnReq{
			docs: outputDocs, numDocs: numDocs}
	}
}
|
||||
|
||||
func (engine *Engine) rankerRemoveDocWorker(shard int) {
|
||||
for {
|
||||
request := <-engine.rankerRemoveDocChans[shard]
|
||||
engine.rankers[shard].RemoveDoc(request.docId)
|
||||
}
|
||||
}
|
198
vendor/github.com/go-ego/riot/riot.go
generated
vendored
Normal file
198
vendor/github.com/go-ego/riot/riot.go
generated
vendored
Normal file
@ -0,0 +1,198 @@
|
||||
// Copyright 2017 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package riot
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"encoding/binary"
|
||||
"encoding/gob"
|
||||
|
||||
"github.com/go-ego/murmur"
|
||||
"github.com/go-ego/riot/core"
|
||||
"github.com/go-ego/riot/types"
|
||||
toml "github.com/go-vgo/gt/conf"
|
||||
)
|
||||
|
||||
// New creates a new engine. When the first argument is the path of a .toml
// configuration file, the engine options are loaded (and watched) from that
// file; otherwise the arguments are forwarded to NewEngine.
func New(conf ...interface{}) *Engine {
	// func (engine *Engine) New(conf com.Config) *Engine{
	if len(conf) > 0 && strings.HasSuffix(conf[0].(string), ".toml") {
		var (
			config   types.EngineOpts
			searcher = &Engine{}
		)

		fs := conf[0].(string)
		log.Println("conf path is: ", fs)
		toml.Init(fs, &config)
		// Reload the options when the config file changes on disk.
		go toml.Watch(fs, &config)

		searcher.Init(config)
		return searcher
	}

	return NewEngine(conf...)
}
|
||||
|
||||
// NewEngine create a new engine
|
||||
func NewEngine(conf ...interface{}) *Engine {
|
||||
var (
|
||||
searcher = &Engine{}
|
||||
|
||||
path = DefaultPath
|
||||
storageShards = 10
|
||||
numShards = 10
|
||||
|
||||
segmentDict string
|
||||
)
|
||||
|
||||
if len(conf) > 0 {
|
||||
segmentDict = conf[0].(string)
|
||||
}
|
||||
|
||||
if len(conf) > 1 {
|
||||
path = conf[1].(string)
|
||||
}
|
||||
|
||||
if len(conf) > 2 {
|
||||
numShards = conf[2].(int)
|
||||
storageShards = conf[2].(int)
|
||||
}
|
||||
|
||||
searcher.Init(types.EngineOpts{
|
||||
// Using: using,
|
||||
StoreShards: storageShards,
|
||||
NumShards: numShards,
|
||||
IndexerOpts: &types.IndexerOpts{
|
||||
IndexType: types.DocIdsIndex,
|
||||
},
|
||||
UseStore: true,
|
||||
StoreFolder: path,
|
||||
// StoreEngine: storageEngine,
|
||||
GseDict: segmentDict,
|
||||
// StopTokenFile: stopTokenFile,
|
||||
})
|
||||
|
||||
// defer searcher.Close()
|
||||
os.MkdirAll(path, 0777)
|
||||
|
||||
// 等待索引刷新完毕
|
||||
// searcher.Flush()
|
||||
// log.Println("recover index number: ", searcher.NumDocsIndexed())
|
||||
|
||||
return searcher
|
||||
}
|
||||
|
||||
// func (engine *Engine) IsDocExist(docId uint64) bool {
|
||||
// return core.IsDocExist(docId)
|
||||
// }
|
||||
|
||||
// HasDoc if the document is exist return true
|
||||
func (engine *Engine) HasDoc(docId uint64) bool {
|
||||
for shard := 0; shard < engine.initOptions.NumShards; shard++ {
|
||||
engine.indexers = append(engine.indexers, core.Indexer{})
|
||||
|
||||
has := engine.indexers[shard].HasDoc(docId)
|
||||
|
||||
if has {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// HasDocDB reports whether the document exists in the persistent store.
func (engine *Engine) HasDocDB(docId uint64) bool {
	// Uvarint-encode the docId — this is the store's key format.
	b := make([]byte, 10)
	length := binary.PutUvarint(b, docId)

	shard := murmur.Sum32(fmt.Sprintf("%d", docId)) %
		uint32(engine.initOptions.StoreShards)

	has, err := engine.dbs[shard].Has(b[0:length])
	if err != nil {
		log.Println("engine.dbs[shard].Has(b[0:length]): ", err)
	}

	return has
}
|
||||
|
||||
// GetDBAllIds iterates all DocIds stored in the persistent databases and
// returns them.
// NOTE(review): ForEach errors are ignored here — a failing shard silently
// contributes no ids; confirm whether that is acceptable for callers.
func (engine *Engine) GetDBAllIds() []uint64 {
	docsId := make([]uint64, 0)
	for i := range engine.dbs {
		engine.dbs[i].ForEach(func(k, v []byte) error {
			// Keys are uvarint-encoded docIds (see HasDocDB).
			docId, _ := binary.Uvarint(k)
			docsId = append(docsId, docId)
			return nil
		})
	}

	return docsId
}
|
||||
|
||||
// GetDBAllDocs returns every docId and its gob-decoded DocData from the
// persistent databases.
func (engine *Engine) GetDBAllDocs() (
	docsId []uint64, docsData []types.DocData) {
	for i := range engine.dbs {
		engine.dbs[i].ForEach(func(key, val []byte) error {
			// Keys are uvarint-encoded docIds; values are gob-encoded DocData.
			docId, _ := binary.Uvarint(key)
			docsId = append(docsId, docId)

			buf := bytes.NewReader(val)
			dec := gob.NewDecoder(buf)

			var data types.DocData
			err := dec.Decode(&data)
			if err != nil {
				// A bad record still contributes its (zero-value) DocData so
				// docsId and docsData stay aligned.
				log.Println("dec.decode: ", err)
			}

			docsData = append(docsData, data)

			return nil
		})
	}

	return docsId, docsData
}
|
||||
|
||||
// GetAllDocIds gets all the DocIds from the storage database and returns
// them; it is an alias for GetDBAllIds.
func (engine *Engine) GetAllDocIds() []uint64 {
	return engine.GetDBAllIds()
}
|
||||
|
||||
// Try runs fun and, if fun panics, recovers and passes the panic value to
// handler. When fun returns normally the handler is never invoked.
func Try(fun func(), handler func(interface{})) {
	defer func() {
		if r := recover(); r != nil {
			handler(r)
		}
	}()

	fun()
}
|
30
vendor/github.com/go-ego/riot/riot_pkg.go
generated
vendored
Normal file
30
vendor/github.com/go-ego/riot/riot_pkg.go
generated
vendored
Normal file
@ -0,0 +1,30 @@
|
||||
// Copyright 2013 Hui Chen
|
||||
// Copyright 2016 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
/*
|
||||
|
||||
Package riot full text search engine
|
||||
*/
|
||||
package riot
|
||||
|
||||
import (
|
||||
// _ "github.com/cznic/kv"
|
||||
_ "github.com/coreos/bbolt"
|
||||
// _ "github.com/boltdb/bolt"
|
||||
_ "github.com/dgraph-io/badger"
|
||||
_ "github.com/go-ego/gse"
|
||||
_ "github.com/go-ego/murmur"
|
||||
_ "github.com/syndtr/goleveldb/leveldb"
|
||||
)
|
349
vendor/github.com/go-ego/riot/segment.go
generated
vendored
Normal file
349
vendor/github.com/go-ego/riot/segment.go
generated
vendored
Normal file
@ -0,0 +1,349 @@
|
||||
// Copyright 2013 Hui Chen
|
||||
// Copyright 2016 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package riot
|
||||
|
||||
import (
|
||||
// "fmt"
|
||||
|
||||
"strings"
|
||||
|
||||
"github.com/go-ego/gpy"
|
||||
"github.com/go-ego/riot/types"
|
||||
)
|
||||
|
||||
// TMap defines the tokens map type map[string][]int:
// token text -> positions of that token within the document.
type TMap map[string][]int

// segmenterReq is one unit of work sent to the segmenter worker goroutines.
type segmenterReq struct {
	docId uint64 // document id; 0 is used as a flush/force-update signal (see segmenterWorker)
	hash  uint32 // used to select the indexer shard for this document
	data  types.DocData
	// data types.DocumentIndexData
	forceUpdate bool // when true, downstream indexer shards are asked to flush
}
|
||||
|
||||
// ForSplitData for split segment's data, segspl.
//
// It consumes the first num entries of the pre-split slice strData and
// builds a token -> positions map. For each non-empty, non-stop entry it
// records (a) the entry itself and (b) the running prefix of all entries
// seen so far; with Using == 6 it additionally records every combination
// starting just after the current index.
//
// NOTE(review): numTokens doubles as the 1-based "position" value that is
// appended to the map, i.e. positions are assigned in discovery order, not
// by character offset — confirm against callers before changing.
func (engine *Engine) ForSplitData(strData []string, num int) (TMap, int) {
	var (
		numTokens int
		splitStr  string
	)
	tokensMap := make(map[string][]int)

	for i := 0; i < num; i++ {
		if strData[i] != "" {
			// Record the single entry unless it is a stop token.
			if !engine.stopTokens.IsStopToken(strData[i]) {
				numTokens++
				tokensMap[strData[i]] = append(tokensMap[strData[i]], numTokens)
			}

			// Record the growing prefix of everything seen so far.
			splitStr += strData[i]
			if !engine.stopTokens.IsStopToken(splitStr) {
				numTokens++
				tokensMap[splitStr] = append(tokensMap[splitStr], numTokens)
			}

			if engine.initOptions.Using == 6 {
				// more combination: also record every run starting after i.
				var splitsStr string
				for s := i + 1; s < len(strData); s++ {
					splitsStr += strData[s]

					if !engine.stopTokens.IsStopToken(splitsStr) {
						numTokens++
						tokensMap[splitsStr] = append(tokensMap[splitsStr], numTokens)
					}
				}
			}

		}
	}

	return tokensMap, numTokens
}
|
||||
|
||||
// splitData builds the token -> positions map for a document without using
// the full segmenter pipeline. Depending on engine.initOptions.Using it
// runs the gse segmenter (3), space-splitting defaultTokens (4), and/or a
// per-character split via ForSplitData (any mode != 4). Caller-supplied
// request.data.Tokens are always merged in afterwards and counted.
func (engine *Engine) splitData(request segmenterReq) (TMap, int) {
	var (
		num       int
		numTokens int
	)
	tokensMap := make(map[string][]int)

	if request.data.Content != "" {
		// Tokenization is case-insensitive.
		content := strings.ToLower(request.data.Content)
		if engine.initOptions.Using == 3 {
			// use segmenter
			segments := engine.segmenter.ModeSegment([]byte(content),
				engine.initOptions.GseMode)

			for _, segment := range segments {
				token := segment.Token().Text()
				if !engine.stopTokens.IsStopToken(token) {
					tokensMap[token] = append(tokensMap[token], segment.Start())
				}
			}
			numTokens += len(segments)
		}

		if engine.initOptions.Using == 4 {
			// Mode 4 replaces (not augments) the map built so far.
			tokensMap, numTokens = engine.defaultTokens(content)
		}

		if engine.initOptions.Using != 4 {
			// Split into individual characters and index every prefix
			// combination; entries overwrite same-key entries from above.
			strData := strings.Split(content, "")
			num = len(strData)
			tokenMap, numToken := engine.ForSplitData(strData, num)
			numTokens += numToken
			for key, val := range tokenMap {
				tokensMap[key] = val
			}
		}
	}

	// Merge caller-supplied tokens; their locations replace any computed ones.
	for _, t := range request.data.Tokens {
		if !engine.stopTokens.IsStopToken(t.Text) {
			tokensMap[t.Text] = t.Locations
		}
	}

	numTokens += len(request.data.Tokens)

	return tokensMap, numTokens
}
|
||||
|
||||
// segmenterData produces the token -> locations map and token count for a
// document according to engine.initOptions.Using:
//
//	0: segment content (if any) AND merge caller tokens
//	1: segment content only; fall through to token-only when content is empty
//	2 (or 1/3 with empty content): use caller-supplied tokens only
//	anything else: delegate to splitData
func (engine *Engine) segmenterData(request segmenterReq) (TMap, int) {
	tokensMap := make(map[string][]int)
	numTokens := 0

	if engine.initOptions.Using == 0 && request.data.Content != "" {
		// Segment the content: when the document body is non-empty,
		// keywords are taken from content segmentation first.
		segments := engine.segmenter.ModeSegment([]byte(request.data.Content),
			engine.initOptions.GseMode)

		for _, segment := range segments {
			token := segment.Token().Text()
			if !engine.stopTokens.IsStopToken(token) {
				tokensMap[token] = append(tokensMap[token], segment.Start())
			}
		}

		// Caller-supplied tokens are merged in; their locations replace
		// any computed ones for the same text.
		for _, t := range request.data.Tokens {
			if !engine.stopTokens.IsStopToken(t.Text) {
				tokensMap[t.Text] = t.Locations
			}
		}

		numTokens = len(segments) + len(request.data.Tokens)

		return tokensMap, numTokens
	}

	if engine.initOptions.Using == 1 && request.data.Content != "" {
		// Segment the content: when the document body is non-empty,
		// keywords are taken from content segmentation first.
		segments := engine.segmenter.ModeSegment([]byte(request.data.Content),
			engine.initOptions.GseMode)

		for _, segment := range segments {
			token := segment.Token().Text()
			if !engine.stopTokens.IsStopToken(token) {
				tokensMap[token] = append(tokensMap[token], segment.Start())
			}
		}
		numTokens = len(segments)

		return tokensMap, numTokens
	}

	if engine.initOptions.Using == 2 ||
		((engine.initOptions.Using == 1 || engine.initOptions.Using == 3) &&
			request.data.Content == "") {
		// Token-only modes: trust the tokens supplied by the caller.
		for _, t := range request.data.Tokens {
			if !engine.stopTokens.IsStopToken(t.Text) {
				tokensMap[t.Text] = t.Locations
			}
		}

		numTokens = len(request.data.Tokens)

		return tokensMap, numTokens
	}

	// Fallback: character/space-split based tokenization.
	tokenMap, lenSplitData := engine.splitData(request)

	return tokenMap, lenSplitData
}
|
||||
|
||||
func (engine *Engine) defaultTokens(content string) (tokensMap TMap, numTokens int) {
|
||||
// use segmenter
|
||||
tokensMap = make(map[string][]int)
|
||||
strData := strings.Split(content, " ")
|
||||
num := len(strData)
|
||||
// if num == 1 {
|
||||
// tokensMap[request.data.Content] = []int{1}
|
||||
// }
|
||||
|
||||
if num > 0 {
|
||||
tokenMap, numToken := engine.ForSplitData(strData, num)
|
||||
numTokens += numToken
|
||||
|
||||
for key, val := range tokenMap {
|
||||
tokensMap[key] = val
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// segmenterWorker is the long-running goroutine that consumes segmenter
// requests, tokenizes each document (via segmenterData or defaultTokens),
// and forwards the resulting keyword index to the per-shard indexer and
// ranker channels. A request with docId == 0 is a control message: when
// forceUpdate is set it broadcasts a flush to every indexer shard.
func (engine *Engine) segmenterWorker() {
	for {
		request := <-engine.segmenterChan
		if request.docId == 0 {
			if request.forceUpdate {
				for i := 0; i < engine.initOptions.NumShards; i++ {
					engine.indexerAddDocChans[i] <- indexerAddDocReq{
						forceUpdate: true}
				}
			}
			continue
		}

		shard := engine.getShard(request.hash)
		tokensMap := make(map[string][]int)
		numTokens := 0
		if !(engine.initOptions.NotUseGse && engine.initOptions.Using == 0) {
			// Normal path: tokenize through the configured segmenter mode.
			tokensMap, numTokens = engine.segmenterData(request)
		} else {
			// Gse disabled: fall back to space-splitting plus caller tokens.
			if request.data.Content != "" {
				content := strings.ToLower(request.data.Content)
				tokensMap, numTokens = engine.defaultTokens(content)
			}

			for _, t := range request.data.Tokens {
				if !engine.stopTokens.IsStopToken(t.Text) {
					tokensMap[t.Text] = t.Locations
				}
			}

			numTokens += len(request.data.Tokens)
		}

		// Add the document labels that do not come from segmentation.
		for _, label := range request.data.Labels {
			if !engine.initOptions.NotUseGse {
				if !engine.stopTokens.IsStopToken(label) {
					// If the keyword already exists in the body, overwriting
					// it here would lose its position information.
					if _, ok := tokensMap[label]; !ok {
						tokensMap[label] = []int{}
					}
				}
			} else {
				// If the keyword already exists in the body, overwriting
				// it here would lose its position information.
				if _, ok := tokensMap[label]; !ok {
					tokensMap[label] = []int{}
				}
			}
		}

		indexerRequest := indexerAddDocReq{
			doc: &types.DocIndex{
				DocId:    request.docId,
				TokenLen: float32(numTokens),
				Keywords: make([]types.KeywordIndex, len(tokensMap)),
			},
			forceUpdate: request.forceUpdate,
		}
		iTokens := 0
		for k, v := range tokensMap {
			indexerRequest.doc.Keywords[iTokens] = types.KeywordIndex{
				Text: k,
				// Labels not produced by segmentation carry frequency 0
				// and do not take part in the tf-idf computation.
				Frequency: float32(len(v)),
				Starts:    v}
			iTokens++
		}

		engine.indexerAddDocChans[shard] <- indexerRequest
		if request.forceUpdate {
			// Flush every other shard so all shards observe the update.
			for i := 0; i < engine.initOptions.NumShards; i++ {
				if i == shard {
					continue
				}
				engine.indexerAddDocChans[i] <- indexerAddDocReq{forceUpdate: true}
			}
		}
		rankerRequest := rankerAddDocReq{
			// docId: request.docId, fields: request.data.Fields}
			docId: request.docId, fields: request.data.Fields,
			content: request.data.Content, attri: request.data.Attri}
		engine.rankerAddDocChans[shard] <- rankerRequest
	}
}
|
||||
|
||||
// PinYin get the Chinese alphabet and abbreviation.
//
// It returns a slice of search keys derived from hans: the individual
// characters and their running prefixes, the gse segments (unless gse is
// disabled), the accumulated pinyin spellings, and the accumulated pinyin
// initials. Stop tokens are filtered at every step.
func (engine *Engine) PinYin(hans string) []string {
	var (
		str      string
		pyStr    string
		strArr   []string
		splitStr string
		// splitArr []string
	)

	//
	splitHans := strings.Split(hans, "")
	for i := 0; i < len(splitHans); i++ {
		if splitHans[i] != "" {
			if !engine.stopTokens.IsStopToken(splitHans[i]) {
				strArr = append(strArr, splitHans[i])
			}
			splitStr += splitHans[i]
		}
		// NOTE(review): this prefix check sits outside the emptiness guard
		// above, unlike ForSplitData — confirm duplicates are acceptable.
		if !engine.stopTokens.IsStopToken(splitStr) {
			strArr = append(strArr, splitStr)
		}
	}

	// Segment with gse and add each segment as a key.
	if !engine.initOptions.NotUseGse {
		sehans := engine.Segment(hans)
		for h := 0; h < len(sehans); h++ {
			if !engine.stopTokens.IsStopToken(sehans[h]) {
				strArr = append(strArr, sehans[h])
			}
		}
	}
	//
	// py := pinyin.LazyConvert(sehans[h], nil)
	py := gpy.LazyConvert(hans, nil)

	// log.Println("py...", py)
	for i := 0; i < len(py); i++ {
		// log.Println("py[i]...", py[i])
		// Accumulate full pinyin spellings (prefix by prefix).
		pyStr += py[i]
		if !engine.stopTokens.IsStopToken(pyStr) {
			strArr = append(strArr, pyStr)
		}

		// Accumulate the abbreviation built from each syllable's initial.
		if len(py[i]) > 0 {
			str += py[i][0:1]
			if !engine.stopTokens.IsStopToken(str) {
				strArr = append(strArr, str)
			}
		}
	}

	return strArr
}
|
57
vendor/github.com/go-ego/riot/stop_tokens.go
generated
vendored
Normal file
57
vendor/github.com/go-ego/riot/stop_tokens.go
generated
vendored
Normal file
@ -0,0 +1,57 @@
|
||||
// Copyright 2013 Hui Chen
|
||||
// Copyright 2016 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package riot
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"log"
|
||||
"os"
|
||||
)
|
||||
|
||||
// StopTokens stop tokens map
|
||||
type StopTokens struct {
|
||||
stopTokens map[string]bool
|
||||
}
|
||||
|
||||
// Init 从 stopTokenFile 中读入停用词,一个词一行
|
||||
// 文档索引建立时会跳过这些停用词
|
||||
func (st *StopTokens) Init(stopTokenFile string) {
|
||||
st.stopTokens = make(map[string]bool)
|
||||
if stopTokenFile == "" {
|
||||
return
|
||||
}
|
||||
|
||||
file, err := os.Open(stopTokenFile)
|
||||
if err != nil {
|
||||
log.Fatal("Open stop token file error: ", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
text := scanner.Text()
|
||||
if text != "" {
|
||||
st.stopTokens[text] = true
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// IsStopToken to determine whether to stop token
|
||||
func (st *StopTokens) IsStopToken(token string) bool {
|
||||
_, found := st.stopTokens[token]
|
||||
return found
|
||||
}
|
33
vendor/github.com/go-ego/riot/store/BUILD.bazel
generated
vendored
Normal file
33
vendor/github.com/go-ego/riot/store/BUILD.bazel
generated
vendored
Normal file
@ -0,0 +1,33 @@
|
||||
load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = [
|
||||
"badger_store.go",
|
||||
"bolt_store.go",
|
||||
"ldb_store.go",
|
||||
"store.go",
|
||||
],
|
||||
importmap = "go-common/vendor/github.com/go-ego/riot/store",
|
||||
importpath = "github.com/go-ego/riot/store",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//vendor/github.com/coreos/bbolt:go_default_library",
|
||||
"//vendor/github.com/dgraph-io/badger:go_default_library",
|
||||
"//vendor/github.com/syndtr/goleveldb/leveldb:go_default_library",
|
||||
],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "package-srcs",
|
||||
srcs = glob(["**"]),
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:private"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "all-srcs",
|
||||
srcs = [":package-srcs"],
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
140
vendor/github.com/go-ego/riot/store/badger_store.go
generated
vendored
Normal file
140
vendor/github.com/go-ego/riot/store/badger_store.go
generated
vendored
Normal file
@ -0,0 +1,140 @@
|
||||
// Copyright 2016 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package store
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"github.com/dgraph-io/badger"
|
||||
)
|
||||
|
||||
// Badger badger.KV db store: a thin adapter exposing *badger.DB through
// the package's Store interface.
type Badger struct {
	db *badger.DB
}

// OpenBadger open Badger store at dbPath, which is used for both the LSM
// directory and the value-log directory, with synchronous writes enabled.
// NOTE(review): an open failure is fatal here, unlike OpenBolt/OpenLeveldb
// which return the error — confirm this asymmetry is intended.
func OpenBadger(dbPath string) (Store, error) {
	// err := os.MkdirAll(dbPath, 0777)
	// if err != nil {
	// log.Fatal("os.MkdirAll: ", err)
	// os.Exit(1)
	// }
	// os.MkdirAll(path.Dir(dbPath), os.ModePerm)

	opt := badger.DefaultOptions
	opt.Dir = dbPath
	opt.ValueDir = dbPath
	opt.SyncWrites = true
	kv, err := badger.Open(opt)
	if err != nil {
		log.Fatal("badger NewKV: ", err)
	}

	return &Badger{kv}, err
}

// WALName is useless for this kv database.
func (s *Badger) WALName() string {
	return "" // not applicable for this database
}

// Set sets the provided value for a given key.
// If key is not present, it is created. If it is present,
// the existing value is overwritten with the one provided.
func (s *Badger) Set(k, v []byte) error {
	err := s.db.Update(func(txn *badger.Txn) error {
		// return txn.Set(k, v, 0x00)
		return txn.Set(k, v)
	})

	return err
}

// Get looks for key and returns a value.
// If key is not found, value is nil.
func (s *Badger) Get(k []byte) ([]byte, error) {
	var ival []byte
	err := s.db.View(func(txn *badger.Txn) error {
		item, err := txn.Get(k)
		if err != nil {
			return err
		}

		ival, err = item.Value()
		return err
	})

	return ival, err
}

// Delete deletes a key. Exposing this so that user does not
// have to specify the Entry directly. For example, BitDelete
// seems internal to badger.
func (s *Badger) Delete(k []byte) error {
	err := s.db.Update(func(txn *badger.Txn) error {
		return txn.Delete(k)
	})

	return err
}

// Has returns true if the DB does contains the given key.
// NOTE(review): this returns (true, err) whenever the fetched value is
// non-empty OR the lookup error is nil — an existing key with an empty
// value reports true, but so does a non-nil value alongside an error;
// callers should still check err.
func (s *Badger) Has(k []byte) (bool, error) {
	// return s.db.Exists(k)
	val, err := s.Get(k)
	if string(val) == "" && err != nil {
		return false, err
	}

	return true, err
}

// Len returns the size of lsm and value log files in bytes.
// It can be used to decide how often to call RunValueLogGC.
func (s *Badger) Len() (int64, int64) {
	return s.db.Size()
}

// ForEach get all key and value: it iterates the whole store inside a
// read transaction and invokes fn for each pair, stopping on first error.
func (s *Badger) ForEach(fn func(k, v []byte) error) error {
	err := s.db.View(func(txn *badger.Txn) error {
		opts := badger.DefaultIteratorOptions
		opts.PrefetchSize = 1000
		it := txn.NewIterator(opts)
		defer it.Close()
		for it.Rewind(); it.Valid(); it.Next() {
			item := it.Item()
			key := item.Key()
			val, err := item.Value()
			if err != nil {
				return err
			}

			if err := fn(key, val); err != nil {
				return err
			}
		}
		return nil
	})

	return err
}

// Close closes a KV. It's crucial to call it to ensure
// all the pending updates make their way to disk.
func (s *Badger) Close() error {
	return s.db.Close()
}
|
118
vendor/github.com/go-ego/riot/store/bolt_store.go
generated
vendored
Normal file
118
vendor/github.com/go-ego/riot/store/bolt_store.go
generated
vendored
Normal file
@ -0,0 +1,118 @@
|
||||
// Copyright 2016 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package store
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/coreos/bbolt"
|
||||
// "github.com/boltdb/bolt"
|
||||
)
|
||||
|
||||
// gdocs is the single bucket name under which all documents are stored.
var gdocs = []byte("gdocs")

// Bolt bolt store struct: adapts *bolt.DB to the package's Store interface,
// keeping everything in the "gdocs" bucket.
type Bolt struct {
	db *bolt.DB
}

// OpenBolt open Bolt store at dbPath and ensure the "gdocs" bucket exists.
// The file-lock timeout is one hour.
func OpenBolt(dbPath string) (Store, error) {
	db, err := bolt.Open(dbPath, 0600, &bolt.Options{Timeout: 3600 * time.Second})
	// db, err := bolt.Open(dbPath, 0600, &bolt.Options{})
	if err != nil {
		return nil, err
	}
	err = db.Update(func(tx *bolt.Tx) error {
		_, err := tx.CreateBucketIfNotExists(gdocs)
		return err
	})
	if err != nil {
		// Bucket creation failed: release the file before bailing out.
		db.Close()
		return nil, err
	}
	return &Bolt{db}, nil
}

// WALName returns the path to currently open database file.
func (s *Bolt) WALName() string {
	return s.db.Path()
}

// Set executes a function within the context of a read-write managed
// transaction. If no error is returned from the function then the transaction
// is committed. If an error is returned then the entire transaction is rolled back.
// Any error that is returned from the function or returned from the commit is returned
// from the Update() method.
func (s *Bolt) Set(k []byte, v []byte) error {
	return s.db.Update(func(tx *bolt.Tx) error {
		return tx.Bucket(gdocs).Put(k, v)
	})
}

// Get executes a function within the context of a managed read-only transaction.
// Any error that is returned from the function is returned from the View() method.
func (s *Bolt) Get(k []byte) (b []byte, err error) {
	err = s.db.View(func(tx *bolt.Tx) error {
		b = tx.Bucket(gdocs).Get(k)
		return nil
	})
	return
}

// Delete deletes a key. Exposing this so that user does not
// have to specify the Entry directly.
func (s *Bolt) Delete(k []byte) error {
	return s.db.Update(func(tx *bolt.Tx) error {
		return tx.Bucket(gdocs).Delete(k)
	})
}

// Has returns true if the DB does contains the given key.
// NOTE(review): a key stored with an empty value also reports false here,
// since presence is inferred from the value's emptiness.
func (s *Bolt) Has(k []byte) (bool, error) {
	// return s.db.Exists(k)
	var b []byte
	err := s.db.View(func(tx *bolt.Tx) error {
		b = tx.Bucket(gdocs).Get(k)
		return nil
	})

	// b == nil
	if err != nil || string(b) == "" {
		return false, err
	}

	return true, nil
}

// ForEach get all key and value: cursor-walks the "gdocs" bucket inside a
// read transaction, invoking fn for each pair and stopping on first error.
func (s *Bolt) ForEach(fn func(k, v []byte) error) error {
	return s.db.View(func(tx *bolt.Tx) error {
		b := tx.Bucket(gdocs)
		c := b.Cursor()
		for k, v := c.First(); k != nil; k, v = c.Next() {
			if err := fn(k, v); err != nil {
				return err
			}
		}
		return nil
	})
}

// Close releases all database resources. All transactions
// must be closed before closing the database.
func (s *Bolt) Close() error {
	return s.db.Close()
}
|
107
vendor/github.com/go-ego/riot/store/ldb_store.go
generated
vendored
Normal file
107
vendor/github.com/go-ego/riot/store/ldb_store.go
generated
vendored
Normal file
@ -0,0 +1,107 @@
|
||||
// Copyright 2016 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package store
|
||||
|
||||
import (
|
||||
"github.com/syndtr/goleveldb/leveldb"
|
||||
)
|
||||
|
||||
// Leveldb leveldb store: adapts *leveldb.DB to the package's Store interface.
type Leveldb struct {
	db *leveldb.DB
}

// OpenLeveldb opens or creates a DB for the given store. The DB
// will be created if not exist, unless ErrorIfMissing is true.
// Also, if ErrorIfExist is true and the DB exist Open will
// returns os.ErrExist error.
func OpenLeveldb(dbPath string) (Store, error) {
	db, err := leveldb.OpenFile(dbPath, nil)
	if err != nil {
		return nil, err
	}

	return &Leveldb{db}, nil
}

// WALName is useless for this kv database.
func (s *Leveldb) WALName() string {
	return "" // not applicable for this database
}

// Set sets the provided value for a given key.
// If key is not present, it is created. If it is present,
// the existing value is overwritten with the one provided.
func (s *Leveldb) Set(k, v []byte) error {
	return s.db.Put(k, v, nil)
}

// Get gets the value for the given key. It returns
// ErrNotFound if the DB does not contains the key.
//
// The returned slice is its own copy, it is safe to modify
// the contents of the returned slice. It is safe to modify the contents
// of the argument after Get returns.
func (s *Leveldb) Get(k []byte) ([]byte, error) {
	return s.db.Get(k, nil)
}

// Delete deletes the value for the given key. Delete will not
// returns error if key doesn't exist. Write merge also applies
// for Delete, see Write.
//
// It is safe to modify the contents of the arguments after Delete
// returns but not before.
func (s *Leveldb) Delete(k []byte) error {
	return s.db.Delete(k, nil)
}

// Has returns true if the DB does contains the given key.
// It is safe to modify the contents of the argument after Has returns.
func (s *Leveldb) Has(k []byte) (bool, error) {
	return s.db.Has(k, nil)
}

// Len calculates approximate sizes of the given key ranges.
// The length of the returned sizes are equal with the length of
// the given ranges. The returned sizes measure store space usage,
// so if the user data compresses by a factor of ten, the returned
// sizes will be one-tenth the size of the corresponding user data size.
// The results may not include the sizes of recently written data.
func (s *Leveldb) Len() (leveldb.Sizes, error) {
	return s.db.SizeOf(nil)
}

// ForEach get all key and value: iterates the whole DB, invoking fn for
// each pair; stops and returns on the first error from fn.
func (s *Leveldb) ForEach(fn func(k, v []byte) error) error {
	iter := s.db.NewIterator(nil, nil)
	for iter.Next() {
		// Remember that the contents of the returned slice should not be modified, and
		// only valid until the next call to Next.
		key := iter.Key()
		val := iter.Value()
		if err := fn(key, val); err != nil {
			// NOTE(review): early return skips iter.Release() — presumably
			// the iterator is reclaimed with the DB; confirm against the
			// goleveldb docs before relying on it.
			return err
		}
	}
	iter.Release()
	return iter.Error()
}

// Close closes the DB. This will also releases any outstanding snapshot,
// abort any in-flight compaction and discard open transaction.
func (s *Leveldb) Close() error {
	return s.db.Close()
}
|
72
vendor/github.com/go-ego/riot/store/store.go
generated
vendored
Normal file
72
vendor/github.com/go-ego/riot/store/store.go
generated
vendored
Normal file
@ -0,0 +1,72 @@
|
||||
// Copyright 2016 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package store
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
)
|
||||
|
||||
const (
|
||||
// DefaultStore default store engine
|
||||
DefaultStore = "ldb"
|
||||
// DefaultStore = "bad"
|
||||
// DefaultStore = "bolt"
|
||||
)
|
||||
|
||||
var supportedStore = map[string]func(path string) (Store, error){
|
||||
"ldb": OpenLeveldb,
|
||||
"bg": OpenBadger, // bad to bg
|
||||
"bolt": OpenBolt,
|
||||
// "kv": OpenKV,
|
||||
// "ledisdb": Open,
|
||||
}
|
||||
|
||||
// RegisterStore register store engine
|
||||
func RegisterStore(name string, fn func(path string) (Store, error)) {
|
||||
supportedStore[name] = fn
|
||||
}
|
||||
|
||||
// Store is store interface
|
||||
type Store interface {
|
||||
// type KVBatch interface {
|
||||
Set(k, v []byte) error
|
||||
Get(k []byte) ([]byte, error)
|
||||
Delete(k []byte) error
|
||||
Has(k []byte) (bool, error)
|
||||
ForEach(fn func(k, v []byte) error) error
|
||||
Close() error
|
||||
WALName() string
|
||||
}
|
||||
|
||||
// OpenStore open store engine
|
||||
func OpenStore(path string, args ...string) (Store, error) {
|
||||
storeName := DefaultStore
|
||||
|
||||
if len(args) > 0 && args[0] != "" {
|
||||
storeName = args[0]
|
||||
} else {
|
||||
storeEnv := os.Getenv("Riot_Store_Engine")
|
||||
if storeEnv != "" {
|
||||
storeName = storeEnv
|
||||
}
|
||||
}
|
||||
|
||||
if fn, has := supportedStore[storeName]; has {
|
||||
return fn(path)
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("unsupported store engine: %v", storeName)
|
||||
}
|
97
vendor/github.com/go-ego/riot/store_worker.go
generated
vendored
Normal file
97
vendor/github.com/go-ego/riot/store_worker.go
generated
vendored
Normal file
@ -0,0 +1,97 @@
|
||||
// Copyright 2013 Hui Chen
|
||||
// Copyright 2016 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package riot
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
|
||||
"encoding/binary"
|
||||
"encoding/gob"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/go-ego/riot/types"
|
||||
)
|
||||
|
||||
// storeIndexDocReq asks a store worker to persist one document's data.
type storeIndexDocReq struct {
	docId uint64 // document id; becomes the uvarint-encoded store key
	data  types.DocData
	// data types.DocumentIndexData
}
|
||||
|
||||
// storeIndexDocWorker is the long-running goroutine that persists documents
// for one store shard: it gob-encodes each request's data and writes it
// under the uvarint encoding of the doc id, bumping numDocsStored per request.
func (engine *Engine) storeIndexDocWorker(shard int) {
	for {
		request := <-engine.storeIndexDocChans[shard]

		// Build the key: uvarint encoding of the doc id.
		b := make([]byte, 10)
		length := binary.PutUvarint(b, request.docId)

		// Build the value: gob encoding of the document data.
		var buf bytes.Buffer
		enc := gob.NewEncoder(&buf)
		err := enc.Encode(request.data)
		if err != nil {
			// NOTE(review): the counter is bumped even though nothing was
			// written — presumably so completion counts stay consistent
			// with the number of requests; confirm before changing.
			atomic.AddUint64(&engine.numDocsStored, 1)
			continue
		}

		// has, err := engine.dbs[shard].Has(b[0:length])
		// if err != nil {
		// log.Println("engine.dbs[shard].Has(b[0:length]) ", err)
		// }

		// if has {
		// engine.dbs[shard].Delete(b[0:length])
		// }

		// Write the key-value pair into the shard's database.
		// NOTE(review): the Set error is ignored here.
		engine.dbs[shard].Set(b[0:length], buf.Bytes())

		// NOTE(review): a mutex around an atomic add — the atomic alone
		// would suffice unless loc guards something else; verify.
		engine.loc.Lock()
		atomic.AddUint64(&engine.numDocsStored, 1)
		engine.loc.Unlock()
	}
}
|
||||
|
||||
func (engine *Engine) storeRemoveDocWorker(docId uint64, shard uint32) {
|
||||
// 得到 key
|
||||
b := make([]byte, 10)
|
||||
length := binary.PutUvarint(b, docId)
|
||||
|
||||
// 从数据库删除该 key
|
||||
engine.dbs[shard].Delete(b[0:length])
|
||||
}
|
||||
|
||||
// storeInitWorker is the persistent-storage init worker: it replays every
// record in one shard's database — decoding the uvarint doc id and the
// gob-encoded document data — back into the in-memory index, then signals
// completion on storeInitChan. Records that fail to decode are skipped.
func (engine *Engine) storeInitWorker(shard int) {
	engine.dbs[shard].ForEach(func(k, v []byte) error {
		key, value := k, v
		// Decode the doc id from the key.
		docId, _ := binary.Uvarint(key)

		// Decode the document data from the value.
		buf := bytes.NewReader(value)
		dec := gob.NewDecoder(buf)
		var data types.DocData
		err := dec.Decode(&data)
		if err == nil {
			// Rebuild the index entry for this document.
			engine.internalIndexDoc(docId, data, false)
		}
		// Always continue the iteration, even on decode failure.
		return nil
	})
	engine.storeInitChan <- true
}
|
33
vendor/github.com/go-ego/riot/types/BUILD.bazel
generated
vendored
Normal file
33
vendor/github.com/go-ego/riot/types/BUILD.bazel
generated
vendored
Normal file
@ -0,0 +1,33 @@
|
||||
# Generated Bazel build file for the vendored
# github.com/go-ego/riot/types package (tags: automanaged).
# Do not edit by hand; regenerate with the vendoring tooling.
load("@io_bazel_rules_go//go:def.bzl", "go_library")

go_library(
    name = "go_default_library",
    srcs = [
        "doc_index_data.go",
        "doc_info.go",
        "engine_init_options.go",
        "index.go",
        "indexer_init_options.go",
        "scoring_criteria.go",
        "search_request.go",
        "search_response.go",
    ],
    importmap = "go-common/vendor/github.com/go-ego/riot/types",
    importpath = "github.com/go-ego/riot/types",
    visibility = ["//visibility:public"],
    deps = ["//vendor/github.com/go-ego/riot/utils:go_default_library"],
)

filegroup(
    name = "package-srcs",
    srcs = glob(["**"]),
    tags = ["automanaged"],
    visibility = ["//visibility:private"],
)

filegroup(
    name = "all-srcs",
    srcs = [":package-srcs"],
    tags = ["automanaged"],
    visibility = ["//visibility:public"],
)
|
62
vendor/github.com/go-ego/riot/types/doc_index_data.go
generated
vendored
Normal file
62
vendor/github.com/go-ego/riot/types/doc_index_data.go
generated
vendored
Normal file
@ -0,0 +1,62 @@
|
||||
// Copyright 2013 Hui Chen
|
||||
// Copyright 2016 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package types
|
||||
|
||||
// DocIndexData is the document index data type.
// type DocIndexData DocData
type DocIndexData = DocData

// DocData holds the indexable data of a single document.
type DocData struct {
	// Content is the full document text (must be UTF-8); the keywords
	// to be indexed are segmented from it.
	Content string

	// new: category
	// Class string
	// Attri carries additional, caller-defined document attributes.
	Attri interface{}

	// Tokens are the document's keywords. When Content is non-empty,
	// keywords segmented from Content take priority. Tokens exists so
	// that segmentation and preprocessing can be done outside the
	// engine, bypassing riot's built-in segmenter.
	// Tokens []*TokenData
	Tokens []TokenData

	// Labels are document tags (must be UTF-8), e.g. category
	// attributes; they do not appear in the document text itself.
	Labels []string

	// Fields is the document's scoring field; it accepts any struct.
	Fields interface{}
}

// TokenData describes one keyword of a document.
type TokenData struct {
	// Text is the keyword string.
	Text string

	// Locations are the byte offsets in the document at which the
	// keyword's first byte occurs.
	Locations []int
}

// Attri is a sample document attribute struct.
type Attri struct {
	Title  string `json:"title"`
	Author string `json:"author"`
	Time   string `json:"time"`
	Ts     int64  `json:"ts"`
}
|
51
vendor/github.com/go-ego/riot/types/doc_info.go
generated
vendored
Normal file
51
vendor/github.com/go-ego/riot/types/doc_info.go
generated
vendored
Normal file
@ -0,0 +1,51 @@
|
||||
// Copyright 2013 Hui Chen
|
||||
// Copyright 2016 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package types
|
||||
|
||||
import (
|
||||
"sync"
|
||||
)
|
||||
|
||||
// DocInfosShard holds per-shard document info, keyed by docId.
type DocInfosShard struct {
	DocInfos map[uint64]*DocInfo
	NumDocs  uint64 // an approximation of the total number of documents
	sync.RWMutex
}

// DocInfo is the per-document info kept for scoring.
type DocInfo struct {
	Fields    interface{}
	TokenLens float32
}

/// inverted_index.go

// InvertedIndexShard is one shard of the inverted index
// (keyword -> posting list).
type InvertedIndexShard struct {
	InvertedIndex map[string]*KeywordIndices
	TotalTokenLen float32 // total number of keywords
	sync.RWMutex
}

// KeywordIndices is one row of the inverted index: all documents in
// which a search key occurs, sorted by DocId in ascending order.
type KeywordIndices struct {
	// Which of the following slices are populated depends on the
	// IndexType chosen at initialization.
	DocIds      []uint64  // present for all index types
	Frequencies []float32 // IndexType == FrequenciesIndex
	Locations   [][]int   // IndexType == LocsIndex
}
|
158
vendor/github.com/go-ego/riot/types/engine_init_options.go
generated
vendored
Normal file
158
vendor/github.com/go-ego/riot/types/engine_init_options.go
generated
vendored
Normal file
@ -0,0 +1,158 @@
|
||||
// Copyright 2013 Hui Chen
|
||||
// Copyright 2016 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package types
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
)
|
||||
|
||||
var (
	// Default values for EngineOpts.
	// defaultNumGseThreads is the default number of segmenter threads.
	defaultNumGseThreads = runtime.NumCPU()
	// defaultNumShards = 2
	defaultNumShards                 = 8
	defaultIndexerBufLen             = runtime.NumCPU()
	defaultNumIndexerThreadsPerShard = runtime.NumCPU()
	defaultRankerBufLen              = runtime.NumCPU()
	defaultNumRankerThreadsPerShard  = runtime.NumCPU()
	defaultDefaultRankOpts           = RankOpts{
		ScoringCriteria: RankByBM25{},
	}
	defaultIndexerOpts = IndexerOpts{
		IndexType:      FrequenciesIndex,
		BM25Parameters: &defaultBM25Parameters,
	}
	defaultBM25Parameters = BM25Parameters{
		K1: 2.0,
		B:  0.75,
	}
	defaultStoreShards = 8
)
|
||||
|
||||
// EngineOpts are the engine initialization options.
type EngineOpts struct {
	// NotUseGse controls whether the built-in gse segmenter is used.
	// By default it is used; set NotUseGse to true to skip loading
	// GseDict and StopTokenFile at startup. Note that with the
	// segmenter disabled, the Content field of DocIndexData is
	// ignored by IndexDoc.
	// Not use the gse segment
	NotUseGse bool `toml:"not_use_gse"`

	// Using selects the segmentation mode.
	Using int `toml:"using"`

	// GseDict is a comma-separated list of dictionary files; see the
	// comment on gse.Segmenter.LoadDict for usage details.
	GseDict string `toml:"gse_dict"`
	// GseDict []string
	SegmenterDict string

	// StopTokenFile is the stop-word file.
	StopTokenFile string `toml:"stop_file"`
	// GseMode enables gse search mode.
	GseMode bool `toml:"gse_mode"`

	// NumGseThreads is the number of segmenter threads.
	// NumSegmenterThreads int
	NumGseThreads int

	// NumShards is the number of indexer and ranker shards; documents
	// to be indexed/ranked are distributed evenly across the shards.
	NumShards int

	// IndexerBufLen is the indexer channel buffer length.
	IndexerBufLen int

	// NumIndexerThreadsPerShard is the thread count per indexer shard.
	NumIndexerThreadsPerShard int

	// RankerBufLen is the ranker channel buffer length.
	RankerBufLen int

	// NumRankerThreadsPerShard is the thread count per ranker shard.
	NumRankerThreadsPerShard int

	// IndexerOpts are the indexer initialization options.
	IndexerOpts *IndexerOpts

	// DefaultRankOpts are the default search/rank options.
	DefaultRankOpts *RankOpts

	// Persistent-store settings: whether to use a persistent database,
	// plus the folder, shard count and backend used for the store.
	StoreOnly bool `toml:"store_only"`
	UseStore  bool `toml:"use_store"`

	StoreFolder string `toml:"store_folder"`
	StoreShards int    `toml:"store_shards"`
	StoreEngine string `toml:"store_engine"`

	IDOnly bool `toml:"id_only"`
}
|
||||
|
||||
// Init init engine options
|
||||
// 初始化 EngineOpts,当用户未设定某个选项的值时用默认值取代
|
||||
func (options *EngineOpts) Init() {
|
||||
// if !options.NotUseGse && options.GseDict == "" {
|
||||
// log.Fatal("字典文件不能为空")
|
||||
// options.GseDict = "zh"
|
||||
// }
|
||||
|
||||
if options.NumGseThreads == 0 {
|
||||
options.NumGseThreads = defaultNumGseThreads
|
||||
}
|
||||
|
||||
if options.NumShards == 0 {
|
||||
options.NumShards = defaultNumShards
|
||||
}
|
||||
|
||||
if options.IndexerBufLen == 0 {
|
||||
options.IndexerBufLen = defaultIndexerBufLen
|
||||
}
|
||||
|
||||
if options.NumIndexerThreadsPerShard == 0 {
|
||||
options.NumIndexerThreadsPerShard = defaultNumIndexerThreadsPerShard
|
||||
}
|
||||
|
||||
if options.RankerBufLen == 0 {
|
||||
options.RankerBufLen = defaultRankerBufLen
|
||||
}
|
||||
|
||||
if options.NumRankerThreadsPerShard == 0 {
|
||||
options.NumRankerThreadsPerShard = defaultNumRankerThreadsPerShard
|
||||
}
|
||||
|
||||
if options.IndexerOpts == nil {
|
||||
options.IndexerOpts = &defaultIndexerOpts
|
||||
}
|
||||
|
||||
if options.IndexerOpts.BM25Parameters == nil {
|
||||
options.IndexerOpts.BM25Parameters = &defaultBM25Parameters
|
||||
}
|
||||
|
||||
if options.DefaultRankOpts == nil {
|
||||
options.DefaultRankOpts = &defaultDefaultRankOpts
|
||||
}
|
||||
|
||||
if options.DefaultRankOpts.ScoringCriteria == nil {
|
||||
options.DefaultRankOpts.ScoringCriteria = defaultDefaultRankOpts.ScoringCriteria
|
||||
}
|
||||
|
||||
if options.StoreShards == 0 {
|
||||
options.StoreShards = defaultStoreShards
|
||||
}
|
||||
}
|
97
vendor/github.com/go-ego/riot/types/index.go
generated
vendored
Normal file
97
vendor/github.com/go-ego/riot/types/index.go
generated
vendored
Normal file
@ -0,0 +1,97 @@
|
||||
// Copyright 2013 Hui Chen
|
||||
// Copyright 2016 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
/*
|
||||
|
||||
Package types is riot types
|
||||
*/
|
||||
package types
|
||||
|
||||
// DocIndex is a document's index: the data the indexer keeps per document.
type DocIndex struct {
	// DocId is the document's id.
	DocId uint64

	// TokenLen is the document's keyword length.
	TokenLen float32

	// Keywords are the index keys added for this document.
	Keywords []KeywordIndex
}

// KeywordIndex is one inverted-index entry; it marks a
// (search key, document) pair.
type KeywordIndex struct {
	// Text is the UTF-8 text of the search key.
	Text string

	// Frequency is the search key's term frequency.
	Frequency float32

	// Starts are the byte offsets of the key in the document,
	// in ascending order.
	Starts []int
}

// IndexedDoc is the result returned by the indexer.
type IndexedDoc struct {
	// DocId is the document id.
	DocId uint64

	// BM25 is valid only when the index type is FrequenciesIndex
	// or LocsIndex.
	BM25 float32

	// TokenProximity is the keyword proximity inside the document;
	// see the comment on computeTokenProximity for its meaning.
	// Valid only when the index type is LocsIndex.
	TokenProximity int32

	// TokenSnippetLocs are the keyword positions derived from the
	// proximity computation; same length as, and aligned with, the
	// tokens passed to Lookup. Valid only for LocsIndex.
	TokenSnippetLocs []int

	// TokenLocs are the exact keyword positions in the text.
	// Valid only for LocsIndex.
	TokenLocs [][]int
}

// DocsIndex makes it convenient to add document indexes in bulk;
// it implements sort.Interface, ordering by ascending DocId.
type DocsIndex []*DocIndex

// Len reports the number of document indexes.
func (docs DocsIndex) Len() int { return len(docs) }

// Swap exchanges the elements at i and j.
func (docs DocsIndex) Swap(i, j int) { docs[j], docs[i] = docs[i], docs[j] }

// Less orders by ascending DocId.
func (docs DocsIndex) Less(i, j int) bool { return docs[i].DocId < docs[j].DocId }

// DocsId makes it convenient to remove document indexes in bulk;
// it implements sort.Interface over raw doc ids.
type DocsId []uint64

// Len reports the number of ids.
func (docs DocsId) Len() int { return len(docs) }

// Swap exchanges the elements at i and j.
func (docs DocsId) Swap(i, j int) { docs[j], docs[i] = docs[i], docs[j] }

// Less orders ids ascending.
func (docs DocsId) Less(i, j int) bool { return docs[i] < docs[j] }
|
58
vendor/github.com/go-ego/riot/types/indexer_init_options.go
generated
vendored
Normal file
58
vendor/github.com/go-ego/riot/types/indexer_init_options.go
generated
vendored
Normal file
@ -0,0 +1,58 @@
|
||||
// Copyright 2013 Hui Chen
|
||||
// Copyright 2016 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package types
|
||||
|
||||
// These constants select what kind of data the inverted index stores.
const (
	// DocIdsIndex stores only the documents' docIds.
	DocIdsIndex = 0

	// FrequenciesIndex additionally stores keyword term frequencies,
	// used to compute BM25.
	FrequenciesIndex = 1

	// LocsIndex stores the exact byte positions (possibly several) at
	// which a keyword occurs in a document. Required if you want
	// keyword-proximity data.
	LocsIndex = 2

	// defaultDocCacheSize is the default cache size for documents
	// waiting to be inserted into the index table.
	defaultDocCacheSize = 300000
)

// IndexerOpts are the indexer initialization options.
type IndexerOpts struct {
	// IndexType is the index-table type; see the constants above.
	IndexType int

	// DocCacheSize is the cache size for documents pending insertion
	// into the index table.
	DocCacheSize int

	// BM25Parameters are the BM25 scoring parameters.
	BM25Parameters *BM25Parameters
}

// BM25Parameters — see http://en.wikipedia.org/wiki/Okapi_BM25.
// Default values are defined in engine_init_options.go.
type BM25Parameters struct {
	K1 float32
	B  float32
}

// Init fills in the default document cache size when the caller left
// DocCacheSize at its zero value.
func (options *IndexerOpts) Init() {
	if options.DocCacheSize != 0 {
		return
	}
	options.DocCacheSize = defaultDocCacheSize
}
|
33
vendor/github.com/go-ego/riot/types/scoring_criteria.go
generated
vendored
Normal file
33
vendor/github.com/go-ego/riot/types/scoring_criteria.go
generated
vendored
Normal file
@ -0,0 +1,33 @@
|
||||
// Copyright 2013 Hui Chen
|
||||
// Copyright 2016 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package types
|
||||
|
||||
// ScoringCriteria is the common interface for scoring rules.
type ScoringCriteria interface {
	// Score scores one document. When sorting, documents are compared
	// by the first score value; on a tie the second value is used, and
	// so on. Returning an empty slice removes the document from the
	// final ranking.
	Score(doc IndexedDoc, fields interface{}) []float32
}

// RankByBM25 is a simple scoring rule: the document score is its BM25.
type RankByBM25 struct {
}

// Score returns the document's BM25 as its only score value.
func (rule RankByBM25) Score(doc IndexedDoc, fields interface{}) []float32 {
	return []float32{doc.BM25}
}
|
99
vendor/github.com/go-ego/riot/types/search_request.go
generated
vendored
Normal file
99
vendor/github.com/go-ego/riot/types/search_request.go
generated
vendored
Normal file
@ -0,0 +1,99 @@
|
||||
// Copyright 2013 Hui Chen
|
||||
// Copyright 2016 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package types
|
||||
|
||||
// SearchReq holds the search request options.
type SearchReq struct {
	// Text is the phrase to search for (must be UTF-8); it will be
	// segmented. When empty, the keywords are read from Tokens below.
	Text string

	// Tokens are the search keywords (must be UTF-8); when Text is
	// non-empty it takes priority. You normally do not need to supply
	// keywords yourself unless you run your own segmenter.
	Tokens []string

	// Labels are document tags (must be UTF-8). Labels do not occur in
	// the document text but still count as search keys.
	Labels []string

	// category
	// Class string

	// Logic is the logical retrieval expression.
	Logic Logic

	// DocIds, when non-nil, restricts the search to the keys of this
	// map (the values are ignored).
	DocIds map[uint64]bool

	// RankOpts are the ranking options.
	RankOpts *RankOpts

	// Timeout in milliseconds; values <= 0 mean no timeout.
	// On timeout, partial ranked results may still be returned.
	Timeout int

	// CountDocsOnly, when true, only counts the matching documents
	// without returning them.
	CountDocsOnly bool

	// Orderless skips ranking; useful when sorting can happen outside
	// the engine (e.g. on the client). For queries returning many
	// documents this saves significant time.
	Orderless bool
}

// RankOpts holds the ranking options.
type RankOpts struct {
	// ScoringCriteria is the document scoring rule; when nil, the rule
	// configured at Engine initialization is used.
	ScoringCriteria ScoringCriteria

	// ReverseOrder: by default (false) results are sorted from highest
	// to lowest score; true reverses that order.
	ReverseOrder bool

	// OutputOffset is the index of the first result to output.
	OutputOffset int

	// MaxOutputs is the maximum number of results; 0 means unlimited.
	MaxOutputs int
}

// Logic holds the logical query options.
type Logic struct {
	// return all doc
	// All bool

	// Must: AND query — every key must be present.
	Must bool

	// Should: OR query — at least one key must be present.
	Should bool

	// NotIn: NOT query — the keys must be absent.
	NotIn bool

	LogicExpr LogicExpr
}

// LogicExpr holds the label sets for a logical query expression.
type LogicExpr struct {

	// MustLabels: AND — every label must be present.
	MustLabels []string

	// ShouldLabels: OR — at least one label must be present.
	ShouldLabels []string

	// NotInLabels: NOT — the labels must be absent.
	NotInLabels []string
}
|
152
vendor/github.com/go-ego/riot/types/search_response.go
generated
vendored
Normal file
152
vendor/github.com/go-ego/riot/types/search_response.go
generated
vendored
Normal file
@ -0,0 +1,152 @@
|
||||
// Copyright 2013 Hui Chen
|
||||
// Copyright 2016 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package types
|
||||
|
||||
import (
|
||||
"github.com/go-ego/riot/utils"
|
||||
)
|
||||
|
||||
// SearchResp holds the search response.
type SearchResp struct {
	// Tokens are the keywords that were used for the search.
	Tokens []string

	// category
	// Class string

	// Docs are the matched documents, already ranked.
	// Docs []ScoredDoc
	Docs interface{}

	// Timeout reports whether the search timed out; partial results
	// may still be returned in that case.
	Timeout bool

	// NumDocs is the number of documents in the whole corpus matching
	// the query; it may exceed the number of documents returned.
	NumDocs int
}

// Content is the returned content of a document.
type Content struct {
	// Content is the document text.
	Content string

	// Attri holds the document attributes.
	Attri interface{}

	// Fields holds the scoring fields returned with the document.
	Fields interface{}
}

// ScoredDoc is one scored document in the search result.
type ScoredDoc struct {
	DocId uint64

	// Content is the returned document text.
	Content string
	// Attri holds the returned document attributes.
	Attri interface{}
	// Fields holds the returned scoring fields.
	Fields interface{}

	// Scores are the document's score values. Results are ordered by
	// Scores: first by the first value, ties broken by the second,
	// and so on.
	Scores []float32

	// TokenSnippetLocs are the byte positions in the text of the
	// keywords used to build a snippet; same length as
	// SearchResp.Tokens. Non-empty only when IndexType == LocsIndex.
	TokenSnippetLocs []int

	// TokenLocs are the exact keyword positions in the text.
	// Non-empty only when IndexType == LocsIndex.
	TokenLocs [][]int
}

// ScoredDocs exists to make sorting convenient.
type ScoredDocs []ScoredDoc

func (docs ScoredDocs) Len() int {
	return len(docs)
}

func (docs ScoredDocs) Swap(i, j int) {
	docs[i], docs[j] = docs[j], docs[i]
}

func (docs ScoredDocs) Less(i, j int) bool {
	// Sorts from high score to low, so this actually implements "More":
	// compare score components lexicographically, longer wins on a tie.
	for iScore := 0; iScore < utils.MinInt(len(docs[i].Scores), len(docs[j].Scores)); iScore++ {
		if docs[i].Scores[iScore] > docs[j].Scores[iScore] {
			return true
		} else if docs[i].Scores[iScore] < docs[j].Scores[iScore] {
			return false
		}
	}
	return len(docs[i].Scores) > len(docs[j].Scores)
}

/*
  ______   .__   __.  __      ____    ____  __   _______
 /  __  \  |  \ |  | |  |     \   \  /   /  |  | |       \
|  |  |  | |   \|  | |  |      \   \/   /   |  | |  .--.  |
|  |  |  | |  . `  | |  |       \_    _/    |  | |  |  |  |
|  `--'  | |  |\   | |  `----.    |  |      |  | |  '--'  |
 \______/  |__| \__| |_______|    |__|      |__| |_______/

*/

// ScoredID is a scored document carrying only its id.
type ScoredID struct {
	DocId uint64

	// Scores are the document's score values; ordering semantics as in
	// ScoredDoc.Scores.
	Scores []float32

	// TokenSnippetLocs — see ScoredDoc.TokenSnippetLocs.
	// Non-empty only when IndexType == LocsIndex.
	TokenSnippetLocs []int

	// TokenLocs — see ScoredDoc.TokenLocs.
	// Non-empty only when IndexType == LocsIndex.
	TokenLocs [][]int
}

// ScoredIDs exists to make sorting convenient.
type ScoredIDs []ScoredID

func (docs ScoredIDs) Len() int {
	return len(docs)
}

func (docs ScoredIDs) Swap(i, j int) {
	docs[i], docs[j] = docs[j], docs[i]
}

func (docs ScoredIDs) Less(i, j int) bool {
	// Sorts from high score to low, so this actually implements "More":
	// compare score components lexicographically, longer wins on a tie.
	for iScore := 0; iScore < utils.MinInt(len(docs[i].Scores), len(docs[j].Scores)); iScore++ {
		if docs[i].Scores[iScore] > docs[j].Scores[iScore] {
			return true
		} else if docs[i].Scores[iScore] < docs[j].Scores[iScore] {
			return false
		}
	}
	return len(docs[i].Scores) > len(docs[j].Scores)
}
|
26
vendor/github.com/go-ego/riot/utils/BUILD.bazel
generated
vendored
Normal file
26
vendor/github.com/go-ego/riot/utils/BUILD.bazel
generated
vendored
Normal file
@ -0,0 +1,26 @@
|
||||
# Generated Bazel build file for the vendored
# github.com/go-ego/riot/utils package (tags: automanaged).
# Do not edit by hand; regenerate with the vendoring tooling.
load("@io_bazel_rules_go//go:def.bzl", "go_library")

go_library(
    name = "go_default_library",
    srcs = [
        "test_utils.go",
        "utils.go",
    ],
    importmap = "go-common/vendor/github.com/go-ego/riot/utils",
    importpath = "github.com/go-ego/riot/utils",
    visibility = ["//visibility:public"],
)

filegroup(
    name = "package-srcs",
    srcs = glob(["**"]),
    tags = ["automanaged"],
    visibility = ["//visibility:private"],
)

filegroup(
    name = "all-srcs",
    srcs = [":package-srcs"],
    tags = ["automanaged"],
    visibility = ["//visibility:public"],
)
|
1
vendor/github.com/go-ego/riot/utils/test_utils.go
generated
vendored
Normal file
1
vendor/github.com/go-ego/riot/utils/test_utils.go
generated
vendored
Normal file
@ -0,0 +1 @@
|
||||
package utils
|
31
vendor/github.com/go-ego/riot/utils/utils.go
generated
vendored
Normal file
31
vendor/github.com/go-ego/riot/utils/utils.go
generated
vendored
Normal file
@ -0,0 +1,31 @@
|
||||
// Copyright 2016 ego authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"): you may
|
||||
// not use this file except in compliance with the License. You may obtain
|
||||
// a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package utils
|
||||
|
||||
// AbsInt returns the absolute value of a.
func AbsInt(a int) int {
	if a >= 0 {
		return a
	}
	return -a
}

// MinInt returns the smaller of a and b.
func MinInt(a, b int) int {
	if b < a {
		return b
	}
	return a
}
|
Reference in New Issue
Block a user