Commit 5e22e139 by Greg Price

Add spelling correction to text search queries

If a search has no results, then we will get suggestions from
Elasticsearch, re-query with the top suggestion, and return the results
from that query along with the suggestion that was used.

This change also makes search stricter: a document must now match all of
the search terms, rather than any one of them, to be included in the results.
parent 84efbb62
......@@ -4,7 +4,8 @@ get "#{APIPREFIX}/search/threads" do
local_params = params # Necessary for params to be available inside blocks
sort_criteria = get_sort_criteria(local_params)
if !local_params["text"] || !sort_criteria
search_text = local_params["text"]
if !search_text || !sort_criteria
{}.to_json
else
page = (local_params["page"] || DEFAULT_PAGE).to_i
......@@ -17,36 +18,58 @@ get "#{APIPREFIX}/search/threads" do
# number of documents considered (ordered by update recency), which means
# that matching threads can be missed if the search terms are very common.
thread_ids = Set.new
self.class.trace_execution_scoped(["Custom/get_search_threads/es_search"]) do
search = Tire.search Content::ES_INDEX_NAME do
query do
match [:title, :body], local_params["text"]
filtered do
filter :term, :commentable_id => local_params["commentable_id"] if local_params["commentable_id"]
filter :terms, :commentable_id => local_params["commentable_ids"].split(",") if local_params["commentable_ids"]
filter :term, :course_id => local_params["course_id"] if local_params["course_id"]
if local_params["group_id"]
filter :or, [
{:not => {:exists => {:field => :group_id}}},
{:term => {:group_id => local_params["group_id"]}}
]
get_matching_thread_ids = lambda do |search_text|
self.class.trace_execution_scoped(["Custom/get_search_threads/es_search"]) do
search = Tire.search Content::ES_INDEX_NAME do
query do
match [:title, :body], search_text, :operator => "AND"
filtered do
filter :term, :commentable_id => local_params["commentable_id"] if local_params["commentable_id"]
filter :terms, :commentable_id => local_params["commentable_ids"].split(",") if local_params["commentable_ids"]
filter :term, :course_id => local_params["course_id"] if local_params["course_id"]
if local_params["group_id"]
filter :or, [
{:not => {:exists => {:field => :group_id}}},
{:term => {:group_id => local_params["group_id"]}}
]
end
end
end
sort do
by "updated_at", "desc"
end
size CommentService.config["max_deep_search_comment_count"].to_i
end
sort do
by "updated_at", "desc"
thread_ids = Set.new
search.results.each do |content|
case content.type
when "comment_thread"
thread_ids.add(content.id)
when "comment"
thread_ids.add(content.comment_thread_id)
end
end
size CommentService.config["max_deep_search_comment_count"].to_i
thread_ids
end
search.results.each do |content|
case content.type
when "comment_thread"
thread_ids.add(content.id)
when "comment"
thread_ids.add(content.comment_thread_id)
end
# Sadly, Elasticsearch does not have a facility for computing suggestions
# with respect to a filter. It would be expensive to determine the best
# suggestion with respect to our filter parameters, so we simply re-query
# with the top suggestion. If that has no results, then we return no results
# and no correction.
thread_ids = get_matching_thread_ids.call(search_text)
corrected_text = nil
if thread_ids.empty?
suggest = Tire.suggest Content::ES_INDEX_NAME do
suggestion "" do
text search_text
phrase :_all
end
end
corrected_text = suggest.results.texts.first
thread_ids = get_matching_thread_ids.call(corrected_text) if corrected_text
corrected_text = nil if thread_ids.empty?
end
results = nil
......@@ -76,6 +99,7 @@ get "#{APIPREFIX}/search/threads" do
self.class.trace_execution_scoped(['Custom/get_search_threads/json_serialize']) do
json_output = {
collection: collection,
corrected_text: corrected_text,
total_results: total_results,
num_pages: num_pages,
page: page,
......
......@@ -164,6 +164,72 @@ describe "app" do
end
end
# Exercises the spelling-correction behaviour of the thread search endpoint:
# when the original query matches nothing, the response is expected to carry
# the corrected query in "corrected_text" along with that query's results.
describe "spelling correction" do
  let(:commentable_id) { "test_commentable" }

  # Searches for +original_text+ and asserts that the request succeeds, that
  # the response reports +corrected_text+ as the correction (nil meaning no
  # correction was applied) and that at least one result came back.
  def check_correction(original_text, corrected_text)
    get "/api/v1/search/threads", text: original_text
    last_response.should be_ok
    payload = parse(last_response.body)
    payload["corrected_text"].should == corrected_text
    payload["collection"].first.should_not be_nil
  end

  # Fixture: one thread with one comment. "green" occurs only in the thread
  # and "greed" only in the comment, so each is a near-miss of the other.
  before do
    seeded_thread = make_thread(author, "a thread about green artichokes", course_id, commentable_id)
    make_comment(author, seeded_thread, "a comment about greed pineapples")
    refresh_es_index
  end

  # Example description => [text sent to the search, expected "corrected_text"].
  {
    "can correct a word appearing only in a comment" =>
      ["pinapples", "pineapples"],
    "can correct a word appearing only in a thread" =>
      ["arichokes", "artichokes"],
    "can correct a word appearing in both a comment and a thread" =>
      ["abot", "about"],
    "can correct a word with multiple errors" =>
      ["artcokes", "artichokes"],
    "can correct misspellings in different terms in the same search" =>
      ["comment abot pinapples", "comment about pineapples"],
    "does not correct a word that appears in a thread but has a correction and no matches in comments" =>
      ["green", nil],
    "does not correct a word that appears in a comment but has a correction and no matches in threads" =>
      ["greed", nil]
  }.each do |description, (query, expected_correction)|
    it description do
      check_correction(query, expected_correction)
    end
  end

  it "does not return a suggestion with no results" do
    # Seed decoy threads whose text is close to the query but which fall
    # outside the course filter used below. Only the top Elasticsearch
    # suggestion is tried, and it is chosen without regard to the filter, so
    # re-querying with it finds nothing: the expected outcome is an empty
    # collection and no correction.
    10.times do
      make_thread(author, "abbot", "other_course_id", "other_commentable_id").tap do |decoy|
        decoy.group_id = 1
        decoy.save!
      end
    end
    refresh_es_index

    get "/api/v1/search/threads", text: "abot", course_id: course_id
    last_response.should be_ok
    payload = parse(last_response.body)
    payload["corrected_text"].should be_nil
    payload["collection"].should be_empty
  end
end
it "returns the correct values for total_results and num_pages" do
course_id = "test/course/id"
for i in 1..100 do
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment