edx / cs_comments_service · Commits
Commit 5f90d8fd
authored Jul 25, 2013 by Kevin Chugh

fix merge conflict

Parents: 069a5475 55cc9867

Showing 9 changed files with 248 additions and 11 deletions (+248 −11)
LICENSE.txt                  +10   -0
Rakefile                      +6   -2
app.rb                        +6   -0
config/application.yml        +1   -0
config/mongoid.yml            +1   -0
lib/tasks/deep_search.rake   +70   -0
models/comment.rb            +12   -0
models/comment_thread.rb     +79   -9
spec/api/query_spec.rb       +63   -0
LICENSE.txt
@@ -659,3 +659,13 @@ specific requirements.
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU AGPL, see
<http://www.gnu.org/licenses/>.
EdX Inc. wishes to state, in clarification of the above license terms, that
any public, independently available web service offered over the network and
communicating with edX's copyrighted works by any form of inter-service
communication, including but not limited to Remote Procedure Call (RPC)
interfaces, is not a work based on our copyrighted work within the meaning
of the license. "Corresponding Source" of this work, or works based on this
work, as defined by the terms of this license do not include source code
files for programs used solely to provide those public, independently
available web services.
Rakefile
@@ -224,8 +224,11 @@ namespace :db do
  end

  task :reindex_search => :environment do

    Mongoid.identity_map_enabled = false
    #Mongoid.identity_map_enabled = false
    #klasses = [CommentThread,Comment]
    #klasses.each do |klass|
    klass = CommentThread
    ENV['CLASS'] = klass.name
    ENV['INDEX'] = new_index = klass.tire.index.name << '_' << Time.now.strftime('%Y%m%d%H%M%S')

@@ -249,7 +252,7 @@ namespace :db do
        i.delete if i.exists?
      end
    else
      puts "[IMPORT] no aliases found. deleting index. creating new one and setting up alias."
      puts "[IMPORT] no aliases found. deleting index. Creating new one for #{klass} and setting up alias."
      klass.tire.index.delete
      a = Tire::Alias.new
      a.name(klass.tire.index.name)

@@ -258,6 +261,7 @@ namespace :db do
    end
    puts "[IMPORT] done. Index: '#{new_index}' created."
    #end
  end

  task :add_anonymous_to_peers => :environment do
app.rb
@@ -22,6 +22,10 @@ if ["staging", "production", "loadtest", "edgestage","edgeprod"].include? environment
  require 'newrelic_rpm'
end

if ENV["ENABLE_GC_PROFILER"]
  GC::Profiler.enable
end

set :cache, Dalli::Client.new

application_yaml = ERB.new(File.read("config/application.yml")).result()

@@ -48,6 +52,8 @@ DEFAULT_PER_PAGE = 20
if RACK_ENV.to_s != "test" # disable api_key auth in test environment
  before do
    #duct tape to avoid 401 on deep search performance test
    #error 401 unless params[:api_key] == CommentService.config[:api_key] or true
    error 401 unless params[:api_key] == CommentService.config[:api_key]
  end
end
config/application.yml
@@ -5,3 +5,4 @@ elasticsearch_server: <%= ENV['SEARCH_SERVER'] || 'http://localhost:9200' %>
cache_timeout:
  threads_search: 10
  threads_query: 10
max_deep_search_comment_count: 5000
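The new max_deep_search_comment_count value caps how many comment hits the deep search pulls back (it is read in CommentThread.perform_search below). As a rough illustration of how an ERB-templated YAML value like this gets loaded — a minimal sketch under the assumption that it is run from the repo root, not the commit's own loader, though app.rb uses the same ERB + File.read pattern:

# Minimal sketch (assumption: working directory is the repo root).
require 'erb'
require 'yaml'

rendered = ERB.new(File.read("config/application.yml")).result
config   = YAML.load(rendered)

puts config["max_deep_search_comment_count"].to_i   # => 5000 after this change
puts config["cache_timeout"]["threads_search"].to_i # => 10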
config/mongoid.yml
@@ -22,6 +22,7 @@ production:
  default:
    hosts:
      - hurley.member0.mongohq.com:10000
      - hurley.member1.mongohq.com:10000
    username: <%= ENV['MONGOHQ_USER'] %>
    password: <%= ENV['MONGOHQ_PASS'] %>
    database: app6929933
lib/tasks/deep_search.rake
0 → 100644 (new file)
require 'rest_client'

roots = {}
roots['development'] = "http://localhost:8000"
roots['test'] = "http://localhost:8000"
roots['production'] = "http://edx.org"
roots['staging'] = "http://stage.edx.org"
ROOT = roots[ENV['SINATRA_ENV']]

namespace :deep_search do

  task :performance => :environment do
    #USAGE
    #SINATRA_ENV=development rake kpis:prolific
    #or
    #SINATRA_ENV=development bundle exec rake kpis:prolific

    #create comment and thread bodies
    bodies = []
    50.times do |i|
      bodies << (0...8).map{ ('a'..'z').to_a[rand(26)] }.join
    end

    parents = CommentThread.limit(100)

    #now create comments and threads with hits
    puts "Manufacturing Threads"
    100.times do |j|
      (1..5).to_a.sample.times do |i|
        c = CommentThread.new
        c.course_id = 'sample course'
        c.title = 'sample title'
        c.commentable_id = 'sample commetable'
        c.body = bodies.sample
        c.author = 1
        c.save
      end
    end

    puts "Manufacturing Comments"
    100.times do |j|
      (1..5).to_a.sample.times do |i|
        c = Comment.new
        c.course_id = 'sample course'
        c.body = bodies.sample
        c.comment_thread_id = parents.sample.id
        c.author = 1
        c.save
      end
    end

    sort_keys = %w[date activity votes comments]
    sort_order = "desc"

    #set the sinatra env to test to avoid 401'ing
    set :environment, :test

    start_time = Time.now
    puts "Starting test at #{start_time}"

    1000.times do |i|
      query_params = { course_id: "1", sort_key: sort_keys.sample, sort_order: sort_order, page: 1, per_page: 5, text: bodies.sample }
      RestClient.get "#{PREFIX}/threads", params: query_params
    end

    end_time = Time.now
    puts "Ending test at #{end_time}"
    puts "Total Time: #{(end_time - start_time).to_f} seconds"

  end
end
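Both this task and spec/api/query_spec.rb build their search text with the same one-liner; pulled out on its own (the method name here is illustrative, not part of the commit), it simply produces a random 8-character lowercase string:

# Standalone equivalent of the body generator used above (illustrative only).
def random_body(length = 8)
  (0...length).map { ('a'..'z').to_a[rand(26)] }.join
end

puts random_body # e.g. "kqzvmrwe"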
models/comment.rb
@@ -18,6 +18,18 @@ class Comment < Content
  index({author_id: 1, course_id: 1})

  include Tire::Model::Search
  include Tire::Model::Callbacks

  mapping do
    indexes :body, type: :string, analyzer: :snowball, stored: true, term_vector: :with_positions_offsets
    indexes :course_id, type: :string, index: :not_analyzed, included_in_all: false
    #indexes :comment_thread_id, type: :string, stored: true, index: :not_analyzed, included_in_all: false
    #current prod tire doesn't support indexing BSON ids, will reimplement when we upgrade
  end

  belongs_to :comment_thread, index: true
  belongs_to :author, class_name: "User", index: true

  after_save :update_thread_index
models/comment_thread.rb
@@ -45,10 +45,8 @@ class CommentThread < Content
    indexes :commentable_id, type: :string, index: :not_analyzed, included_in_all: false
    indexes :author_id, type: :string, as: 'author_id', index: :not_analyzed, included_in_all: false
    indexes :group_id, type: :integer, as: 'group_id', index: :not_analyzed, included_in_all: false
    #indexes :pinned, type: :boolean, as: 'pinned', index: :not_analyzed, included_in_all: false
    indexes :comments do
      indexes :body, analyzer: 'snowball'
    end
    indexes :id, :index => :not_analyzed
    indexes :thread_id, :analyzer => :keyword, :as => "_id"
  end

  belongs_to :author, class_name: "User", inverse_of: :comment_threads, index: true #, autosave: true

@@ -96,6 +94,7 @@ class CommentThread < Content
  end

  def self.perform_search(params, options={})
    page = [1, options[:page] || 1].max
    per_page = options[:per_page] || 20
    sort_key = options[:sort_key]

@@ -107,6 +106,23 @@ class CommentThread < Content
        return results
      end
    end

    #GET /api/v1/search/threads?user_id=1&recursive=False&sort_key=date&text=response&sort_order=desc&course_id=HarvardX%2FHLS1xD%2FCopyright&per_page=20&api_key=PUT_YOUR_API_KEY_HERE&page=1
    #KChugh - Unfortunately, there's no algorithmically nice way to handle pagination with
    #stitching together Comments and CommentThreads, because there is no deterministic relationship
    #between the ordinality of comments and threads.
    #the best solution is to find all of the thread ids for matching comment hits, and union them
    #with the comment thread query, however, Tire does not support ORing a query key with a term filter
    #so the 3rd best solution is to run two Tire searches (3 actually, one to query the comments, one to query the threads based on
    #thread ids and the original thread search) and merge the results, uniquifying the results in the process.

    #so first, find the comment threads associated with comments that hit the query
    search = Tire::Search::Search.new 'comment_threads'
    search.query {|query| query.text :_all, params["text"]} if params["text"]
    search.highlight({title: {number_of_fragments: 0}}, {body: {number_of_fragments: 0}}, options: {tag: "<highlight>"})

@@ -116,21 +132,77 @@ class CommentThread < Content
    search.filter(:term, course_id: params["course_id"]) if params["course_id"]

    if params["group_id"]
      search.filter :or, [
        {:not => {:exists => {:field => :group_id}}},
        {:term => {:group_id => params["group_id"]}}
      ]
    end

    search.sort {|sort| sort.by sort_key, sort_order} if sort_key && sort_order #TODO should have search option 'auto sort or sth'

    #again, b/c there is no relationship in ordinality, we cannot paginate if it's a text query
    if not params["text"]
      search.size per_page
      search.from per_page * (page - 1)
    end

    results = search.results

    #if this is a search query, then also search the comments and harvest the matching comments
    if params["text"]
      search = Tire::Search::Search.new 'comments'
      search.query {|query| query.text :_all, params["text"]} if params["text"]
      search.filter(:term, course_id: params["course_id"]) if params["course_id"]
      search.size CommentService.config["max_deep_search_comment_count"].to_i

      #unfortunately, we cannot paginate here, b/c the ordinality is totally
      #unrelated to that of threads

      c_results = search.results
      comment_ids = c_results.collect {|c| c.id}.uniq
      comments = Comment.where(:id.in => comment_ids)
      thread_ids = comments.collect {|c| c.comment_thread_id}
      #thread_ids = c_results.collect{|c| c.comment_thread_id}
      #as soon as we can add comment thread id to the ES index, via Tire upgrade, we'll
      #use ES instead of mongo to collect the thread ids
      #use the elasticsearch index instead to avoid DB hit

      original_thread_ids = results.collect {|r| r.id}
      #now add the original search thread ids
      thread_ids += original_thread_ids
      thread_ids = thread_ids.uniq

      #now run one more search to harvest the threads and filter by group
      search = Tire::Search::Search.new 'comment_threads'
      search.filter(:terms, :thread_id => thread_ids)
      search.filter(:terms, commentable_id: params["commentable_ids"]) if params["commentable_ids"]
      search.filter(:term, course_id: params["course_id"]) if params["course_id"]
      search.size per_page
      search.from per_page * (page - 1)

      if params["group_id"]
        search.filter :or, [
          {:not => {:exists => {:field => :group_id}}},
          {:term => {:group_id => params["group_id"]}}
        ]
      end

      search.sort {|sort| sort.by sort_key, sort_order} if sort_key && sort_order
      results = search.results
    end

    if CommentService.config[:cache_enabled]
      Sinatra::Application.cache.set(memcached_key, results, CommentService.config[:cache_timeout][:threads_search].to_i)
    end

@@ -184,7 +256,6 @@ class CommentThread < Content
      "group_id" => group_id,
      "pinned" => pinned?,
      "endorsed" => endorsed?)
    if params[:recursive]
      doc = doc.merge("children" => root_comments.map {|c| c.to_hash(recursive: true)})
    end

@@ -231,13 +302,12 @@ class CommentThread < Content
    !!(tag =~ RE_TAG)
  end

  private

  def comment_thread_id
    #so that we can use the comment thread id as a common attribute for flagging
    self.id
  end

  private

  RE_HEADCHAR = /[a-z0-9]/
  RE_ENDONLYCHAR = /\+/
  RE_ENDCHAR = /[a-z0-9\#]/
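As an aside on the strategy the KChugh comment above lays out (comment search → parent thread ids → union with the direct thread hits → one final thread search), here is a minimal standalone sketch of just the id-merge step; the Struct names and sample data are illustrative and not part of the commit:

# Illustrative sketch only: union thread ids found via comment hits with direct thread hits.
CommentHit = Struct.new(:comment_thread_id)
ThreadHit  = Struct.new(:id)

def merge_thread_ids(comment_hits, thread_hits)
  ids = comment_hits.map(&:comment_thread_id) + thread_hits.map(&:id)
  ids.uniq # a thread may match both directly and through one of its comments
end

comment_hits = [CommentHit.new("t1"), CommentHit.new("t2")]
thread_hits  = [ThreadHit.new("t2"), ThreadHit.new("t3")]
p merge_thread_ids(comment_hits, thread_hits) # => ["t1", "t2", "t3"]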
spec/api/query_spec.rb
0 → 100644 (new file)
require 'spec_helper'

describe "app" do
  describe "thread search" do
    describe "GET /api/v1/search/threads" do
      it "returns thread with query match" do

        user = User.find 1
        if user.nil?
          user = create_test_user(1)
        end

        commentable = Commentable.new("question_1")

        random_string = (0...8).map{ ('a'..'z').to_a[rand(26)] }.join

        thread = CommentThread.new(title: "Test title", body: random_string, course_id: "1", commentable_id: commentable.id)
        thread.author = user
        thread.save!

        sleep 3

        get "/api/v1/search/threads", text: random_string
        last_response.should be_ok
        threads = parse(last_response.body)['collection']
        threads.select{|t| t["id"].to_s == thread.id.to_s}.first.should_not be_nil
      end
    end
  end

  describe "comment search" do
    describe "GET /api/v1/search/threads" do
      it "returns thread with comment query match" do

        user = User.find 1
        if user.nil?
          user = create_test_user(1)
        end

        commentable = Commentable.new("question_1")

        random_string = (0...8).map{ ('a'..'z').to_a[rand(26)] }.join

        thread = CommentThread.new(title: "Test title", body: "elephant otter", course_id: "1", commentable_id: commentable.id)
        thread.author = user
        thread.save!

        sleep 3

        comment = Comment.new(body: random_string, course_id: "1", commentable_id: commentable.id)
        comment.author = user
        comment.comment_thread = thread
        comment.save!

        sleep 1

        get "/api/v1/search/threads", text: random_string
        last_response.should be_ok
        threads = parse(last_response.body)['collection']
        threads.select{|t| t["id"].to_s == thread.id.to_s}.first.should_not be_nil
      end
    end
  end
end