Commit 5f90d8fd by Kevin Chugh

fix merge conflict

parents 069a5475 55cc9867
......@@ -659,3 +659,13 @@ specific requirements.
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU AGPL, see
<http://www.gnu.org/licenses/>.
EdX Inc. wishes to state, in clarification of the above license terms, that
any public, independently available web service offered over the network and
communicating with edX's copyrighted works by any form of inter-service
communication, including but not limited to Remote Procedure Call (RPC)
interfaces, is not a work based on our copyrighted work within the meaning
of the license. "Corresponding Source" of this work, or works based on this
work, as defined by the terms of this license do not include source code
files for programs used solely to provide those public, independently
available web services.
......@@ -133,8 +133,8 @@ namespace :db do
task :generate_comments, [:commentable_id, :num_threads, :num_top_comments, :num_subcomments] => :environment do |t, args|
args.with_defaults(:num_threads => THREADS_PER_COMMENTABLE,
:num_top_comments=>TOP_COMMENTS_PER_THREAD,
:num_subcomments=> ADDITIONAL_COMMENTS_PER_THREAD)
:num_top_comments=>TOP_COMMENTS_PER_THREAD,
:num_subcomments=> ADDITIONAL_COMMENTS_PER_THREAD)
generate_comments_for(args[:commentable_id], args[:num_threads], args[:num_top_comments], args[:num_subcomments])
end
......@@ -152,54 +152,54 @@ namespace :db do
coll = db.collection("contents")
args[:num].to_i.times do
doc = {"_type" => "CommentThread", "anonymous" => [true, false].sample, "at_position_list" => [],
"tags_array" => [],
"comment_count" => 0, "title" => Faker::Lorem.sentence(6), "author_id" => rand(1..10).to_s,
"body" => Faker::Lorem.paragraphs.join("\n\n"), "course_id" => COURSE_ID, "created_at" => Time.now,
"commentable_id" => COURSE_ID, "closed" => [true, false].sample, "updated_at" => Time.now, "last_activity_at" => Time.now,
"votes" => {"count" => 0, "down" => [], "down_count" => 0, "point" => 0, "up" => [], "up_count" => []}}
coll.insert(doc)
end
binding.pry
Tire.index('comment_threads').delete
CommentThread.create_elasticsearch_index
Tire.index('comment_threads') { import CommentThread.all }
end
task :seed_fast => :environment do
ADDITIONAL_COMMENTS_PER_THREAD = 20
config = YAML.load_file("config/mongoid.yml")[Sinatra::Base.environment]["sessions"]["default"]
connnection = Mongo::Connection.new(config["hosts"][0].split(":")[0], config["hosts"][0].split(":")[1])
db = Mongo::Connection.new.db(config["database"])
coll = db.collection("contents")
Comment.delete_all
CommentThread.each do |thread|
ADDITIONAL_COMMENTS_PER_THREAD.times do
doc = {"_type" => "Comment", "anonymous" => false, "at_position_list" => [],
"author_id" => rand(1..10).to_s, "body" => Faker::Lorem.paragraphs.join("\n\n"),
"comment_thread_id" => BSON::ObjectId.from_string(thread.id.to_s), "course_id" => COURSE_ID,
"created_at" => Time.now,
"endorsed" => [true, false].sample, "parent_ids" => [], "updated_at" => Time.now,
"votes" => {"count" => 0, "down" => [], "down_count" => 0, "point" => 0, "up" => [], "up_count" => []}}
"tags_array" => [],
"comment_count" => 0, "title" => Faker::Lorem.sentence(6), "author_id" => rand(1..10).to_s,
"body" => Faker::Lorem.paragraphs.join("\n\n"), "course_id" => COURSE_ID, "created_at" => Time.now,
"commentable_id" => COURSE_ID, "closed" => [true, false].sample, "updated_at" => Time.now, "last_activity_at" => Time.now,
"votes" => {"count" => 0, "down" => [], "down_count" => 0, "point" => 0, "up" => [], "up_count" => []}}
coll.insert(doc)
end
binding.pry
Tire.index('comment_threads').delete
CommentThread.create_elasticsearch_index
Tire.index('comment_threads') { import CommentThread.all }
end
end
task :seed => :environment do
task :seed_fast => :environment do
ADDITIONAL_COMMENTS_PER_THREAD = 20
config = YAML.load_file("config/mongoid.yml")[Sinatra::Base.environment]["sessions"]["default"]
connnection = Mongo::Connection.new(config["hosts"][0].split(":")[0], config["hosts"][0].split(":")[1])
db = Mongo::Connection.new.db(config["database"])
coll = db.collection("contents")
Comment.delete_all
CommentThread.each do |thread|
ADDITIONAL_COMMENTS_PER_THREAD.times do
doc = {"_type" => "Comment", "anonymous" => false, "at_position_list" => [],
"author_id" => rand(1..10).to_s, "body" => Faker::Lorem.paragraphs.join("\n\n"),
"comment_thread_id" => BSON::ObjectId.from_string(thread.id.to_s), "course_id" => COURSE_ID,
"created_at" => Time.now,
"endorsed" => [true, false].sample, "parent_ids" => [], "updated_at" => Time.now,
"votes" => {"count" => 0, "down" => [], "down_count" => 0, "point" => 0, "up" => [], "up_count" => []}}
coll.insert(doc)
end
end
end
Comment.delete_all
CommentThread.delete_all
CommentThread.recalculate_all_context_tag_weights!
User.delete_all
Notification.delete_all
Subscription.delete_all
Tire.index 'comment_threads' do delete end
CommentThread.create_elasticsearch_index
task :seed => :environment do
Comment.delete_all
CommentThread.delete_all
CommentThread.recalculate_all_context_tag_weights!
User.delete_all
Notification.delete_all
Subscription.delete_all
Tire.index 'comment_threads' do delete end
CommentThread.create_elasticsearch_index
beginning_time = Time.now
beginning_time = Time.now
users = (1..10).map {|id| create_test_user(id)}
users = (1..10).map {|id| create_test_user(id)}
# 3.times do
# other_user = users[1..9].sample
# users.first.subscribe(other_user)
......@@ -224,40 +224,44 @@ namespace :db do
end
task :reindex_search => :environment do
Mongoid.identity_map_enabled = false
klass = CommentThread
ENV['CLASS'] = klass.name
ENV['INDEX'] = new_index = klass.tire.index.name << '_' << Time.now.strftime('%Y%m%d%H%M%S')
#Mongoid.identity_map_enabled = false
Rake::Task["tire:import"].invoke
#klasses = [CommentThread,Comment]
#klasses.each do |klass|
klass = CommentThread
ENV['CLASS'] = klass.name
ENV['INDEX'] = new_index = klass.tire.index.name << '_' << Time.now.strftime('%Y%m%d%H%M%S')
puts '[IMPORT] about to swap index'
if a = Tire::Alias.find(klass.tire.index.name)
puts "[IMPORT] aliases found: #{Tire::Alias.find(klass.tire.index.name).indices.to_ary.join(',')}. deleting."
old_indices = Tire::Alias.find(klass.tire.index.name).indices
old_indices.each do |index|
a.indices.delete index
end
Rake::Task["tire:import"].invoke
a.indices.add new_index
a.save
puts '[IMPORT] about to swap index'
if a = Tire::Alias.find(klass.tire.index.name)
puts "[IMPORT] aliases found: #{Tire::Alias.find(klass.tire.index.name).indices.to_ary.join(',')}. deleting."
old_indices = Tire::Alias.find(klass.tire.index.name).indices
old_indices.each do |index|
a.indices.delete index
end
a.indices.add new_index
a.save
old_indices.each do |index|
puts "[IMPORT] deleting index: #{index}"
i = Tire::Index.new(index)
i.delete if i.exists?
old_indices.each do |index|
puts "[IMPORT] deleting index: #{index}"
i = Tire::Index.new(index)
i.delete if i.exists?
end
else
puts "[IMPORT] no aliases found. deleting index. Creating new one for #{klass} and setting up alias."
klass.tire.index.delete
a = Tire::Alias.new
a.name(klass.tire.index.name)
a.index(new_index)
a.save
end
else
puts "[IMPORT] no aliases found. deleting index. creating new one and setting up alias."
klass.tire.index.delete
a = Tire::Alias.new
a.name(klass.tire.index.name)
a.index(new_index)
a.save
end
puts "[IMPORT] done. Index: '#{new_index}' created."
puts "[IMPORT] done. Index: '#{new_index}' created."
#end
end
task :add_anonymous_to_peers => :environment do
......@@ -275,4 +279,4 @@ namespace :jobs do
task :work => :environment do
Delayed::Worker.new(:min_priority => ENV['MIN_PRIORITY'], :max_priority => ENV['MAX_PRIORITY'], :queues => (ENV['QUEUES'] || ENV['QUEUE'] || '').split(','), :quiet => false).start
end
end
end
\ No newline at end of file
......@@ -22,6 +22,10 @@ if ["staging", "production", "loadtest", "edgestage","edgeprod"].include? enviro
require 'newrelic_rpm'
end
if ENV["ENABLE_GC_PROFILER"]
GC::Profiler.enable
end
set :cache, Dalli::Client.new
application_yaml = ERB.new(File.read("config/application.yml")).result()
......@@ -48,6 +52,8 @@ DEFAULT_PER_PAGE = 20
if RACK_ENV.to_s != "test" # disable api_key auth in test environment
before do
#duct tape to avoid 401 on deep search performance test
#error 401 unless params[:api_key] == CommentService.config[:api_key] or true
error 401 unless params[:api_key] == CommentService.config[:api_key]
end
end
......
......@@ -5,3 +5,4 @@ elasticsearch_server: <%= ENV['SEARCH_SERVER'] || 'http://localhost:9200' %>
cache_timeout:
threads_search: 10
threads_query: 10
max_deep_search_comment_count: 5000
development:
sessions:
default:
database: cs_comments_service_development
hosts:
- localhost:27017
default:
database: cs_comments_service_development
hosts:
- localhost:27017
test:
sessions:
......@@ -22,6 +22,7 @@ production:
default:
hosts:
- hurley.member0.mongohq.com:10000
- hurley.member1.mongohq.com:10000
username: <%= ENV['MONGOHQ_USER'] %>
password: <%= ENV['MONGOHQ_PASS'] %>
database: app6929933
......
require 'rest_client'
roots = {}
roots['development'] = "http://localhost:8000"
roots['test'] = "http://localhost:8000"
roots['production'] = "http://edx.org"
roots['staging'] = "http://stage.edx.org"
ROOT = roots[ENV['SINATRA_ENV']]
namespace :deep_search do
task :performance => :environment do
#USAGE
#SINATRA_ENV=development rake kpis:prolific
#or
#SINATRA_ENV=development bundle exec rake kpis:prolific
#create comment and thread bodies
bodies = []
50.times do |i|
bodies << (0...8).map{ ('a'..'z').to_a[rand(26)] }.join
end
parents = CommentThread.limit(100)
#now create comments and threads with hits
puts "Manufacturing Threads"
100.times do |j|
(1..5).to_a.sample.times do |i|
c = CommentThread.new
c.course_id = 'sample course'
c.title = 'sample title'
c.commentable_id = 'sample commetable'
c.body = bodies.sample
c.author = 1
c.save
end
end
puts "Manufacturing Comments"
100.times do |j|
(1..5).to_a.sample.times do |i|
c = Comment.new
c.course_id = 'sample course'
c.body = bodies.sample
c.comment_thread_id = parents.sample.id
c.author = 1
c.save
end
end
sort_keys = %w[date activity votes comments]
sort_order = "desc"
#set the sinatra env to test to avoid 401'ing
set :environment, :test
start_time = Time.now
puts "Starting test at #{start_time}"
1000.times do |i|
query_params = { course_id: "1", sort_key: sort_keys.sample, sort_order: sort_order, page: 1, per_page: 5, text: bodies.sample }
RestClient.get "#{PREFIX}/threads", params: query_params
end
end_time = Time.now
puts "Ending test at #{end_time}"
puts "Total Time: #{(end_time - start_time).to_f} seconds"
end
end
......@@ -17,6 +17,18 @@ class Comment < Content
field :at_position_list, type: Array, default: []
index({author_id: 1, course_id: 1})
include Tire::Model::Search
include Tire::Model::Callbacks
mapping do
indexes :body, type: :string, analyzer: :snowball, stored: true, term_vector: :with_positions_offsets
indexes :course_id, type: :string, index: :not_analyzed, included_in_all: false
#indexes :comment_thread_id, type: :string, stored: true, index: :not_analyzed, included_in_all: false
#current prod tire doesn't support indexing BSON ids, will reimplement when we upgrade
end
belongs_to :comment_thread, index: true
belongs_to :author, class_name: "User", index: true
......
......@@ -45,10 +45,8 @@ class CommentThread < Content
indexes :commentable_id, type: :string, index: :not_analyzed, included_in_all: false
indexes :author_id, type: :string, as: 'author_id', index: :not_analyzed, included_in_all: false
indexes :group_id, type: :integer, as: 'group_id', index: :not_analyzed, included_in_all: false
#indexes :pinned, type: :boolean, as: 'pinned', index: :not_analyzed, included_in_all: false
indexes :comments do
indexes :body, analyzer: 'snowball'
end
indexes :id, :index => :not_analyzed
indexes :thread_id, :analyzer => :keyword, :as => "_id"
end
belongs_to :author, class_name: "User", inverse_of: :comment_threads, index: true#, autosave: true
......@@ -96,6 +94,7 @@ class CommentThread < Content
end
def self.perform_search(params, options={})
page = [1, options[:page] || 1].max
per_page = options[:per_page] || 20
sort_key = options[:sort_key]
......@@ -107,6 +106,23 @@ class CommentThread < Content
return results
end
end
#GET /api/v1/search/threads?user_id=1&recursive=False&sort_key=date&│[2013-06-28 10:16:46,104][INFO ][plugins ] [Glamor] loaded [], sites []
#text=response&sort_order=desc&course_id=HarvardX%2FHLS1xD%2FCopyright&per_page=20&api_key=PUT_YOUR_API_KE│T1GYWxzZSZzb3J0X2tleT1kYXRlJnRleHQ9cmVzcG9uc2Umc29ydF9vcmRlcj1kZXNjJmNvdXJzZV9pZA==: initialized
#Y_HERE&page=1
#KChugh - Unfortunately, there's no algorithmically nice way to handle pagination with
#stitching together Comments and CommentThreads, because there is no determinstic relationship
#between the ordinality of comments and threads.
#the best solution is to find all of the thread ids for matching comment hits, and union them
#with the comment thread query, however, Tire does not support ORing a query key with a term filter
#so the 3rd best solution is to run two Tire searches (3 actually, one to query the comments, one to query the threads based on
#thread ids and the original thread search) and merge the results, uniqifying the results in the process.
#so first, find the comment threads associated with comments that hit the query
search = Tire::Search::Search.new 'comment_threads'
search.query {|query| query.text :_all, params["text"]} if params["text"]
search.highlight({title: { number_of_fragments: 0 } } , {body: { number_of_fragments: 0 } }, options: { tag: "<highlight>" })
......@@ -114,23 +130,79 @@ class CommentThread < Content
search.filter(:term, commentable_id: params["commentable_id"]) if params["commentable_id"]
search.filter(:terms, commentable_id: params["commentable_ids"]) if params["commentable_ids"]
search.filter(:term, course_id: params["course_id"]) if params["course_id"]
if params["group_id"]
search.filter :or, [
{:not => {:exists => {:field => :group_id}}},
{:term => {:group_id => params["group_id"]}}
]
{:term => {:group_id => params["group_id"]}}
]
end
search.sort {|sort| sort.by sort_key, sort_order} if sort_key && sort_order #TODO should have search option 'auto sort or sth'
search.size per_page
search.from per_page * (page - 1)
#again, b/c there is no relationship in ordinality, we cannot paginate if it's a text query
if not params["text"]
search.size per_page
search.from per_page * (page - 1)
end
results = search.results
#if this is a search query, then also search the comments and harvest the matching comments
if params["text"]
search = Tire::Search::Search.new 'comments'
search.query {|query| query.text :_all, params["text"]} if params["text"]
search.filter(:term, course_id: params["course_id"]) if params["course_id"]
search.size CommentService.config["max_deep_search_comment_count"].to_i
#unforutnately, we cannot paginate here, b/c we don't know how the ordinality is totally
#unrelated to that of threads
c_results = search.results
comment_ids = c_results.collect{|c| c.id}.uniq
comments = Comment.where(:id.in => comment_ids)
thread_ids = comments.collect{|c| c.comment_thread_id}
#thread_ids = c_results.collect{|c| c.comment_thread_id}
#as soon as we can add comment thread id to the ES index, via Tire updgrade, we'll
#use ES instead of mongo to collect the thread ids
#use the elasticsearch index instead to avoid DB hit
original_thread_ids = results.collect{|r| r.id}
#now add the original search thread ids
thread_ids += original_thread_ids
thread_ids = thread_ids.uniq
#now run one more search to harvest the threads and filter by group
search = Tire::Search::Search.new 'comment_threads'
search.filter(:terms, :thread_id => thread_ids)
search.filter(:terms, commentable_id: params["commentable_ids"]) if params["commentable_ids"]
search.filter(:term, course_id: params["course_id"]) if params["course_id"]
search.size per_page
search.from per_page * (page - 1)
if params["group_id"]
search.filter :or, [
{:not => {:exists => {:field => :group_id}}},
{:term => {:group_id => params["group_id"]}}
]
end
search.sort {|sort| sort.by sort_key, sort_order} if sort_key && sort_order
results = search.results
end
if CommentService.config[:cache_enabled]
Sinatra::Application.cache.set(memcached_key, results, CommentService.config[:cache_timeout][:threads_search].to_i)
end
......@@ -184,7 +256,6 @@ class CommentThread < Content
"group_id" => group_id,
"pinned" => pinned?,
"endorsed" => endorsed?)
if params[:recursive]
doc = doc.merge("children" => root_comments.map{|c| c.to_hash(recursive: true)})
end
......@@ -205,8 +276,8 @@ class CommentThread < Content
# unread count
if last_read_time
unread_count = self.comments.where(
:updated_at => {:$gte => last_read_time},
:author_id => {:$ne => params[:user_id]},
:updated_at => {:$gte => last_read_time},
:author_id => {:$ne => params[:user_id]},
).count
read = last_read_time >= self.updated_at
else
......@@ -220,8 +291,8 @@ class CommentThread < Content
end
doc = doc.merge("unread_comments_count" => unread_count)
.merge("read" => read)
.merge("comments_count" => comments_count)
.merge("read" => read)
.merge("comments_count" => comments_count)
doc
......@@ -231,13 +302,12 @@ class CommentThread < Content
!!(tag =~ RE_TAG)
end
private
def comment_thread_id
#so that we can use the comment thread id as a common attribute for flagging
self.id
end
private
RE_HEADCHAR = /[a-z0-9]/
RE_ENDONLYCHAR = /\+/
RE_ENDCHAR = /[a-z0-9\#]/
......
require 'spec_helper'
describe "app" do
describe "thread search" do
describe "GET /api/v1/search/threads" do
it "returns thread with query match" do
user = User.find 1
if user.nil?
user = create_test_user(1)
end
commentable = Commentable.new("question_1")
random_string = (0...8).map{ ('a'..'z').to_a[rand(26)] }.join
thread = CommentThread.new(title: "Test title", body: random_string, course_id: "1", commentable_id: commentable.id)
thread.author = user
thread.save!
sleep 3
get "/api/v1/search/threads", text: random_string
last_response.should be_ok
threads = parse(last_response.body)['collection']
threads.select{|t| t["id"].to_s == thread.id.to_s}.first.should_not be_nil
end
end
end
describe "comment search" do
describe "GET /api/v1/search/threads" do
it "returns thread with comment query match" do
user = User.find 1
if user.nil?
user = create_test_user(1)
end
commentable = Commentable.new("question_1")
random_string = (0...8).map{ ('a'..'z').to_a[rand(26)] }.join
thread = CommentThread.new(title: "Test title", body: "elephant otter", course_id: "1", commentable_id: commentable.id)
thread.author = user
thread.save!
sleep 3
comment = Comment.new(body: random_string, course_id: "1", commentable_id: commentable.id)
comment.author = user
comment.comment_thread = thread
comment.save!
sleep 1
get "/api/v1/search/threads", text: random_string
last_response.should be_ok
threads = parse(last_response.body)['collection']
threads.select{|t| t["id"].to_s == thread.id.to_s}.first.should_not be_nil
end
end
end
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment