Commit 113dfb91 by Jim Abramson

Merge pull request #51 from edx/feature/jsa/new-sk

remove recursive queries in comment thread views
parents e2f626af f44803cf
Change Log
----------
These are notable changes in cs_comments_service. This is a rolling list of changes,
in roughly chronological order, most recent first. Add your entries at or near
the top. Include a label indicating the component affected.
**models:** added a new sorting key and index to `Comment` documents, removing the need
for certain hierarchical db queries. Also added a copy of the author's username
to `Comment` and `CommentThread` models, to reduce the number db queries.
IMPORTANT: these changes require a data backpopulation to be run BEFORE deploying
updated code. The backpopulation script is located at
scripts/db/migrate-001-sk-author_username.js
and should be run directly against your MongoDB instance.
......@@ -17,8 +17,16 @@ class Comment < Content
field :at_position_list, type: Array, default: []
index({author_id: 1, course_id: 1})
field :sk, type: String, default: nil
before_save :set_sk
def set_sk()
# this attribute is explicitly write-once
if self.sk.nil?
self.sk = (self.parent_ids << self.id).join("-")
end
end
include Tire::Model::Search
include Tire::Model::Callbacks
......@@ -77,19 +85,14 @@ class Comment < Content
end
end
if params[:recursive]
# TODO: remove and reuse the new hierarchical sort keys if possible
subtree_hash = subtree(sort: sort_by_parent_and_time)
# Flatten out the subtree and fetch all users in bulk; makes getting the
# usernames faster. Should probably denormalize usernames.
flattened_subtree = Comment.flatten_subtree(subtree_hash)
User.only(:username).find(flattened_subtree.map{|x| x.author_id})
self.class.hash_tree(subtree_hash).first
else
as_document.slice(*%w[body course_id endorsed anonymous anonymous_to_peers created_at updated_at at_position_list])
.merge("id" => _id)
.merge("user_id" => author_id)
.merge("username" => author.nil? ? "na" : author.username) # avoid crashing to_hash on orphaned comments
.merge("username" => author_username)
.merge("depth" => depth)
.merge("closed" => comment_thread.nil? ? false : comment_thread.closed) # ditto
.merge("thread_id" => comment_thread_id)
......
......@@ -258,11 +258,41 @@ class CommentThread < Content
"pinned" => pinned?,
"endorsed" => endorsed?)
if params[:recursive]
doc = doc.merge("children" => root_comments.map{|c| c.to_hash(recursive: true)})
doc = doc.merge("children" => [])
rs = Comment.where(comment_thread_id: self.id).order_by({"sk"=> 1})
ancestry = [doc]
comments_count = 0
# weave the fetched comments into a single hierarchical doc
rs.each do | comment |
h = comment.to_hash.merge("children" => [])
parent_id = comment.parent_id || self.id
found_parent = false
while ancestry.length > 0 do
if parent_id == ancestry.last["id"] then
# found the children collection to which this comment belongs
ancestry.last["children"] << h
ancestry << h
found_parent = true
comments_count += 1
break
else
# try again with one level back in the ancestry til we find the parent
ancestry.pop
next
end
end
if not found_parent
# if we arrive here, it means a parent_id somewhere in the result set
# is pointing to an invalid place.
msg = "recursion ended: thread_id=#{self.id} comment_id=#{comment.id} parent_ids=#{comment.parent_ids} sk=#{comment.sk}"
logger.warn msg
ancestry = [doc]
end
end
else
comments_count = comments.count
end
comments_count = comments.count
if params[:user_id]
user = User.find_or_create_by(external_id: params[:user_id])
read_state = user.read_states.where(course_id: self.course_id).first
......
......@@ -5,7 +5,14 @@ class Content
field :visible, type: Boolean, default: true
field :abuse_flaggers, type: Array, default: []
field :historical_abuse_flaggers, type: Array, default: [] #preserve abuse flaggers after a moderator unflags
field :author_username, type: String, default: nil
before_save :set_username
def set_username
# avoid having to look this attribute up later, since it does not change
self.author_username = author.username
end
def author_with_anonymity(attr=nil, attr_when_anonymous=nil)
if not attr
(anonymous || anonymous_to_peers) ? nil : author
......
print ("backpopulating author_username into contents collection");
var tot = db.users.count();
print ("found " + tot + " users to process...");
var cnt = 0;
db.users.find({}, {external_id:1, username:1}).forEach(function (doc) {
db.contents.update(
{author_id:doc["external_id"], author_username:{$exists:false}},
{$set:{author_username:doc["username"]}},
{multi:true}
);
cnt += 1;
if (cnt == tot) {
print("done!");
} else if (cnt % 1000 === 0) {
print("processed " + cnt + " records (" + parseInt((cnt/tot)*100) + "% complete)");
}
});
print ("backpopulating content with orphaned author ids");
db.contents.update({author_username:{$exists:false}}, {$set:{author_username:null}}, {multi:true});
print ("done!");
print ("backpopulating hierarchical sorting keys into contents collection");
var tot = db.contents.find({"_type":"Comment","sk":{$exists:false}}).count();
print ("found " + tot + " comments to process...");
var cnt = 0;
db.contents.find({"_type":"Comment","sk":{$exists:false}}).forEach(function (doc) {
var i, sort_ids;
if (typeof(doc.sk)==="undefined") {
if (typeof(doc.parent_ids)==="undefined") {
sort_ids = [];
} else {
sort_ids = doc.parent_ids;
}
sort_ids.push(doc._id);
doc.sk = sort_ids.join("-");
db.contents.save(doc);
}
cnt += 1;
if (cnt == tot) {
print("done!");
} else if (cnt % 1000 === 0) {
print("processed " + cnt + " records (" + parseInt((cnt/tot)*100) + "% complete)");
}
});
print ("creating index on new sorting keys...");
db.contents.ensureIndex({"sk":1})
print ("all done!");
print ("removing fields 'sk' and 'author_username' from contents collection...");
db.contents.update({}, {$unset:{"sk":1, "author_username":1}}, { multi: true });
print ("removing index on contents.sk");
db.contents.dropIndex({"sk":1});
print ("all done!");
......@@ -44,4 +44,131 @@ describe CommentThread do
end
end
end
context "#to_hash (recursive=true)" do
before(:each) { @cid_seq = 10 }
def stub_each_from_array(obj, ary)
stub = obj.stub(:each)
ary.each {|v| stub = stub.and_yield(v)}
obj
end
def set_comment_results(thread, ary)
# avoids an unrelated expecation error
thread.stub(:endorsed?).and_return(true)
rs = stub_each_from_array(double("rs"), ary)
criteria = double("criteria")
criteria.stub(:order_by).and_return(rs)
Comment.stub(:where).with(comment_thread_id: thread.id).and_return(criteria)
end
def make_comment(parent=nil)
c = Comment.new
c.id = @cid_seq
@cid_seq += 1
c.parent_id = parent.nil? ? nil : parent.id
c
end
it "nests comments in the correct order" do
thread = CommentThread.new
thread.id = 1
thread.author = User.new
c0 = make_comment()
c00 = make_comment(c0)
c01 = make_comment(c0)
c010 = make_comment(c01)
set_comment_results(thread, [c0, c00, c01, c010])
h = thread.to_hash({:recursive => true})
h["children"].size.should == 1 # c0
h["children"][0]["id"].should == c0.id
h["children"][0]["children"].size.should == 2 # c00, c01
h["children"][0]["children"][0]["id"].should == c00.id
h["children"][0]["children"][1]["id"].should == c01.id
h["children"][0]["children"][1]["children"].size.should == 1 # c010
h["children"][0]["children"][1]["children"][0]["id"].should == c010.id
h["comments_count"].should == 4
end
it "handles orphaned child comments gracefully" do
thread = CommentThread.new
thread.id = 33
thread.author = User.new
c0 = make_comment()
c00 = make_comment(c0)
c000 = make_comment(c00)
c1 = make_comment()
c10 = make_comment(c1)
c11 = make_comment(c1)
c111 = make_comment(c11)
# lose c0 and c11 from result set. their descendants should
# be silently skipped over.
set_comment_results(thread, [c00, c000, c1, c10, c111])
h = thread.to_hash({:recursive => true})
h["children"].size.should == 1 # c1
h["children"][0]["id"].should == c1.id
h["children"][0]["children"].size.should == 1 # c10
h["children"][0]["children"][0]["id"].should == c10.id
h["comments_count"].should == 2
end
end
context "sorting" do
before (:each) do
[Comment, CommentThread, User].each(&:delete_all).each(&:remove_indexes).each(&:create_indexes)
end
it "indexes comments in hierarchical order" do
author = create_test_user('billy')
thread = CommentThread.new(title: "test case", body: "testing 123", course_id: "foo", commentable_id: "bar")
thread.author = author
thread.save!
a = thread.comments.new(body: "a", course_id: "foo")
a.author = author
a.save!
b = a.children.new(body: "b", course_id: "foo")
b.author = author
b.comment_thread = thread
b.save!
c = b.children.new(body: "c", course_id: "foo")
c.author = author
c.comment_thread = thread
c.save!
d = b.children.new(body: "d", course_id: "foo")
d.author = author
d.comment_thread = thread
d.save!
e = a.children.new(body: "e", course_id: "foo")
e.author = author
e.comment_thread = thread
e.save!
f = thread.comments.new(body: "f", course_id: "foo")
f.author = author
f.save!
seq = []
rs = Comment.where(comment_thread_id: thread.id).order_by({"sk"=>1})
rs.each.map {|c| seq << c.body}
seq.should == ["a", "b", "c", "d", "e", "f"]
end
end
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment