Commit c6ed850a authored by cmrd Senya, committed by Benjamin Neff

Memory usage optimization for archive export

- Removed posts and non-contact authors from others' data
- Collections are exported in batches to lower memory footprint
- In the base exporters, create the User object on demand instead of keeping an instance, because the instance caches all associations

closes #7627
parent ea57fc5d
......@@ -12,6 +12,7 @@
* Use Bootstrap 3 progress-bar for polls [#7600](https://github.com/diaspora/diaspora/pull/7600)
* Enable frozen string literals [#7595](https://github.com/diaspora/diaspora/pull/7595)
* Remove `rails_admin_histories` table [#7597](https://github.com/diaspora/diaspora/pull/7597)
* Optimize memory usage on profile export [#7627](https://github.com/diaspora/diaspora/pull/7627)
## Bug fixes
* Fix displaying polls with long answers [#7579](https://github.com/diaspora/diaspora/pull/7579)
......
......@@ -4,33 +4,32 @@ module Export
class OthersDataSerializer < ActiveModel::Serializer
# Relayables of other people in the archive: comments, likes, participations, poll participations where author is
# the archive owner
has_many :relayables, each_serializer: FederationEntitySerializer
has_many :relayables, serializer: FlatMapArraySerializer, each_serializer: FederationEntitySerializer
# Parent posts of the user's own relayables. We have to save their metadata to use
# it in case the posts are temporarily unavailable on the target pod.
has_many :posts, each_serializer: FederationEntitySerializer
# Authors of posts where we participated and authors are not in contacts
has_many :non_contact_authors, each_serializer: PersonMetadataSerializer
def initialize(user_id)
@user_id = user_id
super(object)
end
private
def object
User.find(@user_id)
end
def relayables
%i[comments likes poll_participations].map {|relayable|
others_relayables.send(relayable)
}.sum
others_relayables.send(relayable).find_each(batch_size: 20)
}
end
def others_relayables
@others_relayables ||= Diaspora::Exporter::OthersRelayables.new(object.person_id)
end
def posts
@posts ||= Diaspora::Exporter::PostsWithActivity.new(object).query
end
def non_contact_authors
Diaspora::Exporter::NonContactAuthors.new(posts, object).query
# Avoid calling pointless #embedded_in_root_associations method
def serializable_data
{}
end
end
end
......@@ -29,7 +29,7 @@ module Export
end
def excluded_subscription_key
entity.public ? :subscribed_users_ids : :subscribed_pods_uris
object.public? ? :subscribed_users_ids : :subscribed_pods_uris
end
end
end
# frozen_string_literal: true
module Export
  # Serializer exposing three attributes of the wrapped person record:
  # +guid+, +account_id+ and +public_key+.
  class PersonMetadataSerializer < ActiveModel::Serializer
    attributes(*%i[guid account_id public_key])

    private

    # Value of the +account_id+ attribute: the wrapped object's diaspora handle.
    def account_id
      object.diaspora_handle
    end

    # Value of the +public_key+ attribute, read from +object.serialized_public_key+.
    def public_key
      object.serialized_public_key
    end
  end
end
......@@ -18,12 +18,31 @@ module Export
has_many :followed_tags
has_many :post_subscriptions
has_many :relayables, each_serializer: Export::OwnRelayablesSerializer
has_many :relayables, serializer: FlatMapArraySerializer, each_serializer: Export::OwnRelayablesSerializer
def initialize(user_id, options={})
@user_id = user_id
super(object, options)
end
private
def object
User.find(@user_id)
end
def posts
object.posts.find_each(batch_size: 20)
end
def contacts
object.contacts.find_each(batch_size: 100)
end
def relayables
[*comments, *likes, *poll_participations]
[comments, likes, poll_participations].map {|relayable|
relayable.find_each(batch_size: 20)
}
end
%i[comments likes poll_participations].each {|collection|
......@@ -47,5 +66,10 @@ module Export
def post_subscriptions
Post.subscribed_by(object).pluck(:guid)
end
# Avoid calling pointless #embedded_in_root_associations method
def serializable_data
{}
end
end
end
......@@ -13,6 +13,6 @@ class FederationEntitySerializer < ActiveModel::Serializer
end
def entity
@entity ||= Diaspora::Federation::Entities.build(object)
Diaspora::Federation::Entities.build(object)
end
end
# frozen_string_literal: true
# Array serializer for a collection of collections. Every element of every
# inner collection is serialized with its own item serializer and the results
# are returned as one flat list (flattened by exactly one level).
class FlatMapArraySerializer < ActiveModel::ArraySerializer
  # @param options [Hash] passed through unchanged to each item serializer
  # @return [Array] serialized items of all inner collections, in iteration order
  def serializable_object(options={})
    @object.each_with_object([]) do |collection, serialized|
      collection.each do |element|
        serialized << serializer_for(element).serializable_object_with_notification(options)
      end
    end
  end
end
......@@ -20,8 +20,8 @@ module Diaspora
def full_archive
{version: SERIALIZED_VERSION}
.merge(Export::UserSerializer.new(@user).as_json)
.merge(Export::OthersDataSerializer.new(@user).as_json)
.merge(Export::UserSerializer.new(@user.id).as_json)
.merge(Export::OthersDataSerializer.new(@user.id).as_json)
end
end
end
# frozen_string_literal: true
module Diaspora
  class Exporter
    # Queries the people who authored any of the given posts but are not
    # contacts of the given user.
    class NonContactAuthors
      # @param posts [Post::ActiveRecord_Relation] posts whose authors are considered
      # @param user [User] user whose contact list is excluded from the result
      def initialize(posts, user)
        @posts = posts
        @user = user
      end

      # Builds the request for the non-contact authors of the posts.
      # @return the result of +Person.where+ restricted to the computed ids
      #   (a relation of Person records, not of posts)
      def query
        Person.where(id: non_contact_authors_ids)
      end

      private

      attr_reader :posts, :user

      # Distinct author ids of the posts, minus the person ids of the user's contacts.
      # Both id lists are plucked into plain arrays and subtracted in Ruby.
      def non_contact_authors_ids
        author_ids = posts.pluck(:author_id).uniq
        contact_person_ids = user.contacts.pluck(:person_id)
        author_ids - contact_person_ids
      end
    end
  end
end
# frozen_string_literal: true
module Diaspora
  class Exporter
    # Queries the posts of other people on which a user's person has some
    # activity: comments, likes, subscriptions, poll participations or reshares.
    class PostsWithActivity
      # @param user [User] user who the activity belongs to (the one who liked, commented posts, etc)
      def initialize(user)
        @user = user
      end

      # Builds the request for posts with activity. The SQL of every activity
      # scope is joined with UNION and used as the FROM source, aliased "posts".
      # @return the result of +Post.from+ over the combined sub-query
      def query
        union_sql = activity_scopes.map(&:to_sql).join(" UNION ")
        Post.from("(#{union_sql}) AS posts")
      end

      private

      attr_reader :user

      def person
        user.person
      end

      # One scope per kind of activity, in the order: comments, likes,
      # subscriptions, poll participations, reshares.
      def activity_scopes
        [
          foreign_posts.commented_by(person),
          foreign_posts.liked_by(person),
          foreign_posts.subscribed_by(user),
          foreign_poll_posts,
          foreign_posts.reshared_by(person)
        ]
      end

      # Posts whose author is not the user's person.
      def foreign_posts
        Post.where.not(author_id: person.id)
      end

      # Status messages not authored by the person, joined to poll participations
      # that the person authored.
      def foreign_poll_posts
        StatusMessage.where.not(author_id: person.id)
                     .joins(:poll_participations)
                     .where(poll_participations: {author_id: person.id})
      end
    end
  end
end
......@@ -201,7 +201,7 @@ describe Diaspora::Exporter do
expect(json).to include_json(user: {posts: [serialized]})
end
it "contains a reshare and its root" do
it "contains a reshare" do
reshare = FactoryGirl.create(:reshare, author: user.person)
serialized_reshare = {
"subscribed_pods_uris": [reshare.root.author.pod.url_to(""), AppConfig.pod_uri.to_s],
......@@ -216,21 +216,8 @@ describe Diaspora::Exporter do
}
}
status_message = reshare.root
serialized_parent = {
"entity_type": "status_message",
"entity_data": {
"author": status_message.diaspora_handle,
"guid": status_message.guid,
"created_at": status_message.created_at.iso8601,
"text": status_message.text,
"public": true
}
}
expect(json).to include_json(
user: {posts: [serialized_reshare]},
others_data: {posts: [serialized_parent]}
user: {posts: [serialized_reshare]}
)
end
......@@ -244,7 +231,7 @@ describe Diaspora::Exporter do
expect(json).to include_json(user: {post_subscriptions: [subscription.target.guid]})
end
it "contains a comment and the commented post" do
it "contains a comment" do
comment = FactoryGirl.create(:comment, author: user.person)
serialized_comment = {
"entity_type": "comment",
......@@ -258,25 +245,12 @@ describe Diaspora::Exporter do
"property_order": %w[author guid parent_guid text created_at]
}
status_message = comment.parent
serialized_post = {
"entity_type": "status_message",
"entity_data": {
"author": status_message.diaspora_handle,
"guid": status_message.guid,
"created_at": status_message.created_at.iso8601,
"text": status_message.text,
"public": false
}
}
expect(json).to include_json(
user: {relayables: [serialized_comment]},
others_data: {posts: [serialized_post]}
user: {relayables: [serialized_comment]}
)
end
it "contains a like and the liked post" do
it "contains a like" do
like = FactoryGirl.create(:like, author: user.person)
serialized_like = {
"entity_type": "like",
......@@ -290,25 +264,12 @@ describe Diaspora::Exporter do
"property_order": %w[author guid parent_guid parent_type positive]
}
status_message = like.target
serialized_post = {
"entity_type": "status_message",
"entity_data": {
"author": status_message.diaspora_handle,
"guid": status_message.guid,
"created_at": status_message.created_at.iso8601,
"text": status_message.text,
"public": false
}
}
expect(json).to include_json(
user: {relayables: [serialized_like]},
others_data: {posts: [serialized_post]}
user: {relayables: [serialized_like]}
)
end
it "contains a poll participation and post with this poll" do
it "contains a poll participation" do
poll_participation = FactoryGirl.create(:poll_participation, author: user.person)
serialized_participation = {
"entity_type": "poll_participation",
......@@ -321,38 +282,8 @@ describe Diaspora::Exporter do
"property_order": %w[author guid parent_guid poll_answer_guid]
}
poll = poll_participation.poll
status_message = poll_participation.status_message
serialized_post = {
"entity_type": "status_message",
"entity_data": {
"author": status_message.diaspora_handle,
"guid": status_message.guid,
"created_at": status_message.created_at.iso8601,
"text": status_message.text,
"poll": {
"entity_type": "poll",
"entity_data": {
"guid": poll.guid,
"question": poll.question,
"poll_answers": poll.poll_answers.map {|answer|
{
"entity_type": "poll_answer",
"entity_data": {
"guid": answer.guid,
"answer": answer.answer
}
}
}
}
},
"public": false
}
}
expect(json).to include_json(
user: {relayables: [serialized_participation]},
others_data: {posts: [serialized_post]}
user: {relayables: [serialized_participation]}
)
end
......@@ -409,23 +340,6 @@ describe Diaspora::Exporter do
expect(json).to include_json(others_data: {relayables: [serialized]})
end
it "contains metadata of a non-contact author of a post where we commented" do
comment = FactoryGirl.create(:comment, author: user.person)
author = comment.parent.author
expect(json).to include_json(
others_data: {
non_contact_authors: [
{
"guid": author.guid,
"account_id": author.diaspora_handle,
"public_key": author.serialized_public_key
}
]
}
)
end
def transform_value(value)
return value.iso8601 if value.is_a? Date
value
......
# frozen_string_literal: true
# Checks that #query lists a post's author only while that author is not
# among the user's contacts.
describe Diaspora::Exporter::NonContactAuthors do
  describe "#query" do
    let(:user) { FactoryGirl.create(:user_with_aspect) }
    let(:post) { FactoryGirl.create(:status_message) }
    let(:non_contact_authors) { described_class.new(Post.where(id: post.id), user) }

    context "without contact relationship" do
      it "includes post author to the result set" do
        expect(non_contact_authors.query).to eq([post.author])
      end
    end

    context "with contact relationship" do
      # Make the post author a contact of the user before querying.
      before { user.share_with(post.author, user.aspects.first) }

      it "doesn't include post author to the result set" do
        expect(non_contact_authors.query).to be_empty
      end
    end
  end
end
# frozen_string_literal: true
# Checks that #query returns exactly the posts reached through the person's
# like, comment, poll participation, participation and reshare.
describe Diaspora::Exporter::PostsWithActivity do
  let(:user) { FactoryGirl.create(:user) }
  let(:instance) { described_class.new(user) }

  describe "#query" do
    # Data is generated for the user before each example; `activity` is
    # evaluated lazily, after that data exists.
    before { DataGenerator.create(user, %i[activity participation]) }

    let(:activity) do
      person = user.person
      [
        person.likes.first.target,
        person.comments.first.parent,
        person.poll_participations.first.parent.status_message,
        person.participations.first.target,
        person.posts.reshares.first.root
      ]
    end

    it "returns all posts with person's activity" do
      expect(instance.query).to match_array(activity)
    end
  end
end
......@@ -6,7 +6,7 @@ describe Diaspora::Exporter do
expect_any_instance_of(Export::UserSerializer).to receive(:as_json).and_return(user: "user_data")
expect_any_instance_of(Export::OthersDataSerializer).to receive(:as_json).and_return(others_date: "others_data")
json = Diaspora::Exporter.new(nil).execute
json = Diaspora::Exporter.new(FactoryGirl.create(:user)).execute
expect(json).to include_json(
version: "2.0",
user: "user_data",
......
......@@ -2,15 +2,7 @@
describe Export::OthersDataSerializer do
let(:user) { FactoryGirl.create(:user) }
let(:serializer) { Export::OthersDataSerializer.new(user) }
let(:others_posts) {
[
*user.person.likes.map(&:target),
*user.person.comments.map(&:parent),
*user.person.posts.reshares.map(&:root),
*user.person.poll_participations.map(&:status_message)
]
}
let(:serializer) { Export::OthersDataSerializer.new(user.id) }
it "uses FederationEntitySerializer for array serializing relayables" do
sm = DataGenerator.new(user).status_message_with_activity
......@@ -25,21 +17,5 @@ describe Export::OthersDataSerializer do
before do
DataGenerator.new(user).activity
end
it "uses FederationEntitySerializer for array serializing posts" do
expect(Export::OthersDataSerializer).to serialize_association(:posts)
.with_each_serializer(FederationEntitySerializer)
.with_objects(others_posts)
serializer.associations
end
it "uses PersonMetadataSerializer for array serializing non_contact_authors" do
non_contact_authors = others_posts.map(&:author)
expect(Export::OthersDataSerializer).to serialize_association(:non_contact_authors)
.with_each_serializer(Export::PersonMetadataSerializer)
.with_objects(non_contact_authors)
serializer.associations
end
end
end
# frozen_string_literal: true
# Checks the attribute hash produced for a person record.
describe Export::PersonMetadataSerializer do
  let(:person) { FactoryGirl.create(:person) }
  let(:serializer) { described_class.new(person) }

  it "has person metadata attributes" do
    expected_attributes = {
      guid:       person.guid,
      account_id: person.diaspora_handle,
      public_key: person.serialized_public_key
    }

    expect(serializer.attributes).to eq(expected_attributes)
  end
end
......@@ -2,7 +2,7 @@
describe Export::UserSerializer do
let(:user) { FactoryGirl.create(:user) }
let(:serializer) { Export::UserSerializer.new(user, root: false) }
let(:serializer) { Export::UserSerializer.new(user.id, root: false) }
it "has basic user's attributes" do
expect(serializer.attributes).to eq(
......
......@@ -15,6 +15,7 @@
RSpec::Matchers.define :serialize_association do |association_name|
match do |root_serializer_class|
association = fetch_association(root_serializer_class, association_name)
@serializer_from_options = association.serializer_from_options
execute_receive_matcher_with(association)
end
......@@ -50,7 +51,11 @@ RSpec::Matchers.define :serialize_association do |association_name|
def with_object_expectation(object)
if association_object.is_a?(Array)
expect(object).to match_array(association_object)
if serializer_class == FlatMapArraySerializer
expect(object.flat_map(&:to_a)).to match_array(association_object)
else
expect(object).to match_array(association_object)
end
elsif !association_object.nil?
expect(object).to eq(association_object)
end
......@@ -66,6 +71,7 @@ RSpec::Matchers.define :serialize_association do |association_name|
def pick_serializer_class
return association_serializer_class unless association_serializer_class.nil?
return @serializer_from_options unless @serializer_from_options.nil?
return ActiveModel::ArraySerializer unless each_serializer_class.nil?
end
end
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment