require 'set'

# Creates the post_versions table and backfills it from existing posts and
# their post_tags rows.
#
# Backfill rules, per post:
# * every post_tags row yields an :add event at its created_at and, when
#   discarded_at is set, a :remove event at discarded_at;
# * events are sorted and grouped into buckets, starting a new bucket whenever
#   the gap to the previous event exceeds BUCKET_WINDOW;
# * version 1 is always a 'create' row stamped with post.created_at; if the
#   first bucket starts no later than post.created_at + BUCKET_WINDOW, its tag
#   changes are folded into that row;
# * each remaining bucket becomes one 'update' row carrying the full tag set
#   that is active after the bucket has been applied.
class CreatePostVersions < ActiveRecord::Migration[8.0]
  # Migration-local models, each mapped to its table via an explicit table_name.
  class Post < ApplicationRecord
    self.table_name = 'posts'
  end

  class PostTag < ApplicationRecord
    self.table_name = 'post_tags'
  end

  class PostVersion < ApplicationRecord
    self.table_name = 'post_versions'
  end

  # Number of posts loaded (and versioned) per backfill batch.
  BACKFILL_BATCH_SIZE = 500

  # Maximum gap between consecutive events for them to share a bucket; also the
  # tolerance used when folding the first bucket into the 'create' version.
  BUCKET_WINDOW = 1.second

  # Sort rank of event kinds, used after timestamp and user id as a tie-breaker.
  # NOTE(review): with identical timestamp and user, an :add is applied before a
  # :remove of the same tag, so the tag ends up removed. Kept as-is on purpose.
  EVENT_KIND_ORDER = { add: 0, remove: 1 }.freeze

  # Creates the table, then backfills it batch by batch.
  def up
    create_post_versions_table
    PostVersion.reset_column_information

    say_with_time 'Backfilling post_versions' do
      Post.find_in_batches(batch_size: BACKFILL_BATCH_SIZE) do |posts|
        tag_rows_by_post_id = load_post_tag_rows(posts.map(&:id))
        rows = posts.flat_map { |post| build_version_rows(post, tag_rows_by_post_id[post.id]) }
        PostVersion.insert_all!(rows) if rows.any?
      end
    end
  end

  def down
    drop_table :post_versions
  end

  private

  # Defines the post_versions schema, its unique (post_id, version_no) index and
  # its check constraints.
  def create_post_versions_table
    create_table :post_versions do |t|
      t.references :post, null: false, foreign_key: true
      t.integer :version_no, null: false
      t.string :event_type, null: false
      t.string :title
      t.string :url, limit: 768, null: false
      t.string :thumbnail_base, limit: 2000
      t.text :tags, null: false
      t.references :parent, foreign_key: { to_table: :posts }
      t.datetime :original_created_from
      t.datetime :original_created_before
      t.datetime :created_at, null: false
      t.references :created_by_user, foreign_key: { to_table: :users }

      t.index [:post_id, :version_no], unique: true
      t.check_constraint 'version_no > 0', name: 'post_versions_version_no_positive'
      t.check_constraint "event_type IN ('create', 'update', 'discard', 'restore')",
                         name: 'post_versions_event_type_valid'
    end
  end

  # Loads the tag history of the given posts in a single query.
  #
  # @param post_ids [Array] ids of the posts in the current batch
  # @return [Hash] post id => array of hashes with keys :created_at,
  #   :discarded_at, :created_user_id, :deleted_user_id and :tag_name; a post
  #   without rows resolves to an empty array through the hash's default block
  def load_post_tag_rows(post_ids)
    PostTag
      .joins('INNER JOIN tags ON tags.id = post_tags.tag_id')
      .joins('INNER JOIN tag_names ON tag_names.id = tags.tag_name_id')
      .where(post_id: post_ids)
      .pluck('post_tags.post_id', 'post_tags.created_at', 'post_tags.discarded_at',
             'post_tags.created_user_id', 'post_tags.deleted_user_id', 'tag_names.name')
      .each_with_object(Hash.new { |h, k| h[k] = [] }) do |row, rows_by_post_id|
        post_id, created_at, discarded_at, created_user_id, deleted_user_id, tag_name = row
        rows_by_post_id[post_id] << { created_at:, discarded_at:, created_user_id:, deleted_user_id:, tag_name: }
      end
  end

  # Expands tag rows into a sorted list of add/remove events.
  #
  # @param post_tag_rows [Array<Hash>] rows as produced by load_post_tag_rows
  # @return [Array<Array>] [event_at, user_id, kind, tag_name] tuples ordered by
  #   timestamp, user id (nil sorts as 0), kind rank, then tag name
  def tag_events(post_tag_rows)
    events = post_tag_rows.flat_map do |row|
      added = [row[:created_at], row[:created_user_id], :add, row[:tag_name]]
      next [added] unless row[:discarded_at]

      [added, [row[:discarded_at], row[:deleted_user_id], :remove, row[:tag_name]]]
    end

    events.sort_by do |event_at, user_id, kind, tag_name|
      [event_at, user_id || 0, EVENT_KIND_ORDER.fetch(kind), tag_name]
    end
  end

  # Builds every post_versions row for one post: the 'create' row followed by
  # one 'update' row per remaining event bucket.
  #
  # @param post [Post]
  # @param post_tag_rows [Array<Hash>] tag history of the post (may be empty)
  # @return [Array<Hash>] attribute hashes with version_no counting up from 1
  def build_version_rows(post, post_tag_rows)
    buckets = bucket_events(tag_events(post_tag_rows))
    active_tags = Set.new
    creator_id = post.uploaded_user_id

    # Tag changes made together with the upload belong to the 'create' version;
    # without an uploader, the first known user of that bucket is credited.
    first_bucket = buckets.first
    if first_bucket && first_bucket[:first_at] <= post.created_at + BUCKET_WINDOW
      buckets.shift
      apply_bucket!(active_tags, first_bucket)
      creator_id ||= first_bucket[:user_ids].compact.first
    end

    rows = [
      build_row(post:, version_no: 1, event_type: 'create', created_at: post.created_at,
                created_by_user_id: creator_id, tags: active_tags.to_a.sort)
    ]

    # Version 1 is the 'create' row, so updates are numbered from 2.
    buckets.each.with_index(2) do |bucket, version_no|
      apply_bucket!(active_tags, bucket)
      rows << build_row(post:, version_no:, event_type: 'update', created_at: bucket[:first_at],
                        created_by_user_id: bucket[:user_ids].compact.first, tags: active_tags.to_a.sort)
    end

    rows
  end

  # Groups sorted events into buckets. The window slides: an event joins the
  # current bucket when it is within BUCKET_WINDOW of that bucket's latest event.
  #
  # @param events [Array<Array>] sorted [event_at, user_id, kind, tag_name] tuples
  # @return [Array<Hash>] buckets with keys :first_at, :last_at, :user_ids, :events
  def bucket_events(events)
    buckets = []

    events.each do |event_at, user_id, kind, tag_name|
      if buckets.empty? || event_at - buckets.last[:last_at] > BUCKET_WINDOW
        buckets << { first_at: event_at, last_at: event_at, user_ids: [user_id], events: [[kind, tag_name]] }
      else
        bucket = buckets.last
        bucket[:last_at] = event_at
        bucket[:user_ids] << user_id
        bucket[:events] << [kind, tag_name]
      end
    end

    buckets
  end

  # Applies a bucket's events, in order, to the running set of active tags.
  #
  # @param active_tags [Set] mutated in place
  # @param bucket [Hash] bucket as produced by bucket_events
  def apply_bucket!(active_tags, bucket)
    bucket[:events].each do |kind, tag_name|
      if kind == :add
        active_tags.add(tag_name)
      else
        active_tags.delete(tag_name)
      end
    end
  end

  # Builds the attribute hash for one post_versions row. Post-level columns are
  # copied from the post as it is now; tags are stored space-separated.
  #
  # @param tags [Array<String>] tag names, already in the desired order
  # @return [Hash] attributes suitable for PostVersion.insert_all!
  def build_row(post:, version_no:, event_type:, created_at:, created_by_user_id:, tags:)
    {
      post_id: post.id,
      version_no:,
      event_type:,
      title: post.title,
      url: post.url,
      thumbnail_base: post.thumbnail_base,
      tags: tags.join(' '),
      parent_id: post.parent_id,
      original_created_from: post.original_created_from,
      original_created_before: post.original_created_before,
      created_at:,
      created_by_user_id:
    }
  end
end