diff --git a/backend/db/migrate/20260409123700_create_post_versions.rb b/backend/db/migrate/20260409123700_create_post_versions.rb
index b7a38d8..a2c6da7 100644
--- a/backend/db/migrate/20260409123700_create_post_versions.rb
+++ b/backend/db/migrate/20260409123700_create_post_versions.rb
@@ -1,3 +1,6 @@
+require 'set'
+
+
 class CreatePostVersions < ActiveRecord::Migration[8.0]
   class Post < ApplicationRecord
     self.table_name = 'posts'
@@ -27,44 +30,131 @@ class CreatePostVersions < ActiveRecord::Migration[8.0]
       t.references :created_by_user, foreign_key: { to_table: :users }
 
       t.index [:post_id, :version_no], unique: true
-      t.check_constraint 'version_no > 0'
-      t.check_constraint "event_type IN ('create', 'update', 'discard', 'restore')"
+      t.check_constraint 'version_no > 0',
+                         name: 'post_versions_version_no_positive'
+      t.check_constraint "event_type IN ('create', 'update', 'discard', 'restore')",
+                         name: 'post_versions_event_type_valid'
     end
 
+    PostVersion.reset_column_information
+
     say_with_time 'Backfilling post_versions' do
       Post.find_in_batches(batch_size: 500) do |posts|
         post_ids = posts.map(&:id)
 
-        tag_names_by_post_id =
+        post_tag_rows_by_post_id =
           PostTag
             .joins('INNER JOIN tags ON tags.id = post_tags.tag_id')
             .joins('INNER JOIN tag_names ON tag_names.id = tags.tag_name_id')
             .where(post_id: post_ids)
-            .where('post_tags.discarded_at IS NULL')
-            .where('tags.discarded_at IS NULL')
-            .where('tag_names.discarded_at IS NULL')
-            .pluck('post_tags.post_id', 'tag_names.name')
-            .each_with_object(Hash.new { |h, k| h[k] = [] }) do |(post_id, tag_name), h|
-              h[post_id] << tag_name
+            .pluck('post_tags.post_id',
+                   'post_tags.created_at',
+                   'post_tags.discarded_at',
+                   'post_tags.created_user_id',
+                   'post_tags.deleted_user_id',
+                   'tag_names.name')
+            .each_with_object(Hash.new { |h, k| h[k] = [] }) do |row, h|
+              post_id, created_at, discarded_at, created_user_id, deleted_user_id, tag_name = row
+              h[post_id] << { created_at:,
+                              discarded_at:,
+                              created_user_id:,
+                              deleted_user_id:,
+                              tag_name: }
+            end
+
+        rows = []
+
+        posts.each do |post|
+          post_tag_rows = post_tag_rows_by_post_id[post.id]
+
+          # Each post_tags row yields an :add event at created_at and, when it
+          # was discarded, a :remove event at discarded_at.
+          events = post_tag_rows.flat_map do |post_tag_row|
+            ary = [[post_tag_row[:created_at],
+                    post_tag_row[:created_user_id],
+                    :add,
+                    post_tag_row[:tag_name]]]
+
+            if post_tag_row[:discarded_at]
+              ary << [post_tag_row[:discarded_at],
+                      post_tag_row[:deleted_user_id],
+                      :remove,
+                      post_tag_row[:tag_name]]
             end
 
-        rows = posts.map do |post|
-          tags = tag_names_by_post_id[post.id].uniq.sort.join(' ')
-          { post_id: post.id,
-            version_no: 1,
-            event_type: 'create',
-            title: post.title,
-            url: post.url,
-            thumbnail_base: post.thumbnail_base,
-            tags:,
-            parent_id: post.parent_id,
-            original_created_from: post.original_created_from,
-            original_created_before: post.original_created_before,
-            created_at: post.created_at,
-            created_by_user_id: post.uploaded_user_id }
+            ary
+          end
+
+          kind_order = { add: 0, remove: 1 }
+
+          events.sort_by! do |event_at, user_id, kind, tag_name|
+            [event_at, user_id || 0, kind_order.fetch(kind), tag_name]
+          end
+
+          event_buckets = bucket_events(events)
+
+          # tag name => number of live post_tags rows carrying that name.
+          # Counting rows (rather than tracking a Set of names) keeps a tag
+          # active while any duplicate or re-added row for it is still live.
+          active_tags = Hash.new(0)
+          version_no = 0
+
+          # No tag history at all: a single 'create' version without tags.
+          if event_buckets.empty?
+            version_no += 1
+            rows << build_row(post:,
+                              version_no:,
+                              event_type: 'create',
+                              created_at: post.created_at,
+                              created_by_user_id: post.uploaded_user_id,
+                              tags: [])
+            next
+          end
+
+          # A first bucket starting no later than one second after the post's
+          # creation is folded into the 'create' version.
+          first_bucket = event_buckets.first
+          merge_first_bucket_into_create = first_bucket[:first_at] <= post.created_at + 1.second
+
+          if merge_first_bucket_into_create
+            event_buckets.shift
+            apply_bucket!(active_tags, first_bucket)
+
+            version_no += 1
+            rows << build_row(
+                post:,
+                version_no:,
+                event_type: 'create',
+                created_at: post.created_at,
+                created_by_user_id: post.uploaded_user_id || first_bucket[:user_ids].compact.first,
+                tags: active_tag_names(active_tags))
+          else
+            version_no += 1
+            rows << build_row(
+                post:,
+                version_no:,
+                event_type: 'create',
+                created_at: post.created_at,
+                created_by_user_id: post.uploaded_user_id,
+                tags: [])
+          end
+
+          # Every remaining bucket becomes one 'update' version.
+          event_buckets.each do |bucket|
+            apply_bucket!(active_tags, bucket)
+
+            version_no += 1
+            rows << build_row(
+                post:,
+                version_no:,
+                event_type: 'update',
+                created_at: bucket[:first_at],
+                created_by_user_id: bucket[:user_ids].compact.first,
+                tags: active_tag_names(active_tags))
+          end
         end
 
-        PostVersion.insert_all!(rows) if rows.present?
+        PostVersion.insert_all!(rows) if rows.any?
       end
     end
   end
@@ -72,4 +162,72 @@ class CreatePostVersions < ActiveRecord::Migration[8.0]
   def down
     drop_table :post_versions
   end
+
+  private
+
+  # Groups time-ordered events into buckets. An event more than one second
+  # after the latest event of the current bucket starts a new bucket; any
+  # other event is appended to the current one.
+  #
+  # @param events [Array<Array>] [event_at, user_id, kind, tag_name] tuples,
+  #   expected in ascending event_at order
+  # @return [Array<Hash>] buckets with :first_at, :last_at, :user_ids and
+  #   :events ([kind, tag_name] pairs)
+  def bucket_events events
+    buckets = []
+
+    events.each do |event_at, user_id, kind, tag_name|
+      if buckets.empty? || event_at - buckets.last[:last_at] > 1.second
+        buckets << { first_at: event_at,
+                     last_at: event_at,
+                     user_ids: [user_id],
+                     events: [[kind, tag_name]] }
+      else
+        bucket = buckets.last
+        bucket[:last_at] = event_at
+        bucket[:user_ids] << user_id
+        bucket[:events] << [kind, tag_name]
+      end
+    end
+
+    buckets
+  end
+
+  # Applies a bucket's events to the live-row counter: :add increments the
+  # count stored under the tag name, any other kind decrements it. Only the
+  # resulting counts matter, so the order of events within a bucket does not.
+  #
+  # @param active_tags [Hash] tag name => number of live rows (default 0)
+  # @param bucket [Hash] a bucket as built by bucket_events
+  # @return [void]
+  def apply_bucket! active_tags, bucket
+    bucket[:events].each do |kind, tag_name|
+      active_tags[tag_name] += kind == :add ? 1 : -1
+    end
+  end
+
+  # Returns the sorted names of all tags whose live-row count is positive.
+  def active_tag_names active_tags
+    active_tags.select { |_name, count| count.positive? }.keys.sort
+  end
+
+  # Builds the attribute hash for one post_versions row. Title, URL, thumbnail,
+  # parent and original-date columns are copied from the post as it is now.
+  #
+  # @param tags [Array<String>] tag names, stored joined by single spaces
+  # @return [Hash] attributes suitable for PostVersion.insert_all!
+  def build_row post:, version_no:, event_type:, created_at:, created_by_user_id:, tags:
+    { post_id: post.id,
+      version_no:,
+      event_type:,
+      title: post.title,
+      url: post.url,
+      thumbnail_base: post.thumbnail_base,
+      tags: tags.join(' '),
+      parent_id: post.parent_id,
+      original_created_from: post.original_created_from,
+      original_created_before: post.original_created_before,
+      created_at:,
+      created_by_user_id: }
+  end
 end
diff --git a/backend/db/schema.rb b/backend/db/schema.rb
index 34a6930..42c7cd4 100644
--- a/backend/db/schema.rb
+++ b/backend/db/schema.rb
@@ -149,8 +149,8 @@ ActiveRecord::Schema[8.0].define(version: 2026_04_09_123700) do
     t.index ["parent_id"], name: "index_post_versions_on_parent_id"
     t.index ["post_id", "version_no"], name: "index_post_versions_on_post_id_and_version_no", unique: true
     t.index ["post_id"], name: "index_post_versions_on_post_id"
-    t.check_constraint "`event_type` in (_utf8mb4'create',_utf8mb4'update',_utf8mb4'discard',_utf8mb4'restore')"
-    t.check_constraint "`version_no` > 0"
+    t.check_constraint "`event_type` in (_utf8mb4'create',_utf8mb4'update',_utf8mb4'discard',_utf8mb4'restore')", name: "post_versions_event_type_valid"
+    t.check_constraint "`version_no` > 0", name: "post_versions_version_no_positive"
   end
 
   create_table "posts", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t|