|
- require 'set'
-
-
# Creates the post_versions table and backfills a version history for every
# existing post. The history is reconstructed from the post's post_tags rows:
# each row contributes an "add" event at created_at and, when discarded_at is
# set, a "remove" event at discarded_at.
class CreatePostVersions < ActiveRecord::Migration[8.0]
  # Migration-local models pinned to explicit table names.
  class Post < ApplicationRecord
    self.table_name = 'posts'
  end

  class PostTag < ApplicationRecord
    self.table_name = 'post_tags'
  end

  class PostVersion < ApplicationRecord
    self.table_name = 'post_versions'
  end

  # Number of posts loaded (and backfilled) per batch.
  BACKFILL_BATCH_SIZE = 500

  # Tag events no further apart than this are folded into a single version.
  # The same tolerance decides whether the earliest bucket is merged into the
  # post's 'create' version.
  MERGE_WINDOW = 1.second

  # Tie-breaker used when sorting events that share a timestamp and user.
  EVENT_KIND_ORDER = { add: 0, remove: 1 }.freeze

  # Creates the table, then inserts the reconstructed versions batch by batch.
  def up
    create_post_versions_table

    # The table was created after the model class was defined, so any cached
    # column metadata has to be discarded before inserting.
    PostVersion.reset_column_information

    say_with_time 'Backfilling post_versions' do
      Post.find_in_batches(batch_size: BACKFILL_BATCH_SIZE) do |posts|
        post_tag_rows_by_post_id = load_post_tag_rows(posts.map(&:id))

        rows = posts.flat_map do |post|
          version_rows_for(post, post_tag_rows_by_post_id[post.id])
        end

        PostVersion.insert_all!(rows) if rows.any?
      end
    end
  end

  # Drops the table together with all backfilled rows.
  def down
    drop_table :post_versions
  end

  private

  # Defines the post_versions schema, its unique (post_id, version_no) index
  # and the check constraints on version_no and event_type.
  def create_post_versions_table
    create_table :post_versions do |t|
      t.references :post, null: false, foreign_key: true
      t.integer :version_no, null: false
      t.string :event_type, null: false
      t.string :title
      t.string :url, limit: 768, null: false
      t.string :thumbnail_base, limit: 2000
      t.text :tags, null: false
      t.references :parent, foreign_key: { to_table: :posts }
      t.datetime :original_created_from
      t.datetime :original_created_before
      t.datetime :created_at, null: false
      t.references :created_by_user, foreign_key: { to_table: :users }

      t.index [:post_id, :version_no], unique: true
      t.check_constraint 'version_no > 0',
                         name: 'post_versions_version_no_positive'
      t.check_constraint "event_type IN ('create', 'update', 'discard', 'restore')",
                         name: 'post_versions_event_type_valid'
    end
  end

  # Loads the post_tags rows (joined to their tag names) for the given posts.
  #
  # Returns a Hash keyed by post id whose values are Arrays of Hashes with the
  # keys :created_at, :discarded_at, :created_user_id, :deleted_user_id and
  # :tag_name. Looking up a post id without rows yields an empty Array.
  def load_post_tag_rows post_ids
    PostTag
      .joins('INNER JOIN tags ON tags.id = post_tags.tag_id')
      .joins('INNER JOIN tag_names ON tag_names.id = tags.tag_name_id')
      .where(post_id: post_ids)
      .pluck('post_tags.post_id',
             'post_tags.created_at',
             'post_tags.discarded_at',
             'post_tags.created_user_id',
             'post_tags.deleted_user_id',
             'tag_names.name')
      .each_with_object(Hash.new { |h, k| h[k] = [] }) do |row, h|
        post_id, created_at, discarded_at, created_user_id, deleted_user_id, tag_name = row
        h[post_id] << { created_at:,
                        discarded_at:,
                        created_user_id:,
                        deleted_user_id:,
                        tag_name: }
      end
  end

  # Expands post_tags rows into [event_at, user_id, kind, tag_name] tuples and
  # returns them sorted by time, then user id (nil sorts as 0), then kind
  # (:add before :remove), then tag name.
  def tag_events_for post_tag_rows
    events = post_tag_rows.flat_map do |row|
      added = [row[:created_at], row[:created_user_id], :add, row[:tag_name]]
      next [added] unless row[:discarded_at]

      [added,
       [row[:discarded_at], row[:deleted_user_id], :remove, row[:tag_name]]]
    end

    events.sort_by do |event_at, user_id, kind, tag_name|
      [event_at, user_id || 0, EVENT_KIND_ORDER.fetch(kind), tag_name]
    end
  end

  # Builds every post_versions row for one post.
  #
  # Version 1 is always a 'create' row stamped with the post's created_at.
  # When the earliest bucket of tag events starts no later than MERGE_WINDOW
  # after the post was created, its tags (and, if the post has no uploader,
  # its first known user) are folded into that row. Each remaining bucket
  # becomes an 'update' row carrying the tags active after that bucket.
  def version_rows_for post, post_tag_rows
    buckets = bucket_events(tag_events_for(post_tag_rows))
    tag_counts = Hash.new(0)
    created_by_user_id = post.uploaded_user_id

    first_bucket = buckets.first
    if first_bucket && first_bucket[:first_at] <= post.created_at + MERGE_WINDOW
      buckets.shift
      apply_bucket!(tag_counts, first_bucket)
      created_by_user_id ||= first_bucket[:user_ids].compact.first
    end

    rows = [build_row(post:,
                      version_no: 1,
                      event_type: 'create',
                      created_at: post.created_at,
                      created_by_user_id:,
                      tags: active_tag_names(tag_counts))]

    # Version numbers continue from 2 because the 'create' row took number 1.
    buckets.each.with_index(2) do |bucket, version_no|
      apply_bucket!(tag_counts, bucket)

      rows << build_row(post:,
                        version_no:,
                        event_type: 'update',
                        created_at: bucket[:first_at],
                        created_by_user_id: bucket[:user_ids].compact.first,
                        tags: active_tag_names(tag_counts))
    end

    rows
  end

  # Groups time-sorted events into buckets. An event joins the current bucket
  # when it is no more than MERGE_WINDOW after that bucket's latest event;
  # otherwise it opens a new bucket.
  #
  # Each bucket is a Hash with :first_at, :last_at, :user_ids (one entry per
  # event, possibly nil) and :events (Array of [kind, tag_name]).
  def bucket_events events
    buckets = []

    events.each do |event_at, user_id, kind, tag_name|
      bucket = buckets.last

      if bucket.nil? || event_at - bucket[:last_at] > MERGE_WINDOW
        buckets << { first_at: event_at,
                     last_at: event_at,
                     user_ids: [user_id],
                     events: [[kind, tag_name]] }
      else
        bucket[:last_at] = event_at
        bucket[:user_ids] << user_id
        bucket[:events] << [kind, tag_name]
      end
    end

    buckets
  end

  # Applies a bucket's events to tag_counts (tag name => number of currently
  # active post_tags rows), mutating it in place.
  #
  # Counting rows instead of keeping a plain set makes the result independent
  # of the order of events inside a bucket: when one row for a tag is discarded
  # at the same instant another row for that tag is created, the tag correctly
  # stays active.
  def apply_bucket! tag_counts, bucket
    bucket[:events].each do |kind, tag_name|
      tag_counts[tag_name] += kind == :add ? 1 : -1
    end
  end

  # Returns the sorted names of all tags with at least one active row.
  def active_tag_names tag_counts
    tag_counts.select { |_tag_name, count| count.positive? }.keys.sort
  end

  # Builds the attribute Hash for one post_versions row. Post attributes are
  # copied as they are now; tags are stored as a single space-separated string.
  def build_row post:, version_no:, event_type:, created_at:, created_by_user_id:, tags:
    { post_id: post.id,
      version_no:,
      event_type:,
      title: post.title,
      url: post.url,
      thumbnail_base: post.thumbnail_base,
      tags: tags.join(' '),
      parent_id: post.parent_id,
      original_created_from: post.original_created_from,
      original_created_before: post.original_created_before,
      created_at:,
      created_by_user_id: }
  end
end
|