diff --git a/backend/app/models/post.rb b/backend/app/models/post.rb
index 24af551..1bd0723 100644
--- a/backend/app/models/post.rb
+++ b/backend/app/models/post.rb
@@ -9,12 +9,7 @@ class Post < ApplicationRecord
   has_many :post_tags_with_discarded, -> { with_discarded }, class_name: 'PostTag'
   has_many :tags, through: :active_post_tags
   has_many :user_post_views, dependent: :destroy
-  has_many :post_similarities_as_post,
-           class_name: 'PostSimilarity',
-           foreign_key: :post_id
-  has_many :post_similarities_as_target_post,
-           class_name: 'PostSimilarity',
-           foreign_key: :target_post_id
+  has_many :post_similarities
   has_one_attached :thumbnail
 
   before_validation :normalise_url
@@ -34,18 +29,18 @@ class Post < ApplicationRecord
   end
 
+  # Posts most similar to this one, best match first.
+  #
+  # @param limit [Integer, nil] maximum number of posts; nil means no cap
+  # @return [ActiveRecord::Relation] posts by descending +cos+; empty
+  #   when no similarity rows exist for this post
   def related(limit: nil)
-    ids_with_cos =
-      post_similarities_as_post.select(:target_post_id, :cos)
-                               .map { |ps| [ps.target_post_id, ps.cos] } +
-      post_similarities_as_target_post.select(:post_id, :cos)
-                                      .map { |ps| [ps.post_id, ps.cos] }
+    similar = post_similarities.order(cos: :desc)
+    similar = similar.limit(limit) if limit
 
-    sorted = ids_with_cos.sort_by { |_, cos| -cos }
-
-    ids = sorted.map(&:first)
-    ids = ids.first(limit) if limit
-
-    Post.where(id: ids).index_by(&:id).values_at(*ids)
+    # in_order_of filters to the given ids and orders by their position in
+    # the list; an empty list gives an empty relation, so neither raw
+    # FIELD() SQL nor a special case for "no ids" is needed.
+    Post.in_order_of(:id, similar.pluck(:target_post_id))
   end
 
   def resized_thumbnail!
diff --git a/backend/app/models/post_similarity.rb b/backend/app/models/post_similarity.rb
index 0f86550..c549b5e 100644
--- a/backend/app/models/post_similarity.rb
+++ b/backend/app/models/post_similarity.rb
@@ -1,6 +1,6 @@
 class PostSimilarity < ApplicationRecord
   self.primary_key = :post_id, :target_post_id
 
-  belongs_to :post, class_name: 'Post', foreign_key: 'post_id'
-  belongs_to :target_post, class_name: 'Post', foreign_key: 'target_post_id'
+  belongs_to :post
+  belongs_to :target_post, class_name: 'Post'
 end
diff --git a/backend/app/models/tag_similarity.rb b/backend/app/models/tag_similarity.rb
index 7a6d252..dd66c1e 100644
--- a/backend/app/models/tag_similarity.rb
+++ b/backend/app/models/tag_similarity.rb
@@ -1,6 +1,6 @@
 class TagSimilarity < ApplicationRecord
   self.primary_key = :tag_id, :target_tag_id
 
-  belongs_to :tag, class_name: 'Tag', foreign_key: 'tag_id'
-  belongs_to :target_tag, class_name: 'Tag', foreign_key: 'target_tag_id'
+  belongs_to :tag
+  belongs_to :target_tag, class_name: 'Tag'
 end
diff --git a/backend/app/services/similarity/calc.rb b/backend/app/services/similarity/calc.rb
new file mode 100644
index 0000000..40d717d
--- /dev/null
+++ b/backend/app/services/similarity/calc.rb
@@ -0,0 +1,105 @@
+module Similarity
+  # Rebuilds a model's "...Similarity" table with each record's closest peers.
+  #
+  # A record is represented by the ids reachable through one association
+  # (e.g. a post's tag ids). A pair is scored with the cosine similarity of
+  # the two id lists seen as binary vectors: |A & B| / sqrt(|A| * |B|).
+  class Calc
+    # Default number of neighbours stored per record.
+    DEFAULT_LIMIT = 20
+
+    # Rows sent per bulk INSERT.
+    BATCH_SIZE = 1_000
+
+    class << self
+      # Recomputes every pairwise similarity and replaces the table contents.
+      #
+      # @param model [Class] ActiveRecord model; rows are written through the
+      #   class named "#{model.name}Similarity" (e.g. Post -> PostSimilarity)
+      # @param tgt [Symbol] association whose ids describe a record (e.g. :tags)
+      # @param limit [Integer] maximum neighbours stored per record
+      # @raise [ArgumentError] if +limit+ is not positive
+      # @return [void]
+      def call(model, tgt, limit: DEFAULT_LIMIT)
+        raise ArgumentError, 'limit must be positive' unless limit.positive?
+
+        similarity_model = "#{ model.name }Similarity".constantize
+        key = model.name.underscore
+        id_column = :"#{ key }_id"
+        target_column = :"target_#{ key }_id"
+
+        # Sorted so that intersection_size can do a linear merge.
+        sets = model.includes(tgt).select(:id).to_a.to_h do |record|
+          [record.id, record.public_send(tgt).map(&:id).sort]
+        end
+        ids = sets.keys
+
+        top = Hash.new { |hash, id| hash[id] = [] }
+
+        # Each unordered pair is scored once and offered to both records.
+        ids.each_with_index do |id, i|
+          a = sets[id]
+
+          ((i + 1)...ids.size).each do |j|
+            other_id = ids[j]
+            b = sets[other_id]
+
+            norm = Math.sqrt(a.size * b.size)
+            cos = norm.zero? ? 0.0 : intersection_size(a, b).fdiv(norm)
+
+            push_topk(top[id], cos, other_id, limit)
+            push_topk(top[other_id], cos, id, limit)
+          end
+        end
+
+        rows = top.flat_map do |id, list|
+          list.map do |cos, target_id|
+            { id_column => id, target_column => target_id, cos: }
+          end
+        end
+
+        # Old rows are dropped only after the new ones are computed, inside
+        # one transaction, so a failed insert rolls the delete back instead
+        # of leaving the table empty (assumes a transactional table engine).
+        similarity_model.transaction do
+          similarity_model.delete_all
+          rows.each_slice(BATCH_SIZE) do |batch|
+            similarity_model.insert_all!(batch)
+          end
+        end
+      end
+
+      private
+
+      # Number of common elements of two ascending-sorted arrays.
+      def intersection_size(a, b)
+        i = 0
+        j = 0
+        count = 0
+        while i < a.size && j < b.size
+          if a[i] == b[j]
+            count += 1
+            i += 1
+            j += 1
+          elsif a[i] < b[j]
+            i += 1
+          else
+            j += 1
+          end
+        end
+        count
+      end
+
+      # Adds [cos, target_id] to +list+, which stays sorted by descending cos
+      # and capped at +limit+ entries; equal scores keep insertion order.
+      # No duplicate check is needed: call offers each target to a list once.
+      def push_topk(list, cos, target_id, limit)
+        return if list.size >= limit && cos <= list.last.first
+
+        position = list.index { |existing, _| cos > existing } || list.size
+        list.insert(position, [cos, target_id])
+        list.pop if list.size > limit
+      end
+    end
+  end
+end
diff --git a/backend/db/migrate/20260121213900_add_index_to_post_similarities.rb b/backend/db/migrate/20260121213900_add_index_to_post_similarities.rb
new file mode 100644
index 0000000..52a6ee1
--- /dev/null
+++ b/backend/db/migrate/20260121213900_add_index_to_post_similarities.rb
@@ -0,0 +1,13 @@
+class AddIndexToPostSimilarities < ActiveRecord::Migration[8.0]
+  def change
+    # Created first so post_id is never left without an index (relevant if
+    # a foreign key relies on it; not visible from this migration).
+    add_index :post_similarities, [:post_id, :cos],
+              order: { cos: :desc },
+              name: 'index_post_similarities_on_post_id_and_cos'
+
+    # :column lets Rails rebuild the old index when this is rolled back.
+    remove_index :post_similarities, column: :post_id,
+                 name: 'index_post_similarities_on_post_id'
+  end
+end
diff --git a/backend/db/migrate/20260121225600_add_index_to_tag_similarities.rb b/backend/db/migrate/20260121225600_add_index_to_tag_similarities.rb
new file mode 100644
index 0000000..c820f15
--- /dev/null
+++ b/backend/db/migrate/20260121225600_add_index_to_tag_similarities.rb
@@ -0,0 +1,13 @@
+class AddIndexToTagSimilarities < ActiveRecord::Migration[8.0]
+  def change
+    # Created first so tag_id is never left without an index (relevant if
+    # a foreign key relies on it; not visible from this migration).
+    add_index :tag_similarities, [:tag_id, :cos],
+              order: { cos: :desc },
+              name: 'index_tag_similarities_on_tag_id_and_cos'
+
+    # :column lets Rails rebuild the old index when this is rolled back.
+    remove_index :tag_similarities, column: :tag_id,
+                 name: 'index_tag_similarities_on_tag_id'
+  end
+end
diff --git a/backend/db/schema.rb b/backend/db/schema.rb
index ee14689..c6fc0ed 100644
--- a/backend/db/schema.rb
+++ b/backend/db/schema.rb
@@ -10,7 +10,7 @@
 #
 # It's strongly recommended that you check this file into your version control system.
 
-ActiveRecord::Schema[8.0].define(version: 2026_01_21_024800) do
+ActiveRecord::Schema[8.0].define(version: 2026_01_21_225600) do
   create_table "active_storage_attachments", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t|
     t.string "name", null: false
     t.string "record_type", null: false
@@ -59,7 +59,7 @@ ActiveRecord::Schema[8.0].define(version: 2026_01_21_024800) do
     t.bigint "post_id", null: false
     t.bigint "target_post_id", null: false
     t.float "cos", null: false
-    t.index ["post_id"], name: "index_post_similarities_on_post_id"
+    t.index ["post_id", "cos"], name: "index_post_similarities_on_post_id_and_cos", order: { cos: :desc }
     t.index ["target_post_id"], name: "index_post_similarities_on_target_post_id"
   end
 
@@ -130,7 +130,7 @@ ActiveRecord::Schema[8.0].define(version: 2026_01_21_024800) do
     t.bigint "tag_id", null: false
     t.bigint "target_tag_id", null: false
     t.float "cos", null: false
-    t.index ["tag_id"], name: "index_tag_similarities_on_tag_id"
+    t.index ["tag_id", "cos"], name: "index_tag_similarities_on_tag_id_and_cos", order: { cos: :desc }
     t.index ["target_tag_id"], name: "index_tag_similarities_on_target_tag_id"
   end
 
diff --git a/backend/lib/tasks/calc_post_similarities.rake b/backend/lib/tasks/calc_post_similarities.rake
index 97e4fd2..925770f 100644
--- a/backend/lib/tasks/calc_post_similarities.rake
+++ b/backend/lib/tasks/calc_post_similarities.rake
@@ -1,28 +1,6 @@
 namespace :post_similarity do
   desc '関聯投稿テーブル作成'
   task calc: :environment do
-    dot = -> a, b { (a.keys & b.keys).sum { |k| a[k] * b[k] } }
-    norm = -> v { Math.sqrt(v.values.sum { |e| e * e }) }
-    cos = -> a, b do
-      na = norm.(a)
-      nb = norm.(b)
-      if na.zero? || nb.zero?
-        0.0
-      else
-        dot.(a, b) / na / nb
-      end
-    end
-
-    posts = Post.includes(:tags).to_a
-    posts.each_with_index do |post, i|
-      existence_of_tags = post.tags.index_with(1)
-      ((i + 1)...posts.size).each do |j|
-        target_post = posts[j]
-        existence_of_target_tags = target_post.tags.index_with(1)
-        PostSimilarity.find_or_initialize_by(post:, target_post:).tap { |ps|
-          ps.cos = cos.(existence_of_tags, existence_of_target_tags)
-        }.save!
-      end
-    end
+    Similarity::Calc.call(Post, :tags)
   end
 end
diff --git a/backend/lib/tasks/calc_tag_similarities.rake b/backend/lib/tasks/calc_tag_similarities.rake
index 9e16977..0fc718a 100644
--- a/backend/lib/tasks/calc_tag_similarities.rake
+++ b/backend/lib/tasks/calc_tag_similarities.rake
@@ -1,28 +1,6 @@
 namespace :tag_similarity do
   desc '関聯タグ・テーブル作成'
   task calc: :environment do
-    dot = -> a, b { (a.keys & b.keys).sum { |k| a[k] * b[k] } }
-    norm = -> v { Math.sqrt(v.values.sum { |e| e * e }) }
-    cos = -> a, b do
-      na = norm.(a)
-      nb = norm.(b)
-      if na.zero? || nb.zero?
-        0.0
-      else
-        dot.(a, b) / na / nb
-      end
-    end
-
-    tags = Tag.includes(:posts).to_a
-    tags.each_with_index do |tag, i|
-      existence_of_posts = tag.posts.index_with(1)
-      ((i + 1)...tags.size).each do |j|
-        target_tag = tags[j]
-        existence_of_target_posts = target_tag.posts.index_with(1)
-        TagSimilarity.find_or_initialize_by(tag:, target_tag:).tap { |ts|
-          ts.cos = cos.(existence_of_posts, existence_of_target_posts)
-        }.save!
-      end
-    end
+    Similarity::Calc.call(Tag, :posts)
   end
 end