| @@ -9,12 +9,7 @@ class Post < ApplicationRecord | |||
| has_many :post_tags_with_discarded, -> { with_discarded }, class_name: 'PostTag' | |||
| has_many :tags, through: :active_post_tags | |||
| has_many :user_post_views, dependent: :destroy | |||
| has_many :post_similarities_as_post, | |||
| class_name: 'PostSimilarity', | |||
| foreign_key: :post_id | |||
| has_many :post_similarities_as_target_post, | |||
| class_name: 'PostSimilarity', | |||
| foreign_key: :target_post_id | |||
| has_many :post_similarities | |||
| has_one_attached :thumbnail | |||
| before_validation :normalise_url | |||
| @@ -34,18 +29,12 @@ class Post < ApplicationRecord | |||
| end | |||
# Returns the posts most similar to this one, best match first.
#
# Similarity rows are read through the +post_similarities+ association and
# ranked by their +cos+ value in descending order.
#
# @param limit [Integer, nil] maximum number of posts to return; +nil+
#   returns every stored neighbour
# @return [Array, ActiveRecord::Relation] an empty Array when no similarity
#   rows exist, otherwise a relation of posts in similarity order
def related(limit: nil)
  similarities = post_similarities.order(cos: :desc)
  similarities = similarities.limit(limit) if limit
  ids = similarities.pluck(:target_post_id)
  return [] if ids.empty?

  # in_order_of both filters to +ids+ and keeps their order, so no
  # hand-built FIELD(...) SQL fragment is needed.
  Post.in_order_of(:id, ids)
end
| def resized_thumbnail! | |||
| @@ -1,6 +1,6 @@ | |||
# One similarity row linking a post to a target post.
#
# The table has no surrogate id; a row is identified by the
# (post_id, target_post_id) pair.
class PostSimilarity < ApplicationRecord
  self.primary_key = :post_id, :target_post_id

  # The post the similarity is measured from (foreign key: post_id).
  belongs_to :post
  # The post being compared against (foreign key: target_post_id).
  belongs_to :target_post, class_name: 'Post'
end
| @@ -1,6 +1,6 @@ | |||
# One similarity row linking a tag to a target tag.
#
# The table has no surrogate id; a row is identified by the
# (tag_id, target_tag_id) pair.
class TagSimilarity < ApplicationRecord
  self.primary_key = :tag_id, :target_tag_id

  # The tag the similarity is measured from (foreign key: tag_id).
  belongs_to :tag
  # The tag being compared against (foreign key: target_tag_id).
  belongs_to :target_tag, class_name: 'Tag'
end
| @@ -0,0 +1,106 @@ | |||
module Similarity
  # Rebuilds the "<Model>Similarity" table for a model.
  #
  # Every record is represented by the sorted ids of the records reachable
  # through one association. For each pair of records the score is
  #
  #   |A ∩ B| / sqrt(|A| * |B|)
  #
  # and only the best-scoring neighbours of each record are stored.
  class Calc
    # Default number of neighbours stored per record.
    DEFAULT_LIMIT = 20
    # Number of rows sent to the database per bulk insert.
    BATCH_SIZE = 1_000

    class << self
      # Recomputes and replaces all similarity rows for +model+.
      #
      # All scores are computed before the table is touched, and the
      # delete + insert run in one transaction, so existing rows survive a
      # failure during the (quadratic) computation.
      #
      # @param model [Class] the model to compare (e.g. Post); the rows are
      #   written through the constant named "#{model.name}Similarity"
      # @param tgt [Symbol] association on +model+ whose ids describe a record
      # @param limit [Integer] maximum number of neighbours kept per record
      # @return [Integer] number of rows written
      # @raise [ArgumentError] if +limit+ is not a positive Integer
      def call(model, tgt, limit: DEFAULT_LIMIT)
        unless limit.is_a?(Integer) && limit.positive?
          raise ArgumentError, "limit must be a positive Integer (got #{ limit.inspect })"
        end

        similarity_model = "#{ model.name }Similarity".constantize
        rows = build_rows(model, top_k(load_id_sets(model, tgt), limit))

        similarity_model.transaction do
          similarity_model.delete_all
          rows.each_slice(BATCH_SIZE) { |batch| similarity_model.insert_all!(batch) }
        end

        rows.size
      end

      private

      # Maps each record id to the sorted ids of its +tgt+ association.
      def load_id_sets(model, tgt)
        model.includes(tgt).select(:id).to_a.to_h do |record|
          [record.id, record.public_send(tgt).map(&:id).sort]
        end
      end

      # Builds, for every record id, its best neighbours as [cos, id] pairs
      # sorted by descending cos. Each unordered pair is scored once and
      # offered to both sides.
      def top_k(id_sets, limit)
        top = Hash.new { |hash, key| hash[key] = [] }
        ids = id_sets.keys
        sets = id_sets.values

        ids.each_index do |i|
          ((i + 1)...ids.size).each do |j|
            cos = cosine(sets[i], sets[j])
            push_topk(top[ids[i]], cos, ids[j], limit)
            push_topk(top[ids[j]], cos, ids[i], limit)
          end
        end

        top
      end

      # Score of two sorted id arrays; 0.0 when either is empty.
      def cosine(a, b)
        norm = Math.sqrt(a.size * b.size)
        norm.zero? ? 0.0 : intersection_size(a, b).fdiv(norm)
      end

      # Counts ids common to two sorted arrays with a two-pointer merge.
      # Unlike `(a & b).size` this allocates nothing, which matters because
      # it runs once per pair of records.
      def intersection_size(a, b)
        i = 0
        j = 0
        count = 0

        while i < a.size && j < b.size
          case a[i] <=> b[j]
          when 0
            count += 1
            i += 1
            j += 1
          when -1
            i += 1
          else
            j += 1
          end
        end

        count
      end

      # Inserts [cos, target_id] into +list+, which is kept sorted by
      # descending cos and capped at +limit+ entries. An entry that ties
      # with existing ones is placed after them, and a candidate that does
      # not beat the current worst entry of a full list is ignored.
      def push_topk(list, cos, target_id, limit)
        return if list.size >= limit && cos <= list.last.first

        existing = list.index { |_, id| id == target_id }
        if existing
          # Keep only the better score for a target that is already listed.
          return if cos <= list[existing].first

          list.delete_at(existing)
        end

        insert_at = list.index { |other_cos, _| cos > other_cos } || list.size
        list.insert(insert_at, [cos, target_id])
        list.pop if list.size > limit
      end

      # Converts the per-record neighbour lists into insertable row hashes
      # keyed by "<model>_id", "target_<model>_id" and :cos.
      def build_rows(model, top)
        base = model.name.underscore
        source_key = :"#{ base }_id"
        target_key = :"target_#{ base }_id"

        top.flat_map do |id, list|
          list.map do |cos, target_id|
            { source_key => id, target_key => target_id, cos: cos }
          end
        end
      end
    end
  end
end
| @@ -0,0 +1,9 @@ | |||
# Replaces the single-column index on post_similarities.post_id with a
# composite (post_id, cos DESC) index.
class AddIndexToPostSimilarities < ActiveRecord::Migration[8.0]
  def change
    # Create the composite index first so post_id is never left without an
    # index (relevant if a foreign key relies on it; not visible here, verify).
    add_index :post_similarities, [:post_id, :cos],
              order: { cos: :desc },
              name: 'index_post_similarities_on_post_id_and_cos'

    # :column is required for `remove_index` to be reversible inside `change`;
    # with only :name a rollback raises ActiveRecord::IrreversibleMigration.
    remove_index :post_similarities,
                 column: :post_id,
                 name: 'index_post_similarities_on_post_id'
  end
end
| @@ -0,0 +1,9 @@ | |||
# Replaces the single-column index on tag_similarities.tag_id with a
# composite (tag_id, cos DESC) index.
class AddIndexToTagSimilarities < ActiveRecord::Migration[8.0]
  def change
    # Create the composite index first so tag_id is never left without an
    # index (relevant if a foreign key relies on it; not visible here, verify).
    add_index :tag_similarities, [:tag_id, :cos],
              order: { cos: :desc },
              name: 'index_tag_similarities_on_tag_id_and_cos'

    # :column is required for `remove_index` to be reversible inside `change`;
    # with only :name a rollback raises ActiveRecord::IrreversibleMigration.
    remove_index :tag_similarities,
                 column: :tag_id,
                 name: 'index_tag_similarities_on_tag_id'
  end
end
| @@ -10,7 +10,7 @@ | |||
| # | |||
| # It's strongly recommended that you check this file into your version control system. | |||
| ActiveRecord::Schema[8.0].define(version: 2026_01_21_024800) do | |||
| ActiveRecord::Schema[8.0].define(version: 2026_01_21_225600) do | |||
| create_table "active_storage_attachments", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t| | |||
| t.string "name", null: false | |||
| t.string "record_type", null: false | |||
| @@ -59,7 +59,7 @@ ActiveRecord::Schema[8.0].define(version: 2026_01_21_024800) do | |||
| t.bigint "post_id", null: false | |||
| t.bigint "target_post_id", null: false | |||
| t.float "cos", null: false | |||
| t.index ["post_id"], name: "index_post_similarities_on_post_id" | |||
| t.index ["post_id", "cos"], name: "index_post_similarities_on_post_id_and_cos", order: { cos: :desc } | |||
| t.index ["target_post_id"], name: "index_post_similarities_on_target_post_id" | |||
| end | |||
| @@ -130,7 +130,7 @@ ActiveRecord::Schema[8.0].define(version: 2026_01_21_024800) do | |||
| t.bigint "tag_id", null: false | |||
| t.bigint "target_tag_id", null: false | |||
| t.float "cos", null: false | |||
| t.index ["tag_id"], name: "index_tag_similarities_on_tag_id" | |||
| t.index ["tag_id", "cos"], name: "index_tag_similarities_on_tag_id_and_cos", order: { cos: :desc } | |||
| t.index ["target_tag_id"], name: "index_tag_similarities_on_target_tag_id" | |||
| end | |||
| @@ -1,28 +1,6 @@ | |||
namespace :post_similarity do
  # Rebuilds the related-posts table: posts are compared by the tags they
  # share. The work is delegated to Similarity::Calc, so no per-pair
  # find-or-create loop is needed here.
  desc '関聯投稿テーブル作成'
  task calc: :environment do
    Similarity::Calc.call(Post, :tags)
  end
end
| @@ -1,28 +1,6 @@ | |||
namespace :tag_similarity do
  # Rebuilds the related-tags table: tags are compared by the posts they
  # share. The work is delegated to Similarity::Calc, so no per-pair
  # find-or-create loop is needed here.
  desc '関聯タグ・テーブル作成'
  task calc: :environment do
    Similarity::Calc.call(Tag, :posts)
  end
end