| @@ -9,12 +9,7 @@ class Post < ApplicationRecord | |||||
| has_many :post_tags_with_discarded, -> { with_discarded }, class_name: 'PostTag' | has_many :post_tags_with_discarded, -> { with_discarded }, class_name: 'PostTag' | ||||
| has_many :tags, through: :active_post_tags | has_many :tags, through: :active_post_tags | ||||
| has_many :user_post_views, dependent: :destroy | has_many :user_post_views, dependent: :destroy | ||||
| has_many :post_similarities_as_post, | |||||
| class_name: 'PostSimilarity', | |||||
| foreign_key: :post_id | |||||
| has_many :post_similarities_as_target_post, | |||||
| class_name: 'PostSimilarity', | |||||
| foreign_key: :target_post_id | |||||
| has_many :post_similarities | |||||
| has_one_attached :thumbnail | has_one_attached :thumbnail | ||||
| before_validation :normalise_url | before_validation :normalise_url | ||||
| @@ -34,18 +29,12 @@ class Post < ApplicationRecord | |||||
| end | end | ||||
# Returns the posts related to this one, highest `cos` first; `limit`, when given,
# caps how many are returned.
# NOTE(review): this span looks like a flattened diff, so two implementations appear
# interleaved. The old/new grouping noted below is inferred, not marked — confirm
# against the real file.
| def related(limit: nil) | def related(limit: nil) | ||||
# Presumably the removed implementation: gathers [id, cos] pairs from both association
# directions in Ruby.
| ids_with_cos = | |||||
| post_similarities_as_post.select(:target_post_id, :cos) | |||||
| .map { |ps| [ps.target_post_id, ps.cos] } + | |||||
| post_similarities_as_target_post.select(:post_id, :cos) | |||||
| .map { |ps| [ps.post_id, ps.cos] } | |||||
# Presumably the added implementation: reads target ids from `post_similarities`,
# ordered by `cos` descending and limited in SQL; bails out with [] when none exist.
| ids = post_similarities.select(:target_post_id).order(cos: :desc) | |||||
| ids = ids.limit(limit) if limit | |||||
| ids = ids.pluck(:target_post_id) | |||||
| return [] if ids.empty? | |||||
# Presumably removed: sorts the pairs by descending cos in Ruby, then re-orders the
# loaded posts to match via index_by/values_at.
| sorted = ids_with_cos.sort_by { |_, cos| -cos } | |||||
| ids = sorted.map(&:first) | |||||
| ids = ids.first(limit) if limit | |||||
| Post.where(id: ids).index_by(&:id).values_at(*ids) | |||||
# Presumably added: FIELD(id, ...) asks the database to return rows in the same order
# as `ids` (FIELD is MySQL syntax). `ids` comes from pluck on an id column above.
| Post.where(id: ids).order(Arel.sql("FIELD(id, #{ ids.join(',') })")) | |||||
| end | end | ||||
| def resized_thumbnail! | def resized_thumbnail! | ||||
| @@ -1,6 +1,6 @@ | |||||
# Row linking a post to a target post; identified by the composite key
# (post_id, target_post_id).
# NOTE(review): both an explicit and a shortened pair of belongs_to declarations appear
# here because the span is a flattened diff; presumably only the shortened pair remains
# in the real file — confirm.
| class PostSimilarity < ApplicationRecord | class PostSimilarity < ApplicationRecord | ||||
| self.primary_key = :post_id, :target_post_id | self.primary_key = :post_id, :target_post_id | ||||
# Explicit form: class_name and foreign_key spelled out.
| belongs_to :post, class_name: 'Post', foreign_key: 'post_id' | |||||
| belongs_to :target_post, class_name: 'Post', foreign_key: 'target_post_id' | |||||
# Shortened form: only :target_post still names its class, since it cannot be derived
# from the association name.
| belongs_to :post | |||||
| belongs_to :target_post, class_name: 'Post' | |||||
| end | end | ||||
| @@ -1,6 +1,6 @@ | |||||
# Row linking a tag to a target tag; identified by the composite key
# (tag_id, target_tag_id).
# NOTE(review): both an explicit and a shortened pair of belongs_to declarations appear
# here because the span is a flattened diff; presumably only the shortened pair remains
# in the real file — confirm.
| class TagSimilarity < ApplicationRecord | class TagSimilarity < ApplicationRecord | ||||
| self.primary_key = :tag_id, :target_tag_id | self.primary_key = :tag_id, :target_tag_id | ||||
# Explicit form: class_name and foreign_key spelled out.
| belongs_to :tag, class_name: 'Tag', foreign_key: 'tag_id' | |||||
| belongs_to :target_tag, class_name: 'Tag', foreign_key: 'target_tag_id' | |||||
# Shortened form: only :target_tag still names its class, since it cannot be derived
# from the association name.
| belongs_to :tag | |||||
| belongs_to :target_tag, class_name: 'Tag' | |||||
| end | end | ||||
| @@ -0,0 +1,106 @@ | |||||
module Similarity
  # Rebuilds the "<Model>Similarity" table (e.g. PostSimilarity for Post) from scratch.
  #
  # Every record of `model` is described by the set of ids of its `tgt` association.
  # The similarity of two records is the cosine of their binary membership vectors:
  # |A ∩ B| / sqrt(|A| * |B|), or 0.0 when either set is empty. For each record only
  # the `limit` most similar other records are stored, in both directions.
  class Calc
    # Default maximum number of similar records stored per record.
    DEFAULT_LIMIT = 20
    # Number of rows sent per bulk INSERT.
    BATCH_SIZE = 1_000

    class << self
      # Recomputes and replaces all similarity rows for `model`.
      #
      # @param model [Class] model class; must respond to `name`, `includes`, `select`
      # @param tgt [Symbol] association whose ids describe each record
      # @param limit [Integer] maximum number of neighbours kept per record
      # @return [nil]
      # @raise [ArgumentError] if `limit` is not a positive Integer
      def call(model, tgt, limit: DEFAULT_LIMIT)
        raise ArgumentError, 'limit must be a positive Integer' unless limit.is_a?(Integer) && limit.positive?

        similarity_model = "#{model.name}Similarity".constantize

        # Do the expensive pairwise work before touching the table, so the old rows
        # stay readable while it runs and survive if it raises.
        rows = build_rows(model, nearest_neighbours(id_sets(model, tgt), limit))

        # Swap the contents in one transaction: readers see either the old rows or
        # the new ones, never an empty or half-filled table.
        similarity_model.transaction do
          similarity_model.delete_all
          rows.each_slice(BATCH_SIZE) { |batch| similarity_model.insert_all!(batch) }
        end
        nil
      end

      private

      # Maps each record id to the unique ids of its `tgt` association.
      # Ids are de-duplicated so a repeated association row cannot inflate the norms.
      def id_sets(model, tgt)
        model.includes(tgt).select(:id).to_a.each_with_object({}) do |record, sets|
          sets[record.id] = record.public_send(tgt).map(&:id).uniq
        end
      end

      # Scores every unordered pair once and keeps, per record, its `limit` best
      # [cos, other_id] entries sorted by descending cos.
      def nearest_neighbours(sets, limit)
        top = Hash.new { |hash, id| hash[id] = [] }
        sets.to_a.combination(2) do |(id, set), (other_id, other_set)|
          cos = cosine(set, other_set)
          push_topk(top[id], cos, other_id, limit)
          push_topk(top[other_id], cos, id, limit)
        end
        top
      end

      # Cosine similarity of two duplicate-free id arrays; 0.0 if either is empty.
      def cosine(a, b)
        norm = Math.sqrt(a.size * b.size)
        norm.zero? ? 0.0 : (a & b).size.fdiv(norm)
      end

      # Inserts [cos, target_id] into `list` (descending by cos, at most `limit` long).
      # Ties go after existing equal scores, and a full list rejects a score that does
      # not beat its current minimum. No duplicate check is needed: each pair is scored
      # exactly once, so a target can never already be present.
      def push_topk(list, cos, target_id, limit)
        return if list.size >= limit && cos <= list.last.first

        position = list.index { |existing, _| cos > existing } || list.size
        list.insert(position, [cos, target_id])
        list.pop if list.size > limit
      end

      # Flattens the per-record lists into attribute hashes for insert_all!.
      # Column names are derived from the model name, e.g. :post_id / :target_post_id.
      def build_rows(model, top)
        base = model.name.underscore
        id_column = :"#{base}_id"
        target_column = :"target_#{base}_id"
        top.flat_map do |id, list|
          list.map { |cos, target_id| { id_column => id, target_column => target_id, cos: cos } }
        end
      end
    end
  end
end
| @@ -0,0 +1,9 @@ | |||||
# Replaces the single-column index on post_similarities.post_id with a composite
# (post_id, cos DESC) index, matching a lookup that filters on post_id and orders
# by cos descending.
class AddIndexToPostSimilarities < ActiveRecord::Migration[8.0]
  def change
    # Create the replacement first so post_id is never left without an index.
    # On rollback Rails runs the inverse steps in reverse order, which keeps the
    # same guarantee.
    add_index :post_similarities, [:post_id, :cos],
              order: { cos: :desc },
              name: 'index_post_similarities_on_post_id_and_cos'
    # `column:` lets Rails re-create this index on rollback; with only `name:`,
    # remove_index inside `change` raises ActiveRecord::IrreversibleMigration.
    remove_index :post_similarities, column: :post_id,
                 name: 'index_post_similarities_on_post_id'
  end
end
| @@ -0,0 +1,9 @@ | |||||
# Replaces the single-column index on tag_similarities.tag_id with a composite
# (tag_id, cos DESC) index.
class AddIndexToTagSimilarities < ActiveRecord::Migration[8.0]
  def change
    # Create the replacement first so tag_id is never left without an index.
    # On rollback Rails runs the inverse steps in reverse order, which keeps the
    # same guarantee.
    add_index :tag_similarities, [:tag_id, :cos],
              order: { cos: :desc },
              name: 'index_tag_similarities_on_tag_id_and_cos'
    # `column:` lets Rails re-create this index on rollback; with only `name:`,
    # remove_index inside `change` raises ActiveRecord::IrreversibleMigration.
    remove_index :tag_similarities, column: :tag_id,
                 name: 'index_tag_similarities_on_tag_id'
  end
end
| @@ -10,7 +10,7 @@ | |||||
| # | # | ||||
| # It's strongly recommended that you check this file into your version control system. | # It's strongly recommended that you check this file into your version control system. | ||||
| ActiveRecord::Schema[8.0].define(version: 2026_01_21_024800) do | |||||
| ActiveRecord::Schema[8.0].define(version: 2026_01_21_225600) do | |||||
| create_table "active_storage_attachments", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t| | create_table "active_storage_attachments", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t| | ||||
| t.string "name", null: false | t.string "name", null: false | ||||
| t.string "record_type", null: false | t.string "record_type", null: false | ||||
| @@ -59,7 +59,7 @@ ActiveRecord::Schema[8.0].define(version: 2026_01_21_024800) do | |||||
| t.bigint "post_id", null: false | t.bigint "post_id", null: false | ||||
| t.bigint "target_post_id", null: false | t.bigint "target_post_id", null: false | ||||
| t.float "cos", null: false | t.float "cos", null: false | ||||
| t.index ["post_id"], name: "index_post_similarities_on_post_id" | |||||
| t.index ["post_id", "cos"], name: "index_post_similarities_on_post_id_and_cos", order: { cos: :desc } | |||||
| t.index ["target_post_id"], name: "index_post_similarities_on_target_post_id" | t.index ["target_post_id"], name: "index_post_similarities_on_target_post_id" | ||||
| end | end | ||||
| @@ -130,7 +130,7 @@ ActiveRecord::Schema[8.0].define(version: 2026_01_21_024800) do | |||||
| t.bigint "tag_id", null: false | t.bigint "tag_id", null: false | ||||
| t.bigint "target_tag_id", null: false | t.bigint "target_tag_id", null: false | ||||
| t.float "cos", null: false | t.float "cos", null: false | ||||
| t.index ["tag_id"], name: "index_tag_similarities_on_tag_id" | |||||
| t.index ["tag_id", "cos"], name: "index_tag_similarities_on_tag_id_and_cos", order: { cos: :desc } | |||||
| t.index ["target_tag_id"], name: "index_tag_similarities_on_target_tag_id" | t.index ["target_tag_id"], name: "index_tag_similarities_on_target_tag_id" | ||||
| end | end | ||||
| @@ -1,28 +1,6 @@ | |||||
# Rake task `post_similarity:calc`: recomputes the similarity rows between posts
# from their tags.
# NOTE(review): this span looks like a flattened diff. The inline lambdas and nested
# loop are presumably the removed implementation and the single Similarity::Calc call
# its replacement — confirm against the real file.
| namespace :post_similarity do | namespace :post_similarity do | ||||
| desc '関聯投稿テーブル作成' | desc '関聯投稿テーブル作成' | ||||
| task calc: :environment do | task calc: :environment do | ||||
# Presumably removed: cosine helpers over hashes that map each tag to 1.
| dot = -> a, b { (a.keys & b.keys).sum { |k| a[k] * b[k] } } | |||||
| norm = -> v { Math.sqrt(v.values.sum { |e| e * e }) } | |||||
| cos = -> a, b do | |||||
| na = norm.(a) | |||||
| nb = norm.(b) | |||||
| if na.zero? || nb.zero? | |||||
| 0.0 | |||||
| else | |||||
| dot.(a, b) / na / nb | |||||
| end | |||||
| end | |||||
# Presumably removed: visits every unordered pair of posts and saves one
# PostSimilarity row per pair, one find_or_initialize_by + save! at a time.
| posts = Post.includes(:tags).to_a | |||||
| posts.each_with_index do |post, i| | |||||
| existence_of_tags = post.tags.index_with(1) | |||||
| ((i + 1)...posts.size).each do |j| | |||||
| target_post = posts[j] | |||||
| existence_of_target_tags = target_post.tags.index_with(1) | |||||
| PostSimilarity.find_or_initialize_by(post:, target_post:).tap { |ps| | |||||
| ps.cos = cos.(existence_of_tags, existence_of_target_tags) | |||||
| }.save! | |||||
| end | |||||
| end | |||||
# Presumably added: delegates the whole computation to the shared calculator.
| Similarity::Calc.call(Post, :tags) | |||||
| end | end | ||||
| end | end | ||||
| @@ -1,28 +1,6 @@ | |||||
# Rake task `tag_similarity:calc`: recomputes the similarity rows between tags
# from the posts they appear on.
# NOTE(review): this span looks like a flattened diff. The inline lambdas and nested
# loop are presumably the removed implementation and the single Similarity::Calc call
# its replacement — confirm against the real file.
| namespace :tag_similarity do | namespace :tag_similarity do | ||||
| desc '関聯タグ・テーブル作成' | desc '関聯タグ・テーブル作成' | ||||
| task calc: :environment do | task calc: :environment do | ||||
# Presumably removed: cosine helpers over hashes that map each post to 1.
| dot = -> a, b { (a.keys & b.keys).sum { |k| a[k] * b[k] } } | |||||
| norm = -> v { Math.sqrt(v.values.sum { |e| e * e }) } | |||||
| cos = -> a, b do | |||||
| na = norm.(a) | |||||
| nb = norm.(b) | |||||
| if na.zero? || nb.zero? | |||||
| 0.0 | |||||
| else | |||||
| dot.(a, b) / na / nb | |||||
| end | |||||
| end | |||||
# Presumably removed: visits every unordered pair of tags and saves one
# TagSimilarity row per pair, one find_or_initialize_by + save! at a time.
| tags = Tag.includes(:posts).to_a | |||||
| tags.each_with_index do |tag, i| | |||||
| existence_of_posts = tag.posts.index_with(1) | |||||
| ((i + 1)...tags.size).each do |j| | |||||
| target_tag = tags[j] | |||||
| existence_of_target_posts = target_tag.posts.index_with(1) | |||||
| TagSimilarity.find_or_initialize_by(tag:, target_tag:).tap { |ts| | |||||
| ts.cos = cos.(existence_of_posts, existence_of_target_posts) | |||||
| }.save! | |||||
| end | |||||
| end | |||||
# Presumably added: delegates the whole computation to the shared calculator.
| Similarity::Calc.call(Tag, :posts) | |||||
| end | end | ||||
| end | end | ||||