feat: 類似度算出バッチ修正,ほか(#228) (#232)

#228

#228

#228

Co-authored-by: miteruzo <miteruzo@naver.com>
Reviewed-on: #232
This commit was merged in pull request #232.
This commit is contained in:
2026-01-22 23:30:08 +09:00
parent 86209dcc84
commit f6de272f55
18 changed files with 553 additions and 76 deletions
+10 -4
View File
@@ -49,13 +49,19 @@ class TagsController < ApplicationController
return head :unauthorized unless current_user return head :unauthorized unless current_user
return head :forbidden unless current_user.member? return head :forbidden unless current_user.member?
name = params[:name].presence
category = params[:category].presence
tag = Tag.find(params[:id]) tag = Tag.find(params[:id])
attrs = { name: params[:name].presence, if name.present?
category: params[:category].presence }.compact tag.tag_name.update!(name:)
end
tag.update!(attrs) if attrs.present? if category.present?
tag.update!(category:)
end
render json: tag render json: tag.as_json(methods: [:name])
end end
end end
+6 -17
View File
@@ -9,12 +9,7 @@ class Post < ApplicationRecord
has_many :post_tags_with_discarded, -> { with_discarded }, class_name: 'PostTag' has_many :post_tags_with_discarded, -> { with_discarded }, class_name: 'PostTag'
has_many :tags, through: :active_post_tags has_many :tags, through: :active_post_tags
has_many :user_post_views, dependent: :destroy has_many :user_post_views, dependent: :destroy
has_many :post_similarities_as_post, has_many :post_similarities
class_name: 'PostSimilarity',
foreign_key: :post_id
has_many :post_similarities_as_target_post,
class_name: 'PostSimilarity',
foreign_key: :target_post_id
has_one_attached :thumbnail has_one_attached :thumbnail
before_validation :normalise_url before_validation :normalise_url
@@ -34,18 +29,12 @@ class Post < ApplicationRecord
end end
def related(limit: nil) def related(limit: nil)
ids_with_cos = ids = post_similarities.select(:target_post_id).order(cos: :desc)
post_similarities_as_post.select(:target_post_id, :cos) ids = ids.limit(limit) if limit
.map { |ps| [ps.target_post_id, ps.cos] } + ids = ids.pluck(:target_post_id)
post_similarities_as_target_post.select(:post_id, :cos) return [] if ids.empty?
.map { |ps| [ps.post_id, ps.cos] }
sorted = ids_with_cos.sort_by { |_, cos| -cos } Post.where(id: ids).order(Arel.sql("FIELD(id, #{ ids.join(',') })"))
ids = sorted.map(&:first)
ids = ids.first(limit) if limit
Post.where(id: ids).index_by(&:id).values_at(*ids)
end end
def resized_thumbnail! def resized_thumbnail!
+4 -2
View File
@@ -1,4 +1,6 @@
class PostSimilarity < ApplicationRecord class PostSimilarity < ApplicationRecord
belongs_to :post, class_name: 'Post', foreign_key: 'post_id' self.primary_key = :post_id, :target_post_id
belongs_to :target_post, class_name: 'Post', foreign_key: 'target_post_id'
belongs_to :post
belongs_to :target_post, class_name: 'Post'
end end
+4 -2
View File
@@ -1,4 +1,6 @@
class TagSimilarity < ApplicationRecord class TagSimilarity < ApplicationRecord
belongs_to :tag, class_name: 'Tag', foreign_key: 'tag_id' self.primary_key = :tag_id, :target_tag_id
belongs_to :target_tag, class_name: 'Tag', foreign_key: 'target_tag_id'
belongs_to :tag
belongs_to :target_tag, class_name: 'Tag'
end end
+106
View File
@@ -0,0 +1,106 @@
module Similarity
  # Rebuilds the "<Model>Similarity" table (e.g. PostSimilarity for Post),
  # keeping only the most similar counterparts of every record.
  #
  # The similarity of two records is the cosine similarity of their binary
  # association vectors: |A ∩ B| / sqrt(|A| * |B|), where A and B are the id
  # lists of the records reached through the given association.
  class Calc
    # Default maximum number of similarity rows stored per record.
    DEFAULT_LIMIT = 20
    # Number of rows handed to the database per bulk insert.
    BATCH_SIZE = 1_000

    # Recomputes all similarities for +model+ and replaces the stored rows.
    #
    # @param model [Class] model class whose "<Model>Similarity" table is rebuilt
    # @param tgt [Symbol] association whose ids describe each record
    # @param limit [Integer] maximum number of rows kept per record
    # @return [nil]
    # @raise [ArgumentError] if +limit+ is not a positive Integer
    def self.call model, tgt, limit: DEFAULT_LIMIT
      unless limit.is_a?(Integer) && limit.positive?
        raise ArgumentError, 'limit must be a positive integer'
      end

      similarity_model = "#{ model.name }Similarity".constantize
      records = model.includes(tgt).select(:id).to_a
      # Ascending id lists, so intersections can be counted with a linear merge.
      assoc_ids = records.to_h { |rec| [rec.id, rec.public_send(tgt).map(&:id).sort] }
      top = top_similarities(records.map(&:id), assoc_ids, limit)

      base = model.name.underscore
      id_key = :"#{ base }_id"
      target_key = :"target_#{ base }_id"

      # The old rows are replaced only after the computation has succeeded, and
      # the delete + inserts share one transaction, so a failure part-way does
      # not leave the table emptied.
      similarity_model.transaction do
        similarity_model.delete_all
        buf = []
        top.each do |id, list|
          list.each do |cos, target_id|
            buf << { id_key => id, target_key => target_id, cos: }
            next if buf.size < BATCH_SIZE

            similarity_model.insert_all!(buf)
            buf = []
          end
        end
        similarity_model.insert_all!(buf) unless buf.empty?
      end
      nil
    end

    # Builds { id => [[cos, other_id], ...] } with every list sorted by cos
    # descending and capped at +limit+ entries.
    # NOTE(review): pairs with cos == 0.0 are still kept while a list has room,
    # exactly as before — confirm that storing zero-similarity rows is intended.
    def self.top_similarities ids, assoc_ids, limit
      top = Hash.new { |h, key| h[key] = [] }
      ids.each_with_index do |id, i|
        a = assoc_ids[id]
        ((i + 1)...ids.size).each do |j|
          target_id = ids[j]
          b = assoc_ids[target_id]
          norm = Math.sqrt(a.size * b.size)
          cos = norm.zero? ? 0.0 : intersection_size(a, b).fdiv(norm)
          # The relation is symmetric, so the pair is recorded in both directions.
          push_topk(top[id], cos, target_id, limit)
          push_topk(top[target_id], cos, id, limit)
        end
      end
      top
    end

    # Counts the common elements of two ascending-sorted arrays by a linear merge.
    def self.intersection_size a, b
      i = 0
      j = 0
      cnt = 0
      while i < a.size && j < b.size
        a_i = a[i]
        b_j = b[j]
        if a_i == b_j
          cnt += 1
          i += 1
          j += 1
        elsif a_i < b_j
          i += 1
        else
          j += 1
        end
      end
      cnt
    end

    # Inserts [cos, target_id] into +list+ (sorted by cos descending), after any
    # entries with an equal cos, and drops the weakest entry once the list
    # exceeds +limit+. Every unordered pair is visited exactly once by
    # top_similarities, so a target never needs to be looked up for replacement.
    def self.push_topk list, cos, target_id, limit
      return if list.size >= limit && cos <= list.last.first

      insert_at = list.index { |c, _| cos > c } || list.size
      list.insert(insert_at, [cos, target_id])
      list.pop if list.size > limit
    end

    private_class_method :top_similarities, :intersection_size, :push_topk
  end
end
@@ -0,0 +1,43 @@
# Replaces the surrogate `id` primary key of post_similarities with a composite
# primary key on (post_id, target_post_id), using raw ALTER TABLE statements.
class ChangePostSimilaritiesToCompositePk < ActiveRecord::Migration[8.0]
# Drops the `id` column and makes (post_id, target_post_id) the primary key.
def up
# Redefine `id` as a plain BIGINT NOT NULL column.
# NOTE(review): presumably this strips AUTO_INCREMENT so that the primary key
# can be dropped in the next statement — confirm against the original table.
execute <<~SQL
ALTER TABLE
post_similarities
MODIFY COLUMN id BIGINT NOT NULL
;
SQL
# Remove the existing primary key (the column itself is removed below).
execute <<~SQL
ALTER TABLE
post_similarities
DROP PRIMARY KEY
;
SQL
remove_column :post_similarities, :id
# Install the composite primary key.
execute <<~SQL
ALTER TABLE
post_similarities
ADD PRIMARY KEY (post_id, target_post_id)
;
SQL
end
# Reverts to a single auto-increment `id` primary key placed as the first column.
def down
execute <<~SQL
ALTER TABLE
post_similarities
DROP PRIMARY KEY
;
SQL
execute <<~SQL
ALTER TABLE
post_similarities
ADD COLUMN id BIGINT NOT NULL AUTO_INCREMENT FIRST
, ADD PRIMARY KEY (id)
;
SQL
end
end
@@ -0,0 +1,43 @@
# Replaces the surrogate `id` primary key of tag_similarities with a composite
# primary key on (tag_id, target_tag_id), using raw ALTER TABLE statements.
class ChangeTagSimilaritiesToCompositePk < ActiveRecord::Migration[8.0]
# Drops the `id` column and makes (tag_id, target_tag_id) the primary key.
def up
# Redefine `id` as a plain BIGINT NOT NULL column.
# NOTE(review): presumably this strips AUTO_INCREMENT so that the primary key
# can be dropped in the next statement — confirm against the original table.
execute <<~SQL
ALTER TABLE
tag_similarities
MODIFY COLUMN id BIGINT NOT NULL
;
SQL
# Remove the existing primary key (the column itself is removed below).
execute <<~SQL
ALTER TABLE
tag_similarities
DROP PRIMARY KEY
;
SQL
remove_column :tag_similarities, :id
# Install the composite primary key.
execute <<~SQL
ALTER TABLE
tag_similarities
ADD PRIMARY KEY (tag_id, target_tag_id)
;
SQL
end
# Reverts to a single auto-increment `id` primary key placed as the first column.
def down
execute <<~SQL
ALTER TABLE
tag_similarities
DROP PRIMARY KEY
;
SQL
execute <<~SQL
ALTER TABLE
tag_similarities
ADD COLUMN id BIGINT NOT NULL AUTO_INCREMENT FIRST
, ADD PRIMARY KEY (id)
;
SQL
end
end
@@ -0,0 +1,9 @@
# Swaps the single-column post_id index on post_similarities for a composite
# (post_id, cos DESC) index.
class AddIndexToPostSimilarities < ActiveRecord::Migration[8.0]
  def change
    # :column is passed in addition to :name because remove_index can only be
    # inverted (i.e. the index re-created on rollback) when the indexed column
    # is known; with :name alone, rolling back raises IrreversibleMigration.
    remove_index :post_similarities,
                 column: :post_id,
                 name: 'index_post_similarities_on_post_id'
    add_index :post_similarities, [:post_id, :cos],
              order: { cos: :desc },
              name: 'index_post_similarities_on_post_id_and_cos'
  end
end
@@ -0,0 +1,9 @@
# Swaps the single-column tag_id index on tag_similarities for a composite
# (tag_id, cos DESC) index.
class AddIndexToTagSimilarities < ActiveRecord::Migration[8.0]
  def change
    # :column is passed in addition to :name because remove_index can only be
    # inverted (i.e. the index re-created on rollback) when the indexed column
    # is known; with :name alone, rolling back raises IrreversibleMigration.
    remove_index :tag_similarities,
                 column: :tag_id,
                 name: 'index_tag_similarities_on_tag_id'
    add_index :tag_similarities, [:tag_id, :cos],
              order: { cos: :desc },
              name: 'index_tag_similarities_on_tag_id_and_cos'
  end
end
+5 -5
View File
@@ -10,7 +10,7 @@
# #
# It's strongly recommended that you check this file into your version control system. # It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema[8.0].define(version: 2026_01_18_144400) do ActiveRecord::Schema[8.0].define(version: 2026_01_21_225600) do
create_table "active_storage_attachments", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t| create_table "active_storage_attachments", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t|
t.string "name", null: false t.string "name", null: false
t.string "record_type", null: false t.string "record_type", null: false
@@ -55,11 +55,11 @@ ActiveRecord::Schema[8.0].define(version: 2026_01_18_144400) do
t.index ["tag_id"], name: "index_nico_tag_relations_on_tag_id" t.index ["tag_id"], name: "index_nico_tag_relations_on_tag_id"
end end
create_table "post_similarities", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t| create_table "post_similarities", primary_key: ["post_id", "target_post_id"], charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t|
t.bigint "post_id", null: false t.bigint "post_id", null: false
t.bigint "target_post_id", null: false t.bigint "target_post_id", null: false
t.float "cos", null: false t.float "cos", null: false
t.index ["post_id"], name: "index_post_similarities_on_post_id" t.index ["post_id", "cos"], name: "index_post_similarities_on_post_id_and_cos", order: { cos: :desc }
t.index ["target_post_id"], name: "index_post_similarities_on_target_post_id" t.index ["target_post_id"], name: "index_post_similarities_on_target_post_id"
end end
@@ -126,11 +126,11 @@ ActiveRecord::Schema[8.0].define(version: 2026_01_18_144400) do
t.index ["name"], name: "index_tag_names_on_name", unique: true t.index ["name"], name: "index_tag_names_on_name", unique: true
end end
create_table "tag_similarities", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t| create_table "tag_similarities", primary_key: ["tag_id", "target_tag_id"], charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t|
t.bigint "tag_id", null: false t.bigint "tag_id", null: false
t.bigint "target_tag_id", null: false t.bigint "target_tag_id", null: false
t.float "cos", null: false t.float "cos", null: false
t.index ["tag_id"], name: "index_tag_similarities_on_tag_id" t.index ["tag_id", "cos"], name: "index_tag_similarities_on_tag_id_and_cos", order: { cos: :desc }
t.index ["target_tag_id"], name: "index_tag_similarities_on_target_tag_id" t.index ["target_tag_id"], name: "index_tag_similarities_on_target_tag_id"
end end
+1 -23
View File
@@ -1,28 +1,6 @@
namespace :post_similarity do namespace :post_similarity do
desc '関聯投稿テーブル作成' desc '関聯投稿テーブル作成'
task calc: :environment do task calc: :environment do
dot = -> a, b { (a.keys & b.keys).sum { |k| a[k] * b[k] } } Similarity::Calc.call(Post, :tags)
norm = -> v { Math.sqrt(v.values.sum { |e| e * e }) }
cos = -> a, b do
na = norm.(a)
nb = norm.(b)
if na.zero? || nb.zero?
0.0
else
dot.(a, b) / na / nb
end
end
posts = Post.includes(:tags).to_a
posts.each_with_index do |post, i|
existence_of_tags = post.tags.index_with(1)
((i + 1)...posts.size).each do |j|
target_post = posts[j]
existence_of_target_tags = target_post.tags.index_with(1)
PostSimilarity.find_or_initialize_by(post:, target_post:).tap { |ps|
ps.cos = cos.(existence_of_tags, existence_of_target_tags)
}.save!
end
end
end end
end end
+1 -23
View File
@@ -1,28 +1,6 @@
namespace :tag_similarity do namespace :tag_similarity do
desc '関聯タグ・テーブル作成' desc '関聯タグ・テーブル作成'
task calc: :environment do task calc: :environment do
dot = -> a, b { (a.keys & b.keys).sum { |k| a[k] * b[k] } } Similarity::Calc.call(Tag, :posts)
norm = -> v { Math.sqrt(v.values.sum { |e| e * e }) }
cos = -> a, b do
na = norm.(a)
nb = norm.(b)
if na.zero? || nb.zero?
0.0
else
dot.(a, b) / na / nb
end
end
tags = Tag.includes(:posts).to_a
tags.each_with_index do |tag, i|
existence_of_posts = tag.posts.index_with(1)
((i + 1)...tags.size).each do |j|
target_tag = tags[j]
existence_of_target_posts = target_tag.posts.index_with(1)
TagSimilarity.find_or_initialize_by(tag:, target_tag:).tap { |ts|
ts.cos = cos.(existence_of_posts, existence_of_target_posts)
}.save!
end
end
end end
end end
+4
View File
@@ -7,5 +7,9 @@ FactoryBot.define do
trait :member do trait :member do
role { "member" } role { "member" }
end end
trait :admin do
role { 'admin' }
end
end end
end end
+134
View File
@@ -0,0 +1,134 @@
# spec/requests/tag_children_spec.rb
require "rails_helper"
RSpec.describe 'TagChildren', type: :request do
  let!(:parent) { create(:tag) }
  let!(:child) { create(:tag) }

  # Accounts handed back by the stubbed current_user.
  let(:user) { create_member_user! }
  let(:admin) { create_admin_user! }

  # Ids interpolated into the request path; the "invalid ids" contexts override them.
  let(:parent_id) { parent.id }
  let(:child_id) { child.id }

  # Makes every ApplicationController instance report the given user (or nil)
  # as the current user.
  def stub_current_user(user_or_nil)
    allow_any_instance_of(ApplicationController).to receive(:current_user).and_return(user_or_nil)
  end

  describe 'POST /tag_children' do
    subject(:do_request) { post "/tags/#{ parent_id }/children/#{ child_id }" }

    context 'when not logged in' do
      before { stub_current_user(nil) }

      it 'returns 401' do
        do_request
        expect(response).to have_http_status(:unauthorized)
      end
    end

    context 'when logged in but not admin' do
      before { stub_current_user(user) }

      it 'returns 403' do
        do_request
        expect(response).to have_http_status(:forbidden)
      end
    end

    context 'when admin and params are present' do
      before { stub_current_user(admin) }

      it 'returns 204 and adds child to parent.children' do
        expect(parent.children).not_to include(child)

        expect { do_request }
          .to change { parent.reload.children.ids.include?(child.id) }
          .from(false).to(true)

        expect(response).to have_http_status(:no_content)
      end
    end

    context 'when Tag.find raises (invalid ids) it still returns 204' do
      before { stub_current_user(admin) }

      let(:parent_id) { -1 }
      let(:child_id) { -1 }

      it 'returns 204 (rescue nil)' do
        do_request
        expect(response).to have_http_status(:no_content)
      end
    end
  end

  describe 'DELETE /tag_children' do
    subject(:do_request) { delete "/tags/#{ parent_id }/children/#{ child_id }" }

    context 'when not logged in' do
      before { stub_current_user(nil) }

      it 'returns 401' do
        do_request
        expect(response).to have_http_status(:unauthorized)
      end
    end

    context 'when logged in but not admin' do
      before { stub_current_user(user) }

      it 'returns 403' do
        do_request
        expect(response).to have_http_status(:forbidden)
      end
    end

    context 'when admin and params are present' do
      before do
        stub_current_user(admin)
        # Start from a state where the relation already exists.
        parent.children << child
      end

      it 'returns 204 and removes child from parent.children' do
        expect(parent.reload.children).to include(child)

        expect { do_request }
          .to change { parent.reload.children.ids.include?(child.id) }
          .from(true).to(false)

        expect(response).to have_http_status(:no_content)
      end
    end

    context 'when Tag.find raises (invalid ids) it still returns 204' do
      before { stub_current_user(admin) }

      let(:parent_id) { -1 }
      let(:child_id) { -1 }

      it 'returns 204 (rescue nil)' do
        do_request
        expect(response).to have_http_status(:no_content)
      end
    end
  end
end
+99
View File
@@ -76,4 +76,103 @@ RSpec.describe 'Tags API', type: :request do
expect(response).to have_http_status(:not_found) expect(response).to have_http_status(:not_found)
end end
end end
# Users whose member? answer is stubbed below (a factory trait would work as well).
let(:member_user) { create(:user) }
let(:non_member_user) { create(:user) }

# Makes every ApplicationController instance report +user+ as the current user.
def stub_current_user(user)
  allow_any_instance_of(ApplicationController)
    .to receive(:current_user)
    .and_return(user)
end

before do
  [[member_user, true], [non_member_user, false]].each do |account, is_member|
    allow(account).to receive(:member?).and_return(is_member)
  end
end
describe 'PATCH /tags/:id' do
  # Sends the PATCH request for the given tag id with +attrs+ as request params.
  def patch_tag(id, **attrs)
    patch "/tags/#{id}", params: attrs
  end

  context '未ログイン' do
    before { stub_current_user(nil) }

    it '401 を返す' do
      patch_tag(tag.id, name: 'new')
      expect(response).to have_http_status(:unauthorized)
    end
  end

  context 'ログインしてゐるが member でない' do
    before { stub_current_user(non_member_user) }

    it '403 を返す' do
      patch_tag(tag.id, name: 'new')
      expect(response).to have_http_status(:forbidden)
    end
  end

  context 'member' do
    before { stub_current_user(member_user) }

    it 'name だけ更新できる' do
      patch_tag(tag.id, name: 'new')
      expect(response).to have_http_status(:ok)

      expect(tag.reload.name).to eq('new')
      expect(tag.category).to eq('general')

      body = JSON.parse(response.body)
      expect(body['id']).to eq(tag.id)
      expect(body['name']).to eq('new')
      expect(body['category']).to eq('general')
    end

    it 'category だけ更新できる' do
      patch_tag(tag.id, category: 'meme')
      expect(response).to have_http_status(:ok)

      expect(tag.reload.name).to eq('spec_tag')
      expect(tag.category).to eq('meme')
    end

    it '空文字は presence により無視され、更新は走らない(値が変わらない)' do
      patch_tag(tag.id, name: '', category: ' ')
      expect(response).to have_http_status(:ok)

      expect(tag.reload.name).to eq('spec_tag')
      expect(tag.category).to eq('general')
    end

    it '両方更新できる' do
      patch_tag(tag.id, name: 'n', category: 'meta')
      expect(response).to have_http_status(:ok)

      expect(tag.reload.name).to eq('n')
      expect(tag.category).to eq('meta')
    end

    it '存在しない id だと RecordNotFound になる(通常は 404' do
      # Depending on the Rails configuration the exception either propagates
      # or is turned into a 404. APIs commonly map it to 404 via rescue_from,
      # in which case 404 is the expected status.
      patch_tag(999999999, name: 'x')
      expect(response.status).to be_in([404, 500])
    end

    it 'バリデーションで update! が失敗したら(通常は 422 か 500)' do
      patch_tag(tag.id, name: 'new', category: 'nico')

      # The status depends on how rescue_from is set up:
      # - 422 when RecordInvalid is mapped to 422
      # - 500 when it is left unhandled
      expect(response.status).to be_in([422, 500])
    end
  end
end
end end
+7
View File
@@ -5,4 +5,11 @@ module TestRecords
role: 'member', role: 'member',
banned: false) banned: false)
end end
# Persists and returns a non-banned user with the 'admin' role and a random
# inheritance code.
def create_admin_user!
  attrs = {
    name: 'spec admin',
    inheritance_code: SecureRandom.hex(16),
    role: 'admin',
    banned: false
  }
  User.create!(attrs)
end
end end
@@ -0,0 +1,34 @@
require 'rails_helper'
RSpec.describe 'post_similarity:calc' do
  include RakeTaskHelper

  it 'calls Similarity::Calc with Post and :tags' do
    # Minimal data set: three tags and three posts.
    tag1, tag2, tag3 = %w[t1 t2 t3].map { |name| Tag.create!(name:) }
    post1, post2, post3 = (1..3).map { |n| Post.create!(url: "https://example.com/#{n}") }

    # If a kept scope is involved, this relies on PostTag rows being kept by default.
    [[post1, tag1], [post1, tag2],
     [post2, tag1], [post2, tag3],
     [post3, tag3]].each do |post, tag|
      PostTag.create!(post:, tag:)
    end

    expect { run_rake_task('post_similarity:calc') }
      .to change { PostSimilarity.count }.from(0)

    # The pair must be stored in both directions with the same value.
    forward = PostSimilarity.find_by!(post_id: post1.id, target_post_id: post2.id)
    backward = PostSimilarity.find_by!(post_id: post2.id, target_post_id: post1.id)
    expect(backward.cos).to eq(forward.cos)
  end
end
@@ -0,0 +1,34 @@
require 'rails_helper'
RSpec.describe 'tag_similarity:calc' do
  include RakeTaskHelper

  it 'calls Similarity::Calc with Tag and :posts' do
    # Minimal data set: three tags and three posts.
    tag1, tag2, tag3 = %w[t1 t2 t3].map { |name| Tag.create!(name:) }
    post1, post2, post3 = (1..3).map { |n| Post.create!(url: "https://example.com/#{n}") }

    # If a kept scope is involved, this relies on PostTag rows being kept by default.
    [[post1, tag1], [post1, tag2],
     [post2, tag1], [post2, tag3],
     [post3, tag3]].each do |post, tag|
      PostTag.create!(post:, tag:)
    end

    expect { run_rake_task('tag_similarity:calc') }
      .to change { TagSimilarity.count }.from(0)

    # The pair must be stored in both directions with the same value.
    forward = TagSimilarity.find_by!(tag_id: tag1.id, target_tag_id: tag2.id)
    backward = TagSimilarity.find_by!(tag_id: tag2.id, target_tag_id: tag1.id)
    expect(backward.cos).to eq(forward.cos)
  end
end