Browse Source

TagName サニタイズ(#281) (#289)

#281

#281

Merge remote-tracking branch 'origin/main' into feature/281

#281

#281 テストまだ通ってないので要確認

Co-authored-by: miteruzo <miteruzo@naver.com>
Reviewed-on: https://git.miteruzo.com/miteruzo/btrc-hub/pulls/289
feature/278
みてるぞ 1 week ago
parent
commit
d772cceb5e
16 changed files with 351 additions and 27 deletions
  1. +1
    -1
      backend/app/controllers/wiki_pages_controller.rb
  2. +20
    -0
      backend/app/models/my_discard.rb
  3. +15
    -5
      backend/app/models/tag.rb
  4. +11
    -0
      backend/app/models/tag_name.rb
  5. +58
    -0
      backend/app/models/tag_name_sanitisation_rule.rb
  6. +4
    -0
      backend/app/models/wiki_page.rb
  7. +31
    -0
      backend/db/migrate/20260309123200_create_tag_name_sanitisation_rules.rb
  8. +6
    -0
      backend/db/migrate/20260311232100_add_discarded_at_to_tag_names.rb
  9. +6
    -0
      backend/db/migrate/20260311232300_add_discarded_at_to_wiki_pages.rb
  10. +5
    -1
      backend/db/schema.rb
  11. +1
    -1
      backend/lib/tasks/sync_nico.rake
  12. +11
    -1
      backend/spec/factories/tags.rb
  13. +101
    -0
      backend/spec/models/tag_name_sanitisation_rule_spec.rb
  14. +73
    -12
      backend/spec/models/tag_spec.rb
  15. +6
    -4
      backend/spec/requests/posts_spec.rb
  16. +2
    -2
      backend/spec/tasks/nico_sync_spec.rb

+ 1
- 1
backend/app/controllers/wiki_pages_controller.rb View File

@@ -90,7 +90,7 @@ class WikiPagesController < ApplicationController

return head :unprocessable_entity if name.blank? || body.blank?

tag_name = TagName.find_or_create_by!(name:)
tag_name = TagName.find_undiscard_or_create_by!(name:)
page = WikiPage.new(tag_name:, created_user: current_user, updated_user: current_user)
if page.save
message = params[:message].presence


+ 20
- 0
backend/app/models/my_discard.rb View File

@@ -0,0 +1,20 @@
# Soft-delete helpers layered on top of Discard::Model.
#
# Including this concern also includes Discard::Model and adds a class-level
# finder that revives a discarded row instead of inserting a duplicate.
module MyDiscard
  extend ActiveSupport::Concern

  # Upper bound on re-attempts after a unique-index collision. Without a
  # bound, a collision that `attrs` cannot explain (e.g. a different unique
  # column) would make find_by return nil and create! raise forever.
  MAX_UNIQUE_RETRIES = 3

  included { include Discard::Model }

  class_methods do
    # Finds the record matching +attrs+ (discarded rows included), undiscards
    # it when necessary, or creates it when no row exists.
    #
    # @param attrs [Hash] attributes used both for the lookup and for create!
    # @yield [record] forwarded to create! when a new record is built
    # @return [ActiveRecord::Base] the found, revived or newly created record
    # @raise [ActiveRecord::RecordNotUnique] when the unique-index collision
    #   persists after MAX_UNIQUE_RETRIES re-attempts
    # @raise [ActiveRecord::RecordInvalid] when create! fails validation
    def find_undiscard_or_create_by! attrs, &block
      attempts = 0

      begin
        record = with_discarded.find_by(attrs)

        if record&.discarded?
          record.undiscard!
          # Overwrite created_at with the freshly reloaded updated_at, so the
          # revived row carries the time of its revival as its creation time.
          record.update_columns(created_at: record.reload.updated_at)
        end

        record or create!(attrs, &block)
      rescue ActiveRecord::RecordNotUnique
        # Presumably a concurrent insert won the race; look the row up again,
        # but give up after a few attempts instead of spinning forever.
        attempts += 1
        raise if attempts > MyDiscard::MAX_UNIQUE_RETRIES

        retry
      end
    end
  end
end

+ 15
- 5
backend/app/models/tag.rb View File

@@ -1,5 +1,5 @@
class Tag < ApplicationRecord
include Discard::Model
include MyDiscard

class NicoTagNormalisationError < ArgumentError
;
@@ -134,10 +134,10 @@ class Tag < ApplicationRecord
end

def self.find_or_create_by_tag_name! name, category:
tn = TagName.find_or_create_by!(name: name.to_s.strip)
tn = TagName.find_undiscard_or_create_by!(name: name.to_s.strip)
tn = tn.canonical if tn.canonical_id?

Tag.find_or_create_by!(tag_name_id: tn.id) do |t|
Tag.find_undiscard_or_create_by!(tag_name_id: tn.id) do |t|
t.category = category
end
rescue ActiveRecord::RecordNotUnique
@@ -162,9 +162,19 @@ class Tag < ApplicationRecord
end

source_tag_name = source_tag.tag_name

if source_tag_name.wiki_page.present?
raise ActiveRecord::RecordInvalid.new(source_tag_name)
end

source_tag.discard!
source_tag_name.reload
source_tag_name.update!(canonical: target_tag.tag_name)

if source_tag.nico?
source_tag_name.discard!
else
source_tag_name.update_columns(canonical_id: target_tag.tag_name_id,
updated_at: Time.current)
end
end

# 投稿件数を再集計


+ 11
- 0
backend/app/models/tag_name.rb View File

@@ -1,4 +1,8 @@
class TagName < ApplicationRecord
include MyDiscard

default_scope -> { kept }

has_one :tag
has_one :wiki_page

@@ -10,6 +14,7 @@ class TagName < ApplicationRecord
validate :canonical_must_be_canonical
validate :alias_name_must_not_have_prefix
validate :canonical_must_not_be_present_with_tag_or_wiki_page
validate :name_must_be_sanitised

def self.canonicalise names
names = Array(names).map { |n| n.to_s.strip }.reject(&:blank?)
@@ -39,4 +44,10 @@ class TagName < ApplicationRecord
errors.add :canonical, 'タグもしくは Wiki の参照がある名前はエーリアスになれません.'
end
end

# Validation: a present name must already be in sanitised form, i.e. running
# it through TagNameSanitisationRule.sanitise must leave it unchanged.
def name_must_be_sanitised
  # Nothing to check when the name is absent.
  return unless name?
  return if TagNameSanitisationRule.sanitise(name) == name

  errors.add :name, '名前に使用できない文字が含まれてゐます.'
end
end

+ 58
- 0
backend/app/models/tag_name_sanitisation_rule.rb View File

@@ -0,0 +1,58 @@
# An ordered regexp-substitution rule used to normalise tag names.
#
# Rows are keyed by +priority+ (the primary key) and applied in ascending
# priority order. Discarded rules are hidden by the default scope.
class TagNameSanitisationRule < ApplicationRecord
  include Discard::Model

  self.primary_key = :priority

  default_scope -> { kept }

  validates :source_pattern, presence: true, uniqueness: true

  validate :source_pattern_must_be_regexp

  class << self
    # Applies every kept rule to +name+, in priority order, each rule working
    # on the output of the previous one.
    #
    # @param name [String] the raw tag name
    # @return [String] a new string; +name+ itself is not mutated
    def sanitise(name) =
      rules.reduce(name.dup) { |sanitised, (pattern, replacement)| sanitised.gsub(pattern, replacement) }

    # Re-sanitises every TagName reachable through TagName.find_each.
    #
    # A name whose sanitised form is free is renamed in place. When the
    # sanitised form already belongs to another TagName, the tag (if any) is
    # merged into or moved onto that name and the old TagName is discarded.
    def apply!
      TagName.find_each do |tn|
        name = sanitise(tn.name)
        next if name == tn.name

        TagName.transaction do
          existing_tn = TagName.find_by(name:)
          if existing_tn
            # Follow an alias to its canonical name before comparing/merging.
            existing_tn = existing_tn.canonical || existing_tn
            # `next` here leaves the transaction block only; nothing follows it
            # in the find_each block, so the effect is to skip this TagName.
            next if existing_tn.id == tn.id

            existing_tag = Tag.find_by(tag_name_id: existing_tn.id)
            source_tag = Tag.find_by(tag_name_id: tn.id)

            if existing_tag
              # NOTE(review): the specs call Tag.merge_tags! with an array of
              # source tags; confirm a single Tag is accepted here as well.
              Tag.merge_tags!(existing_tag, source_tag) if tn.tag
            elsif source_tag
              # Only the source has a tag: repoint it at the surviving name,
              # skipping validations and callbacks.
              source_tag.update_columns(tag_name_id: existing_tn.id, updated_at: Time.current)
            end
            tn.discard!

            next
          end

          # Write the column directly so TagName's own sanitisation handling
          # is bypassed.
          tn.update_columns(name:, updated_at: Time.current)
        end
      end
    end

    private

    # Kept rules as [Regexp, replacement] pairs in ascending priority order.
    # Queries the table and compiles the patterns on every call.
    def rules = kept.order(:priority).map { |r| [Regexp.new(r.source_pattern), r.replacement] }
  end

  private

  # Validation: +source_pattern+ must compile as a regular expression.
  def source_pattern_must_be_regexp
    # A blank pattern is already reported by the presence validation, and
    # Regexp.new(nil) raises TypeError (not RegexpError), which would make
    # valid? blow up instead of returning false.
    return if source_pattern.blank?

    Regexp.new(source_pattern)
  rescue RegexpError
    errors.add :source_pattern, '変な正規表現だね〜(笑)'
  end
end

+ 4
- 0
backend/app/models/wiki_page.rb View File

@@ -2,6 +2,10 @@ require 'set'


class WikiPage < ApplicationRecord
include MyDiscard

default_scope -> { kept }

has_many :wiki_revisions, dependent: :destroy
belongs_to :created_user, class_name: 'User'
belongs_to :updated_user, class_name: 'User'


+ 31
- 0
backend/db/migrate/20260309123200_create_tag_name_sanitisation_rules.rb View File

@@ -0,0 +1,31 @@
# Creates the tag_name_sanitisation_rules table and seeds the default rules.
class CreateTagNameSanitisationRules < ActiveRecord::Migration[8.0]
# Builds the table (integer primary key named `priority`, a unique
# source_pattern, a soft-delete `discarded_at` column) and inserts the seed rows.
def up
create_table :tag_name_sanitisation_rules, id: :integer, primary_key: :priority do |t|
t.string :source_pattern, null: false
t.string :replacement, null: false
t.timestamps
t.datetime :discarded_at
t.index :source_pattern, unique: true
t.index :discarded_at
end

# Quote the timestamp once so it can be interpolated into the raw SQL below.
now = ActiveRecord::Base.connection.quote(Time.current)
# Seed rules, by priority. The heredoc turns each `\\\\` into two backslashes
# in the SQL text; presumably the database's string-literal escaping then
# collapses them to one, so the stored patterns are e.g. `\*` — TODO confirm.
#   10/20/25: `*`, `?` and `/` become `_`
#   30:       runs of `_` collapse to a single `_`
#   40:       a trailing `_` is removed
#   45:       a leading `_` (after an optional "prefix:") is removed
#   50:       a name that is empty after an optional "prefix:" becomes "null"
execute <<~SQL
INSERT INTO
tag_name_sanitisation_rules(priority, source_pattern, replacement, created_at, updated_at)
VALUES
(10, '\\\\*', '_', #{ now }, #{ now })
, (20, '\\\\?', '_', #{ now }, #{ now })
, (25, '\\\\/', '_', #{ now }, #{ now })
, (30, '_+', '_', #{ now }, #{ now })
, (40, '_$', '', #{ now }, #{ now })
, (45, '^([^:]+\\\\:)?_', '\\\\1', #{ now }, #{ now })
, (50, '^([^:]+\\\\:)?$', '\\\\1null', #{ now }, #{ now })
;
SQL
end

# Drops the table, seed rows included.
def down
drop_table :tag_name_sanitisation_rules
end
end

+ 6
- 0
backend/db/migrate/20260311232100_add_discarded_at_to_tag_names.rb View File

@@ -0,0 +1,6 @@
# Adds a nullable, indexed `discarded_at` datetime column to tag_names.
class AddDiscardedAtToTagNames < ActiveRecord::Migration[8.0]
# Adds the column and its index.
def change
add_column :tag_names, :discarded_at, :datetime
add_index :tag_names, :discarded_at
end
end

+ 6
- 0
backend/db/migrate/20260311232300_add_discarded_at_to_wiki_pages.rb View File

@@ -0,0 +1,6 @@
# Adds a nullable, indexed `discarded_at` datetime column to wiki_pages.
class AddDiscardedAtToWikiPages < ActiveRecord::Migration[8.0]
# Adds the column and its index.
def change
add_column :wiki_pages, :discarded_at, :datetime
add_index :wiki_pages, :discarded_at
end
end

+ 5
- 1
backend/db/schema.rb View File

@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema[8.0].define(version: 2026_03_11_123100) do
ActiveRecord::Schema[8.0].define(version: 2026_03_11_232300) do
create_table "active_storage_attachments", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t|
t.string "name", null: false
t.string "record_type", null: false
@@ -142,7 +142,9 @@ ActiveRecord::Schema[8.0].define(version: 2026_03_11_123100) do
t.bigint "canonical_id"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.datetime "discarded_at"
t.index ["canonical_id"], name: "index_tag_names_on_canonical_id"
t.index ["discarded_at"], name: "index_tag_names_on_discarded_at"
t.index ["name"], name: "index_tag_names_on_name", unique: true
end

@@ -204,7 +206,9 @@ ActiveRecord::Schema[8.0].define(version: 2026_03_11_123100) do
t.bigint "updated_user_id", null: false
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.datetime "discarded_at"
t.index ["created_user_id"], name: "index_wiki_pages_on_created_user_id"
t.index ["discarded_at"], name: "index_wiki_pages_on_discarded_at"
t.index ["tag_name_id"], name: "index_wiki_pages_on_tag_name_id", unique: true
t.index ["updated_user_id"], name: "index_wiki_pages_on_updated_user_id"
end


+ 1
- 1
backend/lib/tasks/sync_nico.rake View File

@@ -108,7 +108,7 @@ namespace :nico do
desired_non_nico_tag_ids = []

datum['tags'].each do |raw|
name = "nico:#{ raw }"
name = TagNameSanitisationRule.sanitise("nico:#{ raw }")
tag = Tag.find_or_create_by_tag_name!(name, category: :nico)
desired_nico_tag_based_ids << tag.id



+ 11
- 1
backend/spec/factories/tags.rb View File

@@ -1,12 +1,22 @@
FactoryBot.define do
factory :tag do
transient do
name { nil }
end

category { :general }
post_count { 0 }
association :tag_name

after(:build) do |tag, evaluator|
tag.name = evaluator.name if evaluator.name.present?
end

trait :nico do
category { :nico }
tag_name { association(:tag_name, name: "nico:#{ SecureRandom.hex(4) }") }
transient do
name { "nico:#{ SecureRandom.hex(4) }" }
end
end
end
end

+ 101
- 0
backend/spec/models/tag_name_sanitisation_rule_spec.rb View File

@@ -0,0 +1,101 @@
require 'rails_helper'

RSpec.describe TagNameSanitisationRule, type: :model do
  # Rules run in ascending priority, each one on the previous rule's output.
  describe '.sanitise' do
    before do
      [
        { priority: 10, source_pattern: '_', replacement: '' },
        { priority: 20, source_pattern: 'ABC', replacement: 'abc' }
      ].each { |attrs| described_class.create!(**attrs) }
    end

    it 'applies sanitisation rules sequentially in priority order' do
      sanitised = described_class.sanitise('A_B_C')

      expect(sanitised).to eq('abc')
    end

    it 'does not fail when a rule does not match' do
      expect { described_class.sanitise('xyz') }.not_to raise_error

      untouched = described_class.sanitise('xyz')
      expect(untouched).to eq('xyz')
    end
  end

  describe 'validations' do
    it 'is invalid when source_pattern is not a valid regexp' do
      # '[' is an unterminated character class, so it cannot compile.
      rule = described_class.new(priority: 10, source_pattern: '[', replacement: '')

      expect(rule).not_to be_valid
      expect(rule.errors[:source_pattern]).not_to be_empty
    end
  end

  # Every context below stores 'foo_bar' through update_columns, writing the
  # column directly rather than going through create!/update! validation.
  describe '.apply!' do
    # A single rule that strips underscores.
    before { described_class.create!(priority: 10, source_pattern: '_', replacement: '') }

    context 'when no conflicting tag_name exists' do
      let!(:tag_name) do
        record = TagName.create!(name: 'tmp')
        record.update_columns(name: 'foo_bar', updated_at: Time.current)
        record
      end

      it 'renames the tag_name' do
        described_class.apply!

        tag_name.reload
        expect(tag_name.name).to eq('foobar')
      end
    end

    context 'when a conflicting canonical tag_name exists' do
      let!(:existing) { TagName.create!(name: 'foobar') }
      let!(:source) do
        record = TagName.create!(name: 'tmp')
        record.update_columns(name: 'foo_bar', updated_at: Time.current)
        record
      end

      it 'deletes the source tag_name' do
        described_class.apply!

        source_still_visible = TagName.exists?(source.id)
        expect(source_still_visible).to be(false)
        expect(existing.reload.name).to eq('foobar')
      end
    end

    context 'when the source tag_name has a tag and the existing one has no tag' do
      let!(:existing) { TagName.create!(name: 'foobar') }
      let!(:source_tag) { create(:tag, name: 'tmp', category: :general) }
      let!(:source_tag_name_id) { source_tag.tag_name_id }

      before do
        source_tag.tag_name.update_columns(name: 'foo_bar', updated_at: Time.current)
      end

      it 'moves the tag to the existing tag_name' do
        described_class.apply!

        source_tag.reload
        expect(source_tag.tag_name_id).to eq(existing.canonical_id || existing.id)
        expect(TagName.exists?(source_tag_name_id)).to be(false)
      end
    end

    context 'when both source and existing tag_names have tags' do
      let!(:existing_tn) { TagName.create!(name: 'foobar') }
      let!(:existing_tag) { Tag.create!(tag_name: existing_tn, category: :general) }

      let!(:source_tn) { TagName.create!(name: 'tmp') }
      let!(:source_tag) { Tag.create!(tag_name: source_tn, category: :general) }
      let!(:source_tag_name_id) { source_tn.id }

      before { source_tn.update_columns(name: 'foo_bar', updated_at: Time.current) }

      it 'merges the source tag into the existing tag and deletes the source tag_name' do
        # Precondition: each name resolves to its own tag before apply! runs.
        tag_id_for = ->(tag_name) { TagName.find_by(name: tag_name)&.tag&.id }
        expect(tag_id_for.call('foobar')).to eq(existing_tag.id)
        expect(tag_id_for.call('foo_bar')).to eq(source_tag.id)

        described_class.apply!

        expect(Tag.exists?(source_tag.id)).to be(false)
        expect(TagName.exists?(source_tag.tag_name_id)).to be(false)
      end
    end
  end
end

+ 73
- 12
backend/spec/models/tag_spec.rb View File

@@ -2,16 +2,18 @@ require 'rails_helper'

RSpec.describe Tag, type: :model do
describe '.merge_tags!' do
let!(:target_tag) { create(:tag) }
let!(:source_tag) { create(:tag) }
let!(:target_tag) { create(:tag, category: :general) }
let!(:source_tag) { create(:tag, category: :general) }
let!(:source_tag_name) { source_tag.tag_name }

let!(:post_record) { Post.create!(url: 'https://example.com/posts/1', title: 'test post') }
let!(:post_record) do
Post.create!(url: 'https://example.com/posts/1', title: 'test post')
end

context 'when merging a simple source tag' do
let!(:source_post_tag) { PostTag.create!(post: post_record, tag: source_tag) }

it 'discards the source post_tag, creates an active target post_tag, ' +
'discards the source tag, and aliases the source tag_name' do
it 'discards the source post_tag, creates an active target post_tag, discards the source tag, and aliases the source tag_name' do
described_class.merge_tags!(target_tag, [source_tag])

source_pt = PostTag.with_discarded.find(source_post_tag.id)
@@ -22,7 +24,9 @@ RSpec.describe Tag, type: :model do
expect(active_target).to be_present

expect(Tag.with_discarded.find(source_tag.id)).to be_discarded
expect(source_tag.tag_name.reload.canonical_id).to eq(target_tag.tag_name_id)
expect(TagName.with_discarded.find(source_tag_name.id)).not_to be_discarded
expect(source_tag_name.reload.canonical_id).to eq(target_tag.tag_name_id)
expect(target_tag.reload.post_count).to eq(1)
end
end

@@ -30,8 +34,7 @@ RSpec.describe Tag, type: :model do
let!(:target_post_tag) { PostTag.create!(post: post_record, tag: target_tag) }
let!(:source_post_tag) { PostTag.create!(post: post_record, tag: source_tag) }

it 'discards the source post_tag, keeps one active target post_tag, ' +
'and aliases the source tag_name' do
it 'discards the source post_tag, keeps one active target post_tag, discards the source tag, and aliases the source tag_name' do
described_class.merge_tags!(target_tag, [source_tag])

source_pt = PostTag.with_discarded.find(source_post_tag.id)
@@ -43,7 +46,9 @@ RSpec.describe Tag, type: :model do
expect(active.first.id).to eq(target_post_tag.id)

expect(Tag.with_discarded.find(source_tag.id)).to be_discarded
expect(source_tag.tag_name.reload.canonical_id).to eq(target_tag.tag_name_id)
expect(TagName.with_discarded.find(source_tag_name.id)).not_to be_discarded
expect(source_tag_name.reload.canonical_id).to eq(target_tag.tag_name_id)
expect(target_tag.reload.post_count).to eq(1)
end
end

@@ -61,14 +66,49 @@ RSpec.describe Tag, type: :model do
expect(source_pt.discarded_at).to be_present
expect(source_pt.tag_id).to eq(source_tag.id)
expect(active_target).to be_present
expect(source_tag_name.reload.canonical_id).to eq(target_tag.tag_name_id)
expect(target_tag.reload.post_count).to eq(1)
end
end

context 'when the source tag_name is invalid under sanitisation rules' do
let!(:source_post_tag) { PostTag.create!(post: post_record, tag: source_tag) }
let!(:sanitisation_rule) do
TagNameSanitisationRule.create!(
priority: 99_999,
source_pattern: 'INVALIDTOKEN',
replacement: ''
)
end

before do
source_tag_name.update_columns(
name: "#{ source_tag_name.name }INVALIDTOKEN",
updated_at: Time.current
)
end

it 'still merges, but discards the source tag_name instead of aliasing it' do
described_class.merge_tags!(target_tag, [source_tag])

source_pt = PostTag.with_discarded.find(source_post_tag.id)
active_target = PostTag.kept.find_by(post_id: post_record.id, tag_id: target_tag.id)
discarded_source_tag_name = TagName.with_discarded.find(source_tag_name.id)

expect(source_pt.discarded_at).to be_present
expect(source_pt.tag_id).to eq(source_tag.id)
expect(active_target).to be_present

expect(Tag.with_discarded.find(source_tag.id)).to be_discarded
expect(target_tag.reload.post_count).to eq(1)
end
end

context 'when aliasing the source tag_name is invalid' do
context 'when the source tag_name has a wiki_page' do
let!(:source_post_tag) { PostTag.create!(post: post_record, tag: source_tag) }
let!(:wiki_page) do
WikiPage.create!(
tag_name: source_tag.tag_name,
tag_name: source_tag_name,
created_user: create_admin_user!,
updated_user: create_admin_user!
)
@@ -80,8 +120,29 @@ RSpec.describe Tag, type: :model do
}.to raise_error(ActiveRecord::RecordInvalid)

expect(Tag.with_discarded.find(source_tag.id)).not_to be_discarded
expect(TagName.with_discarded.find(source_tag_name.id)).not_to be_discarded
expect(PostTag.kept.find(source_post_tag.id).tag_id).to eq(source_tag.id)
expect(source_tag.tag_name.reload.canonical_id).to be_nil
expect(PostTag.kept.find_by(post_id: post_record.id, tag_id: target_tag.id)).to be_nil
expect(source_tag_name.reload.canonical_id).to be_nil
expect(target_tag.reload.post_count).to eq(0)
end
end

context 'when merging a nico source tag' do
let!(:target_tag) { create(:tag, category: :nico, name: 'nico:foo') }
let!(:source_tag) { create(:tag, category: :nico, name: 'nico:bar') }
let!(:source_tag_name_id) { source_tag.tag_name_id }

it 'discards the source tag_name instead of aliasing it' do
described_class.merge_tags!(target_tag, [source_tag])

discarded_source_tag = Tag.with_discarded.find(source_tag.id)
discarded_source_tag_name = TagName.with_discarded.find(source_tag_name_id)

expect(discarded_source_tag).to be_discarded
expect(discarded_source_tag_name).to be_discarded
expect(discarded_source_tag_name.canonical_id).to be_nil
expect(target_tag.reload.post_count).to eq(0)
end
end
end


+ 6
- 4
backend/spec/requests/posts_spec.rb View File

@@ -525,8 +525,9 @@ RSpec.describe 'Posts API', type: :request do

context "when nico tag already exists in tags" do
before do
Tag.find_or_create_by!(tag_name: TagName.find_or_create_by!(name: 'nico:nico_tag'),
category: :nico)
Tag.find_undiscard_or_create_by!(
tag_name: TagName.find_undiscard_or_create_by!(name: 'nico:nico_tag'),
category: :nico)
end

it 'return 400' do
@@ -610,8 +611,9 @@ RSpec.describe 'Posts API', type: :request do

context "when nico tag already exists in tags" do
before do
Tag.find_or_create_by!(tag_name: TagName.find_or_create_by!(name: 'nico:nico_tag'),
category: :nico)
Tag.find_undiscard_or_create_by!(
tag_name: TagName.find_undiscard_or_create_by!(name: 'nico:nico_tag'),
category: :nico)
end

it 'return 400' do


+ 2
- 2
backend/spec/tasks/nico_sync_spec.rb View File

@@ -8,8 +8,8 @@ RSpec.describe "nico:sync" do
end

def create_tag!(name, category:)
tn = TagName.find_or_create_by!(name: name.to_s.strip)
Tag.find_or_create_by!(tag_name_id: tn.id) { |t| t.category = category }
tn = TagName.find_undiscard_or_create_by!(name: name.to_s.strip)
Tag.find_undiscard_or_create_by!(tag_name_id: tn.id) { |t| t.category = category }
end

def link_nico_to_tag!(nico_tag, tag)


Loading…
Cancel
Save