Merge remote-tracking branch 'origin/main' into '#106'

このコミットが含まれているのは:
2026-02-11 17:54:48 +09:00
コミット e5048dc9b3
123個のファイルの変更、5109行の追加、1257行の削除
+1 -23
ファイルの表示
@@ -1,28 +1,6 @@
# Rake tasks under the `post_similarity` namespace.
# NOTE(review): this text looks like a rendered diff (hunk header "@@ -1,28 +1,6 @@", stat "+1 -23")
# with removed and added lines interleaved. The inline cosine computation below is presumably the
# removed implementation and the Similarity::Calc call its replacement - confirm before treating
# both as live code.
namespace :post_similarity do
desc '関聯投稿テーブル作成'
# `post_similarity:calc` - runs after the :environment task.
task calc: :environment do
# Dot product of two hashes: sums a[k] * b[k] over the keys present in both.
dot = -> a, b { (a.keys & b.keys).sum { |k| a[k] * b[k] } }
# Euclidean norm of a hash's values.
norm = -> v { Math.sqrt(v.values.sum { |e| e * e }) }
# Cosine similarity of two hashes; 0.0 when either norm is zero (avoids dividing by zero).
cos = -> a, b do
na = norm.(a)
nb = norm.(b)
if na.zero? || nb.zero?
0.0
else
dot.(a, b) / na / nb
end
end
# Load every post into memory, with its tags eager-loaded.
posts = Post.includes(:tags).to_a
posts.each_with_index do |post, i|
# Hash mapping each of the post's tags to 1.
existence_of_tags = post.tags.index_with(1)
# Visit only later posts, so each unordered pair is handled once (post = earlier, target_post = later).
((i + 1)...posts.size).each do |j|
target_post = posts[j]
existence_of_target_tags = target_post.tags.index_with(1)
# Find or build the PostSimilarity row for this pair, set its cosine value, and save (raises on failure).
PostSimilarity.find_or_initialize_by(post:, target_post:).tap { |ps|
ps.cos = cos.(existence_of_tags, existence_of_target_tags)
}.save!
end
end
# Hands Post and its :tags association name to Similarity::Calc (defined outside this view).
Similarity::Calc.call(Post, :tags)
end
end
+6
ファイルの表示
@@ -0,0 +1,6 @@
# Rake tasks under the `tag_similarity` namespace.
namespace(:tag_similarity) do
  # The description string reads "create the related-tags table".
  desc('関聯タグ・テーブル作成')
  # `tag_similarity:calc` - runs after :environment, then hands Tag and its
  # :posts association name to Similarity::Calc (defined outside this file).
  task(calc: :environment) { Similarity::Calc.call(Tag, :posts) }
end
+74
ファイルの表示
@@ -0,0 +1,74 @@
# Rake tasks under the `wiki` namespace.
namespace :wiki do
  desc 'Wiki 移行'
  # `wiki:migrate` - copies the history of the Gollum wiki stored under
  # Rails.root/wiki into the WikiLine / WikiRevision / WikiRevisionLine tables.
  #
  # Only WikiPage records that have no wiki_revisions yet are processed. For
  # each one, every version of "<page id>.md" becomes one WikiRevision whose
  # base_revision_id points at the revision created just before it.
  task migrate: :environment do
    # Loaded lazily so that only this task pays for these libraries.
    require 'digest'
    require 'gollum-lib'

    wiki = Gollum::Wiki.new(Rails.root.join('wiki').to_s)

    WikiPage.where.missing(:wiki_revisions).find_each do |wiki_page|
      path = "#{ wiki_page.id }.md"
      page = wiki.page(path)
      next unless page

      versions = page.versions
      next if versions.blank?

      base_revision_id = nil

      # NOTE(review): presumably page.versions is newest-first, so reverse_each
      # replays the history oldest-first - confirm against gollum-lib.
      versions.reverse_each do |version|
        raw = wiki.page(path, version.id)&.raw_data
        next unless raw

        lines = raw.force_encoding('UTF-8').split("\n")
        line_shas = lines.map { |line| Digest::SHA256.hexdigest(line) }
        # Fingerprint of the whole revision: hash of the comma-joined line hashes.
        tree_sha = Digest::SHA256.hexdigest(line_shas.join(','))
        at = version.authored_date

        line_id_by_sha = WikiLine.where(sha256: line_shas).pluck(:sha256, :id).to_h

        # Rows for lines not stored yet. De-duplicated by hash so that a line
        # repeated within this revision (e.g. blank lines) is sent only once.
        missing_rows =
          line_shas
            .zip(lines)
            .uniq(&:first)
            .reject { |sha, _body| line_id_by_sha.key?(sha) }
            .map { |sha, body| { sha256: sha, body:, created_at: at, updated_at: at } }

        if missing_rows.any?
          WikiLine.upsert_all(missing_rows)
          # Re-read to pick up the ids of the rows that were just written.
          line_id_by_sha = WikiLine.where(sha256: line_shas).pluck(:sha256, :id).to_h
        end

        # fetch: a hash that is still unknown at this point is a bug, so fail loudly.
        line_ids = line_shas.map { |sha| line_id_by_sha.fetch(sha) }

        # The commit author's name is parsed as a user id; anything that is not
        # an integer (or a missing author) falls back to user id 2.
        # NOTE(review): 2 is presumably a designated fallback user - confirm.
        created_user_id = Integer(version.author&.name, exception: false) || 2

        rev = ActiveRecord::Base.transaction do
          wiki_page.lock!
          revision = WikiRevision.create!(
            wiki_page:,
            base_revision_id:,
            created_user_id:,
            kind: :content,
            redirect_page_id: nil,
            message: nil,
            lines_count: lines.length,
            tree_sha256: tree_sha,
            created_at: at,
            updated_at: at)

          rows = line_ids.each_with_index.map do |line_id, position|
            { wiki_revision_id: revision.id,
              wiki_line_id: line_id,
              position: }
          end
          # An empty revision yields no rows; some Rails versions raise on an
          # empty insert_all!, so skip it (same guard as for missing_rows).
          WikiRevisionLine.insert_all!(rows) if rows.any?

          revision
        end

        base_revision_id = rev.id
      end
    end
  end
end
+50 -22
ファイルの表示
@@ -3,9 +3,10 @@ namespace :nico do
task sync: :environment do
require 'json'
require 'nokogiri'
require 'open3'
require 'open-uri'
require 'open3'
require 'set'
require 'time'
fetch_thumbnail = -> url do
html = URI.open(url, read_timeout: 60, 'User-Agent' => 'Mozilla/5.0').read
@@ -14,9 +15,9 @@ namespace :nico do
doc.at('meta[name="thumbnail"]')&.[]('content').presence
end
def sync_post_tags! post, desired_tag_ids
def sync_post_tags! post, desired_tag_ids, current_ids: nil
current_ids ||= PostTag.kept.where(post_id: post.id).pluck(:tag_id).to_set
desired_ids = desired_tag_ids.compact.to_set
current_ids = post.tags.pluck(:id).to_set
to_add = desired_ids - current_ids
to_remove = current_ids - desired_ids
@@ -45,14 +46,43 @@ namespace :nico do
data = JSON.parse(stdout)
data.each do |datum|
post = Post.where('url LIKE ?', '%nicovideo.jp%').find { |post|
post.url =~ %r{#{ Regexp.escape(datum['code']) }(?!\d)}
}
unless post
title = datum['title']
url = "https://www.nicovideo.jp/watch/#{ datum['code'] }"
thumbnail_base = fetch_thumbnail.(url) || '' rescue ''
post = Post.new(title:, url:, thumbnail_base:, uploaded_user: nil)
code = datum['code']
post =
Post
.where('url REGEXP ?', "nicovideo\\.jp/watch/#{ Regexp.escape(code) }([^0-9]|$)")
.first
title = datum['title']
original_created_at = datum['uploaded_at'] &&
Time.strptime(datum['uploaded_at'], '%Y-%m-%d %H:%M:%S')
original_created_from = original_created_at&.change(sec: 0)
original_created_before = original_created_from&.+(1.minute)
if post
attrs = { title:, original_created_from:, original_created_before: }
unless post.thumbnail.attached?
thumbnail_base = fetch_thumbnail.(post.url) rescue nil
if thumbnail_base.present?
post.thumbnail.attach(
io: URI.open(thumbnail_base),
filename: File.basename(URI.parse(thumbnail_base).path),
content_type: 'image/jpeg')
attrs[:thumbnail_base] = thumbnail_base
end
end
post.assign_attributes(attrs)
if post.changed?
post.save!
post.resized_thumbnail! if post.thumbnail.attached?
end
else
url = "https://www.nicovideo.jp/watch/#{ code }"
thumbnail_base = fetch_thumbnail.(url) rescue nil
post = Post.new(title:, url:, thumbnail_base:, uploaded_user: nil,
original_created_from:, original_created_before:)
if thumbnail_base.present?
post.thumbnail.attach(
io: URI.open(thumbnail_base),
@@ -61,24 +91,22 @@ namespace :nico do
end
post.save!
post.resized_thumbnail!
sync_post_tags!(post, [Tag.tagme.id])
sync_post_tags!(post, [Tag.tagme.id, Tag.bot.id, Tag.niconico.id, Tag.video.id])
end
kept_tags = post.tags.reload
kept_non_nico_ids = kept_tags.where.not(category: 'nico').pluck(:id).to_set
kept_ids = PostTag.kept.where(post_id: post.id).pluck(:tag_id).to_set
kept_non_nico_ids = post.tags.where.not(category: 'nico').pluck(:id).to_set
desired_nico_ids = []
desired_non_nico_ids = []
datum['tags'].each do |raw|
name = "nico:#{ raw }"
tag = Tag.find_or_initialize_by(name:) do |t|
t.category = 'nico'
end
tag.save! if tag.new_record?
tag = Tag.find_or_create_by_tag_name!(name, category: 'nico')
desired_nico_ids << tag.id
unless tag.in?(kept_tags)
desired_non_nico_ids.concat(tag.linked_tags.pluck(:id))
desired_nico_ids.concat(tag.linked_tags.pluck(:id))
unless tag.id.in?(kept_ids)
linked_ids = tag.linked_tags.pluck(:id)
desired_non_nico_ids.concat(linked_ids)
desired_nico_ids.concat(linked_ids)
end
end
desired_nico_ids.uniq!
@@ -94,7 +122,7 @@ namespace :nico do
end
desired_all_ids.uniq!
sync_post_tags!(post, desired_all_ids)
sync_post_tags!(post, desired_all_ids, current_ids: kept_ids)
end
end
end