Files
btrc-hub/backend/lib/tasks/sync_nico.rake
みてるぞ bde7d33949 タグ履歴 (#309) (#319)
#309

#309

#309

#309

#309

Merge remote-tracking branch 'origin/main' into feature/309

#309

Co-authored-by: miteruzo <miteruzo@naver.com>
Reviewed-on: #319
2026-04-19 20:21:51 +09:00

162 lines
5.6 KiB
Ruby

namespace :nico do
desc 'ニコニコ DB 同期'
task sync: :environment do
require 'json'
require 'nokogiri'
require 'open-uri'
require 'open3'
require 'set'
require 'time'
# Downloads the HTML page at +url+ and returns the +content+ attribute of its
# <meta name="thumbnail"> element. Returns nil when the element is absent;
# the attribute value is passed through +presence+ before being returned.
#
# Errors raised while fetching or parsing propagate to the caller.
fetch_thumbnail = -> url do
  # The block form of URI.open closes the underlying handle once the body has
  # been read, instead of leaving it open until garbage collection.
  html = URI.open(url, read_timeout: 60, 'User-Agent' => 'Mozilla/5.0', &:read)
  doc = Nokogiri::HTML(html)
  doc.at('meta[name="thumbnail"]')&.[]('content').presence
end
# Reconciles the kept PostTag rows of +post+ with +desired_tag_ids+.
#
# post            - record whose +id+ selects the PostTag rows to reconcile.
# desired_tag_ids - enumerable of tag ids the post should end up with;
#                   nil entries are ignored.
# current_tag_ids - optional enumerable (Set or Array) of tag ids the post
#                   already carries. When nil, it is read from the kept
#                   PostTag rows of the post.
#
# Tags in the desired set but not the current set get a new PostTag via
# PostTag.create!; kept PostTag rows for tags in the current set but not the
# desired set are discarded with discard_by!(nil).
def sync_post_tags! post, desired_tag_ids, current_tag_ids: nil
  # Normalise to Set: `Array - Set` raises TypeError, so an Array argument
  # must be converted before the differences below are computed.
  current_tag_ids =
    (current_tag_ids || PostTag.kept.where(post_id: post.id).pluck(:tag_id)).to_set
  desired_tag_ids = desired_tag_ids.compact.to_set

  to_add = desired_tag_ids - current_tag_ids
  to_remove = current_tag_ids - desired_tag_ids

  Tag.where(id: to_add.to_a).find_each do |tag|
    PostTag.create!(post:, tag:)
  rescue ActiveRecord::RecordNotUnique
    # A uniqueness violation means the row is already there; skip this tag.
    nil
  end

  PostTag.where(post_id: post.id, tag_id: to_remove.to_a).kept.find_each do |pt|
    pt.discard_by!(nil)
  end
end
# Run get_videos.py (located under NIZIKA_NICO_PATH) with python3, passing the
# MySQL credentials from this process's environment to the child process.
child_env = {
  'MYSQL_USER' => ENV['MYSQL_USER'],
  'MYSQL_PASS' => ENV['MYSQL_PASS']
}
script = "#{ ENV['NIZIKA_NICO_PATH'] }/get_videos.py"
out, err, result = Open3.capture3(child_env, 'python3', script)

# On a non-zero exit, relay the script's stderr and stop the task.
unless result.success?
  warn err
  abort
end

# The script's stdout is parsed as JSON; the loop that follows iterates it.
data = JSON.parse(out)
# Attaches the image at +source_url+ to +target+ as its thumbnail. The file
# name is taken from the URL path and the content type is fixed to image/jpeg.
# NOTE(review): the handle returned by URI.open is not closed here; presumably
# Active Storage reads it during attach/save — confirm before closing it early.
attach_thumbnail = -> target, source_url do
  target.thumbnail.attach(
    io: URI.open(source_url),
    filename: File.basename(URI.parse(source_url).path),
    content_type: 'image/jpeg')
end

# For every video entry: find or create the matching Post, refresh its
# attributes and thumbnail, reconcile its tags, and record a post version.
data.each do |datum|
  code = datum['code']
  # Look the post up by watch URL; the trailing group requires the code to be
  # followed by a non-digit or the end of the string.
  post =
    Post
      .where('url REGEXP ?', "nicovideo\\.jp/watch/#{ Regexp.escape(code) }([^0-9]|$)")
      .first

  title = datum['title']
  original_created_at = datum['uploaded_at'] &&
                        Time.strptime(datum['uploaded_at'], '%Y-%m-%d %H:%M:%S')
  # The upload time is stored as a window: seconds zeroed, and one minute later.
  original_created_from = original_created_at&.change(sec: 0)
  original_created_before = original_created_from&.+(1.minute)

  post_created = false
  post_changed = false

  if post
    attrs = { title:, original_created_from:, original_created_before: }

    unless post.thumbnail.attached?
      # Best effort: any error while fetching the page leaves the post without a thumbnail.
      thumbnail_base = fetch_thumbnail.(post.url) rescue nil
      if thumbnail_base.present?
        attach_thumbnail.(post, thumbnail_base)
        attrs[:thumbnail_base] = thumbnail_base
      end
    end

    post.assign_attributes(attrs)
    post_changed = post.changed?
    if post_changed
      post.save!
      post.resized_thumbnail! if post.thumbnail.attached?
    end
  else
    post_created = true
    url = "https://www.nicovideo.jp/watch/#{ code }"
    # Best effort: any error while fetching the page leaves the post without a thumbnail.
    thumbnail_base = fetch_thumbnail.(url) rescue nil
    post = Post.new(title:, url:, thumbnail_base:, uploaded_user: nil,
                    original_created_from:, original_created_before:)
    attach_thumbnail.(post, thumbnail_base) if thumbnail_base.present?
    post.save!
    # Same guard as the update branch: only resize when a thumbnail is attached.
    post.resized_thumbnail! if post.thumbnail.attached?
    sync_post_tags!(post, [Tag.tagme.id, Tag.bot.id, Tag.niconico.id, Tag.video.id])
  end

  tags = post.tags

  # Set of tag ids currently on the post
  kept_tag_ids = tags.pluck(:id).to_set
  # Of those, the set of internal (non-nico) tag ids
  kept_non_nico_tag_ids = tags.not_nico.pluck(:id).to_set

  # List of external (nico) tag ids to attach, plus the internal tag ids linked to them
  desired_nico_tag_based_ids = []
  # List of internal tag ids to attach
  desired_non_nico_tag_ids = []

  datum['tags'].each do |raw|
    name = TagNameSanitisationRule.sanitise("nico:#{ raw }")
    tag = Tag.find_or_create_by_tag_name!(name, category: :nico)

    # Record a :create version the first time the tag is seen, :update afterwards.
    event_type = tag.nico_tag_versions.exists? ? :update : :create
    NicoTagVersionRecorder.record!(tag:, event_type:, created_by_user: nil)

    desired_nico_tag_based_ids << tag.id

    # For an external tag that is newly attached, also attach its linked internal tags
    unless tag.id.in?(kept_tag_ids)
      linked_ids = tag.linked_tags.pluck(:id)
      desired_non_nico_tag_ids.concat(linked_ids)
      desired_nico_tag_based_ids.concat(linked_ids)
    end
  end

  # Attach the uploader's tag when a Deerjikist matches datum['user']; otherwise
  # attach the "no deerjikist" tag unless the post already has a deerjikist-category tag.
  deerjikist = Deerjikist.find_by(platform: :nico, code: datum['user'])
  if deerjikist
    desired_non_nico_tag_ids << deerjikist.tag_id
    desired_nico_tag_based_ids << deerjikist.tag_id
  elsif !(Tag.where(id: kept_non_nico_tag_ids).where(category: :deerjikist).exists?)
    desired_non_nico_tag_ids << Tag.no_deerjikist.id
    desired_nico_tag_based_ids << Tag.no_deerjikist.id
  end

  desired_nico_tag_based_ids.uniq!

  # Existing internal tags are always kept; only nico-derived tags are reconciled.
  desired_all_tag_ids = kept_non_nico_tag_ids.to_a + desired_nico_tag_based_ids
  desired_non_nico_tag_ids.concat(kept_non_nico_tag_ids.to_a)
  desired_non_nico_tag_ids.uniq!

  # When this run changes the set of internal tags, also attach the bot tag.
  if kept_non_nico_tag_ids != desired_non_nico_tag_ids.to_set
    desired_all_tag_ids << Tag.bot.id
  end
  desired_all_tag_ids.uniq!

  sync_post_tags!(post, desired_all_tag_ids, current_tag_ids: kept_tag_ids)

  # Record a version: :create for new posts, :update when attributes or tags changed.
  if post_created
    PostVersionRecorder.record!(post:, event_type: :create, created_by_user: nil)
  elsif post_changed || kept_tag_ids != desired_all_tag_ids.to_set
    PostVersionRecorder.ensure_snapshot!(post, created_by_user: nil)
    PostVersionRecorder.record!(post:, event_type: :update, created_by_user: nil)
  end
end
end
end