namespace :nico do
  desc 'ニコニコ DB 同期'
  # Synchronises Post / PostTag records with the video list emitted as JSON by
  # the external get_videos.py script:
  #   * creates a Post for every video code not matched by an existing URL,
  #     or refreshes title / upload-time window on the matching Post;
  #   * attaches a thumbnail scraped from the watch page when none is attached;
  #   * reconciles the post's tags with the "nico:<name>" tags in the feed.
  task sync: :environment do
    require 'json'
    require 'open3'
    require 'open-uri'
    require 'nokogiri'
    require 'set'
    require 'time'

    # Downloads the page at +url+ and returns the content attribute of its
    # <meta name="thumbnail"> element, or nil when it is missing or blank.
    # The block form of URI.open closes the underlying handle after reading.
    fetch_thumbnail = -> url do
      html = URI.open(url, read_timeout: 60, 'User-Agent' => 'Mozilla/5.0', &:read)
      doc = Nokogiri::HTML(html)
      doc.at('meta[name="thumbnail"]')&.[]('content').presence
    end

    # Best-effort variant of fetch_thumbnail: any StandardError is reported on
    # stderr and turned into nil so that one bad page does not stop the sync.
    try_fetch_thumbnail = -> url do
      begin
        fetch_thumbnail.(url)
      rescue StandardError => e
        warn "thumbnail fetch failed for #{ url }: #{ e.class }: #{ e.message }"
        nil
      end
    end

    # Downloads +thumbnail_base+ and attaches it to +post+ as its thumbnail.
    # The IO is intentionally left open here.
    # NOTE(review): presumably Active Storage may read it later (on save) for
    # unsaved records, so it is not closed eagerly -- confirm before changing.
    attach_thumbnail = -> post, thumbnail_base do
      post.thumbnail.attach(
        io: URI.open(thumbnail_base),
        filename: File.basename(URI.parse(thumbnail_base).path),
        content_type: 'image/jpeg')
    end

    # Makes the kept PostTag rows of +post+ match +desired_tag_ids+.
    #
    # post            - the Post whose tags are reconciled.
    # desired_tag_ids - tag ids the post should end up with (nils are ignored).
    # current_ids     - optional Set of tag ids currently kept for the post;
    #                   queried from PostTag when omitted.
    #
    # Missing links are created (a concurrent duplicate insert is ignored) and
    # surplus kept links are discarded via discard_by!(nil).
    #
    # NOTE(review): a `def` inside the task block defines the method outside
    # the block's local scope (it looks like it lands on Object once the task
    # runs); kept as-is to preserve behaviour.
    def sync_post_tags! post, desired_tag_ids, current_ids: nil
      current_ids ||= PostTag.kept.where(post_id: post.id).pluck(:tag_id).to_set
      desired_ids = desired_tag_ids.compact.to_set

      to_add = desired_ids - current_ids
      to_remove = current_ids - desired_ids

      Tag.where(id: to_add.to_a).find_each do |tag|
        begin
          PostTag.create!(post:, tag:)
        rescue ActiveRecord::RecordNotUnique
          # The link already exists; nothing to do.
        end
      end

      PostTag.where(post_id: post.id, tag_id: to_remove.to_a).kept.find_each do |pt|
        pt.discard_by!(nil)
      end
    end

    mysql_user = ENV['MYSQL_USER']
    mysql_pass = ENV['MYSQL_PASS']
    nizika_nico_path = ENV['NIZIKA_NICO_PATH']

    stdout, stderr, status = Open3.capture3(
      { 'MYSQL_USER' => mysql_user, 'MYSQL_PASS' => mysql_pass },
      'python3', "#{ nizika_nico_path }/get_videos.py")
    # Surface the script's own error output instead of exiting silently.
    abort "get_videos.py failed (#{ status }): #{ stderr }" unless status.success?

    data = JSON.parse(stdout)
    data.each do |datum|
      code = datum['code']

      # Match a URL containing nicovideo.jp/watch/<code> where the code is
      # followed by a non-digit or the end of the string.
      post =
        Post
          .where('url REGEXP ?', "nicovideo\\.jp/watch/#{ Regexp.escape(code) }([^0-9]|$)")
          .first

      title = datum['title']
      original_created_at =
        datum['uploaded_at'] && Time.strptime(datum['uploaded_at'], '%Y-%m-%d %H:%M:%S')
      # The upload time is stored as a one-minute window [from, before).
      original_created_from = original_created_at&.change(sec: 0)
      original_created_before = original_created_from&.+(1.minute)

      if post
        attrs = { title:, original_created_from:, original_created_before: }

        unless post.thumbnail.attached?
          thumbnail_base = try_fetch_thumbnail.(post.url)
          if thumbnail_base.present?
            attach_thumbnail.(post, thumbnail_base)
            attrs[:thumbnail_base] = thumbnail_base
          end
        end

        post.assign_attributes(attrs)
        if post.changed?
          post.save!
          post.resized_thumbnail! if post.thumbnail.attached?
        end
      else
        url = "https://www.nicovideo.jp/watch/#{ code }"
        thumbnail_base = try_fetch_thumbnail.(url)
        post = Post.new(title:, url:, thumbnail_base:, uploaded_user: nil,
                        original_created_from:, original_created_before:)
        attach_thumbnail.(post, thumbnail_base) if thumbnail_base.present?
        post.save!
        # Same guard as the update branch: only resize when a thumbnail exists.
        post.resized_thumbnail! if post.thumbnail.attached?
        sync_post_tags!(post, [Tag.tagme.id])
      end

      kept_ids = PostTag.kept.where(post_id: post.id).pluck(:tag_id).to_set
      kept_non_nico_ids = post.tags.where.not(category: 'nico').pluck(:id).to_set

      desired_nico_ids = []
      desired_non_nico_ids = []
      datum['tags'].each do |raw|
        name = "nico:#{ raw }"
        tag = Tag.find_or_create_by_tag_name!(name, category: 'nico')
        desired_nico_ids << tag.id

        # Linked tags are pulled in only when the nico tag is new to this post.
        unless tag.id.in?(kept_ids)
          linked_ids = tag.linked_tags.pluck(:id)
          desired_non_nico_ids.concat(linked_ids)
          desired_nico_ids.concat(linked_ids)
        end
      end
      desired_nico_ids.uniq!

      desired_all_ids = kept_non_nico_ids.to_a + desired_nico_ids
      desired_non_nico_ids.concat(kept_non_nico_ids.to_a)
      desired_non_nico_ids.uniq!

      # When linked tags add anything beyond the non-nico tags already kept,
      # also attach the tag returned by Tag.bot.
      if kept_non_nico_ids.to_set != desired_non_nico_ids.to_set
        desired_all_ids << Tag.bot.id
      end
      desired_all_ids.uniq!

      sync_post_tags!(post, desired_all_ids, current_ids: kept_ids)
    end
  end
end