ぼざクリタグ広場 https://hub.nizika.monster
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

103 lines
3.5 KiB

  1. namespace :nico do
  2. desc 'ニコニコ DB 同期'
  3. task sync: :environment do
  4. require 'open3'
  5. require 'open-uri'
  6. require 'nokogiri'
  7. require 'set'
  # Looks up the thumbnail URL advertised by the page at +url+.
  #
  # Fetches the page with a 60-second read timeout and a browser-like
  # User-Agent header, parses it as HTML and returns the +content+ attribute
  # of <meta name="thumbnail">. Returns nil when the tag is missing or its
  # content is blank. Errors raised while fetching are not rescued here.
  fetch_thumbnail = ->(url) do
    # Block form: open-uri closes the response handle once the body is read,
    # instead of leaving it open until garbage collection.
    html = URI.open(url, read_timeout: 60, 'User-Agent' => 'Mozilla/5.0', &:read)
    meta = Nokogiri::HTML(html).at('meta[name="thumbnail"]')
    meta&.[]('content').presence
  end
  # Reconciles the kept PostTag rows of +post+ with +desired_tag_ids+.
  #
  # Tags that are desired but not currently attached get a new PostTag;
  # kept PostTags whose tag is no longer desired are discarded with
  # discard_by!(nil).
  #
  # post            - record responding to #id; also handed to PostTag.create!.
  # desired_tag_ids - collection of tag ids; nil entries are ignored.
  # current_ids:    - optional collection (Set or Array) of tag ids already
  #                   attached. When omitted it is read from PostTag.kept.
  #
  # Returns nil when there is nothing to change.
  def sync_post_tags!(post, desired_tag_ids, current_ids: nil)
    # Normalise to a Set so the differences below work for Array input too
    # (Array#- cannot take a Set operand).
    current_ids = (current_ids || PostTag.kept.where(post_id: post.id).pluck(:tag_id)).to_set
    desired_ids = desired_tag_ids.compact.to_set

    to_add = desired_ids - current_ids
    to_remove = current_ids - desired_ids
    return if to_add.empty? && to_remove.empty?

    Tag.where(id: to_add.to_a).find_each do |tag|
      PostTag.create!(post:, tag:)
    rescue ActiveRecord::RecordNotUnique
      # A uniqueness violation means the link already exists; treat it as done.
    end

    PostTag.where(post_id: post.id, tag_id: to_remove.to_a).kept.find_each do |pt|
      pt.discard_by!(nil)
    end
  end
  # Main sync: run the external get_videos.py script, parse its JSON output,
  # then create or refresh one Post per video and reconcile its tags.
  mysql_user = ENV['MYSQL_USER']
  mysql_pass = ENV['MYSQL_PASS']
  nizika_nico_path = ENV['NIZIKA_NICO_PATH']

  stdout, stderr, status = Open3.capture3(
    { 'MYSQL_USER' => mysql_user, 'MYSQL_PASS' => mysql_pass },
    'python3', "#{ nizika_nico_path }/get_videos.py")
  # Report the script's own error output rather than exiting silently.
  unless status.success?
    abort "get_videos.py failed (exit status #{ status.exitstatus.inspect }): #{ stderr }"
  end

  data = JSON.parse(stdout)

  data.each do |datum|
    code = datum['code']
    # Match the watch URL for exactly this code: the code must be followed by
    # a non-digit or the end of the string.
    post = Post.where('url REGEXP ?', "nicovideo\\.jp/watch/#{ Regexp.escape(code) }([^0-9]|$)")
               .first

    title = datum['title']
    # NOTE(review): Time.strptime yields a time in the process's local zone —
    # confirm that matches the zone of 'uploaded_at'.
    original_created_at = datum['uploaded_at'] &&
                          Time.strptime(datum['uploaded_at'], '%Y-%m-%d %H:%M:%S')
    # The upload time is stored as a one-minute window [from, before).
    original_created_from = original_created_at&.change(sec: 0)
    original_created_before = original_created_from&.+(1.minute)

    if post
      # Existing post: write only when at least one attribute differs.
      attrs = { title:, original_created_from:, original_created_before: }
      post.update!(attrs) if attrs.any? { |k, v| post.public_send(k) != v }
    else
      url = "https://www.nicovideo.jp/watch/#{ code }"
      # The thumbnail lookup is best effort: a failure is logged and the post
      # is still created without a thumbnail.
      thumbnail_base =
        begin
          fetch_thumbnail.(url)
        rescue StandardError => e
          warn "thumbnail lookup failed for #{ url }: #{ e.class }: #{ e.message }"
          nil
        end

      post = Post.new(title:, url:, thumbnail_base:, uploaded_user: nil,
                      original_created_from:, original_created_before:)
      if thumbnail_base.present?
        post.thumbnail.attach(
          io: URI.open(thumbnail_base),
          filename: File.basename(URI.parse(thumbnail_base).path),
          content_type: 'image/jpeg')
      end
      post.save!
      post.resized_thumbnail!
      # New posts start with the tagme tag.
      sync_post_tags!(post, [Tag.tagme.id])
    end

    kept_ids = PostTag.kept.where(post_id: post.id).pluck(:tag_id).to_set
    kept_non_nico_ids = post.tags.where.not(category: 'nico').pluck(:id).to_set

    desired_nico_ids = []
    desired_non_nico_ids = []

    datum['tags'].each do |raw|
      name = "nico:#{ raw }"
      tag = Tag.find_or_create_by_tag_name!(name, category: 'nico')
      desired_nico_ids << tag.id

      # Only a nico tag that is not yet attached pulls in its linked tags.
      next if tag.id.in?(kept_ids)

      linked_ids = tag.linked_tags.pluck(:id)
      desired_non_nico_ids.concat(linked_ids)
      desired_nico_ids.concat(linked_ids)
    end

    desired_nico_ids.uniq!

    desired_all_ids = kept_non_nico_ids.to_a + desired_nico_ids
    desired_non_nico_ids.concat(kept_non_nico_ids.to_a)
    desired_non_nico_ids.uniq!

    # Linked tags contributed an id beyond the post's current non-nico tags,
    # so the bot tag is attached as well.
    if kept_non_nico_ids != desired_non_nico_ids.to_set
      desired_all_ids << Tag.bot.id
    end
    desired_all_ids.uniq!

    sync_post_tags!(post, desired_all_ids, current_ids: kept_ids)
  end
  88. end
  89. end