ぼざクリタグ広場 https://hub.nizika.monster
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

157 lines
5.4 KiB

  1. namespace :nico do
  2. desc 'ニコニコ DB 同期'
  3. task sync: :environment do
  4. require 'json'
  5. require 'nokogiri'
  6. require 'open-uri'
  7. require 'open3'
  8. require 'set'
  9. require 'time'
  # Downloads the page at +url+ and returns the +content+ attribute of its
  # <meta name="thumbnail"> element, or nil when the element is absent or
  # the attribute is blank.
  #
  # Network and parse errors are not handled here; they propagate to the caller.
  fetch_thumbnail = -> url do
    # Block form: open-uri closes the handle once the block returns, instead
    # of leaving it open until garbage collection.
    html = URI.open(url, read_timeout: 60, 'User-Agent' => 'Mozilla/5.0', &:read)
    doc = Nokogiri::HTML(html)
    doc.at('meta[name="thumbnail"]')&.[]('content').presence
  end
  # Brings the kept PostTag rows of +post+ in line with +desired_tag_ids+.
  #
  # post            - record whose +id+ selects the PostTag rows to reconcile
  # desired_tag_ids - collection of tag ids that should end up on the post;
  #                   nil entries are dropped
  # current_tag_ids - set of tag ids currently on the post; when nil it is
  #                   loaded from the kept PostTag rows of the post
  #
  # Ids present only in the desired set get a new PostTag; ids present only in
  # the current set have their kept PostTag rows passed to +discard_by!(nil)+.
  def sync_post_tags! post, desired_tag_ids, current_tag_ids: nil
    current_tag_ids ||= PostTag.kept.where(post_id: post.id).pluck(:tag_id).to_set
    wanted_ids = desired_tag_ids.compact.to_set
    missing_ids = wanted_ids - current_tag_ids
    stale_ids = current_tag_ids - wanted_ids

    Tag.where(id: missing_ids.to_a).find_each do |tag|
      PostTag.create!(post:, tag:)
    rescue ActiveRecord::RecordNotUnique
      # Deliberately ignored: a uniqueness violation presumably means the
      # link already exists, so there is nothing left to do for this tag.
    end

    PostTag.where(post_id: post.id, tag_id: stale_ids.to_a).kept.find_each do |post_tag|
      post_tag.discard_by!(nil)
    end
  end
  # Run the external get_videos.py script with the MySQL credentials taken
  # from this process's environment, and parse what it prints on stdout as
  # JSON. On a non-zero exit the script's stderr is echoed and the task aborts.
  child_env = {
    'MYSQL_USER' => ENV['MYSQL_USER'],
    'MYSQL_PASS' => ENV['MYSQL_PASS']
  }
  script_path = "#{ ENV['NIZIKA_NICO_PATH'] }/get_videos.py"
  script_out, script_err, exit_status = Open3.capture3(child_env, 'python3', script_path)

  unless exit_status.success?
    warn script_err
    abort
  end

  data = JSON.parse(script_out)
  # Best-effort thumbnail lookup: any StandardError raised while fetching or
  # parsing the page is swallowed and reported as "no thumbnail" (nil), so a
  # single unreachable page does not stop the whole sync.
  try_fetch_thumbnail = -> page_url do
    begin
      fetch_thumbnail.(page_url)
    rescue StandardError
      nil
    end
  end

  # Attaches the image found at +thumbnail_url+ to +target+'s thumbnail.
  # The opened handle is passed to +attach+ as-is; the content type is always
  # declared as JPEG. Download errors are not rescued here.
  attach_thumbnail = -> target, thumbnail_url do
    target.thumbnail.attach(
      io: URI.open(thumbnail_url),
      filename: File.basename(URI.parse(thumbnail_url).path),
      content_type: 'image/jpeg')
  end

  # For every video record: find or create the matching Post, refresh its
  # title / upload-time window / thumbnail, reconcile its tags, and record a
  # post version when something was created or changed.
  data.each do |datum|
    code = datum['code']
    # Match the watch URL for exactly this code: it must be followed by a
    # non-digit or the end of the string.
    post =
      Post
        .where('url REGEXP ?', "nicovideo\\.jp/watch/#{ Regexp.escape(code) }([^0-9]|$)")
        .first

    title = datum['title']
    original_created_at = datum['uploaded_at'] &&
                          Time.strptime(datum['uploaded_at'], '%Y-%m-%d %H:%M:%S')
    # The upload time is stored as a one-minute window [from, before).
    original_created_from = original_created_at&.change(sec: 0)
    original_created_before = original_created_from&.+(1.minute)

    post_created = false
    post_changed = false

    if post
      attrs = { title:, original_created_from:, original_created_before: }
      unless post.thumbnail.attached?
        thumbnail_base = try_fetch_thumbnail.(post.url)
        if thumbnail_base.present?
          attach_thumbnail.(post, thumbnail_base)
          attrs[:thumbnail_base] = thumbnail_base
        end
      end
      post.assign_attributes(attrs)
      post_changed = post.changed?
      if post_changed
        post.save!
        post.resized_thumbnail! if post.thumbnail.attached?
      end
    else
      post_created = true
      url = "https://www.nicovideo.jp/watch/#{ code }"
      thumbnail_base = try_fetch_thumbnail.(url)
      post = Post.new(title:, url:, thumbnail_base:, uploaded_user: nil,
                      original_created_from:, original_created_before:)
      attach_thumbnail.(post, thumbnail_base) if thumbnail_base.present?
      post.save!
      # Same guard as the update branch: when the thumbnail lookup failed
      # nothing is attached, so there is nothing to resize.
      post.resized_thumbnail! if post.thumbnail.attached?
      sync_post_tags!(post, [Tag.tagme.id, Tag.bot.id, Tag.niconico.id, Tag.video.id])
    end

    tags = post.tags
    # Set of tag ids already on the post
    kept_tag_ids = tags.pluck(:id).to_set
    # Of those, the set of internal (non-nico) tag ids
    kept_non_nico_tag_ids = tags.not_nico.pluck(:id).to_set
    # List of external (nico) tag ids that should be present, plus the
    # internal tag ids linked to them
    desired_nico_tag_based_ids = []
    # List of internal tag ids that should be present
    desired_non_nico_tag_ids = []

    datum['tags'].each do |raw|
      name = TagNameSanitisationRule.sanitise("nico:#{ raw }")
      tag = Tag.find_or_create_by_tag_name!(name, category: :nico)
      desired_nico_tag_based_ids << tag.id
      # When an external tag is newly being added, also add the internal
      # tags linked to it
      unless tag.id.in?(kept_tag_ids)
        linked_ids = tag.linked_tags.pluck(:id)
        desired_non_nico_tag_ids.concat(linked_ids)
        desired_nico_tag_based_ids.concat(linked_ids)
      end
    end

    deerjikist = Deerjikist.find_by(platform: :nico, code: datum['user'])
    if deerjikist
      desired_non_nico_tag_ids << deerjikist.tag_id
      desired_nico_tag_based_ids << deerjikist.tag_id
    elsif !(Tag.where(id: kept_non_nico_tag_ids).where(category: :deerjikist).exists?)
      # No known uploader and no deerjikist-category tag on the post yet:
      # fall back to the Tag.no_deerjikist tag.
      no_deerjikist_id = Tag.no_deerjikist.id
      desired_non_nico_tag_ids << no_deerjikist_id
      desired_nico_tag_based_ids << no_deerjikist_id
    end

    desired_nico_tag_based_ids.uniq!
    desired_all_tag_ids = kept_non_nico_tag_ids.to_a + desired_nico_tag_based_ids
    desired_non_nico_tag_ids.concat(kept_non_nico_tag_ids.to_a)
    desired_non_nico_tag_ids.uniq!
    # If this run changes the set of internal tags, add the bot tag as well.
    if kept_non_nico_tag_ids != desired_non_nico_tag_ids.to_set
      desired_all_tag_ids << Tag.bot.id
    end
    desired_all_tag_ids.uniq!

    sync_post_tags!(post, desired_all_tag_ids, current_tag_ids: kept_tag_ids)

    if post_created
      PostVersionRecorder.record!(post:, event_type: :create, created_by_user: nil)
    elsif post_changed || kept_tag_ids != desired_all_tag_ids.to_set
      PostVersionRecorder.record!(post:, event_type: :update, created_by_user: nil)
    end
  end
  132. end
  133. end