From 39101ca422b8144c46e682118c741ab3a7af090d Mon Sep 17 00:00:00 2001 From: miteruzo Date: Sat, 2 May 2026 02:48:40 +0900 Subject: [PATCH] #314 --- backend/app/models/tag.rb | 1 + .../discovery/youtube_video_discoverer.rb | 46 ++++++ backend/app/services/post_importer.rb | 51 ++++++ .../services/video_sources/youtube/client.rb | 87 ++++++++++ backend/app/services/youtube/api_client.rb | 68 ++++++++ backend/app/services/youtube/search_client.rb | 34 ++++ backend/app/services/youtube/sync.rb | 152 ++++++++++++++++++ backend/app/services/youtube/video_item.rb | 29 ++++ backend/lib/tasks/sync_posts.rake | 6 + 9 files changed, 474 insertions(+) create mode 100644 backend/app/services/discovery/youtube_video_discoverer.rb create mode 100644 backend/app/services/post_importer.rb create mode 100644 backend/app/services/video_sources/youtube/client.rb create mode 100644 backend/app/services/youtube/api_client.rb create mode 100644 backend/app/services/youtube/search_client.rb create mode 100644 backend/app/services/youtube/sync.rb create mode 100644 backend/app/services/youtube/video_item.rb create mode 100644 backend/lib/tasks/sync_posts.rake diff --git a/backend/app/models/tag.rb b/backend/app/models/tag.rb index 54c3d68..acdc5c7 100644 --- a/backend/app/models/tag.rb +++ b/backend/app/models/tag.rb @@ -84,6 +84,7 @@ class Tag < ApplicationRecord def self.no_deerjikist = find_or_create_by_tag_name!('ニジラー情報不詳', category: :meta) def self.video = find_or_create_by_tag_name!('動画', category: :meta) def self.niconico = find_or_create_by_tag_name!('ニコニコ', category: :meta) + def self.youtube = find_or_create_by_tag_name!('YouTube', category: :meta) def self.normalise_tags tag_names, with_tagme: true, with_no_deerjikist: true, diff --git a/backend/app/services/discovery/youtube_video_discoverer.rb b/backend/app/services/discovery/youtube_video_discoverer.rb new file mode 100644 index 0000000..70eac89 --- /dev/null +++ b/backend/app/services/discovery/youtube_video_discoverer.rb 
@@ -0,0 +1,52 @@
+# Stores YouTube search results for a discovery query as VideoCandidate rows.
+class Discovery::YoutubeVideoDiscoverer
+  def initialize(client: Youtube::SearchClient.new)
+    @client = client
+  end
+
+  # Runs one search for +discovery_query.query+ within the given time window
+  # and upserts a candidate (plus its hit record) per returned video.
+  def call discovery_query:, published_after:, published_before:
+    body = @client.search_videos(
+      query: discovery_query.query,
+      published_after: published_after,
+      published_before: published_before)
+
+    body.fetch('items', []).each do |item|
+      next unless item.dig('id', 'kind') == 'youtube#video'
+
+      upsert_candidate!(discovery_query, item)
+    end
+  end
+
+  private
+
+  # Creates or refreshes the candidate keyed by (provider, code) and makes
+  # sure a hit exists linking it to +discovery_query+.
+  def upsert_candidate! discovery_query, item
+    snippet = item.fetch('snippet')
+    code = item.fetch('id').fetch('videoId')
+
+    candidate = VideoCandidate.find_or_initialize_by(provider: 'youtube', code:)
+
+    candidate.title = snippet['title'].to_s
+    candidate.description = snippet['description'].to_s
+    candidate.channel_code = snippet['channelId']
+    candidate.channel_title = snippet['channelTitle']
+    candidate.published_at = Time.zone.parse(snippet['publishedAt'])
+    candidate.thumbnail_url = snippet.dig('thumbnails', 'high', 'url')
+    candidate.raw_data = item
+    candidate.last_discovered_at = Time.current
+    candidate.save!
+
+    VideoCandidateHit.find_or_create_by!(
+      video_candidate: candidate,
+      discovery_query: discovery_query
+    ) do |hit|
+      # This block only runs when the hit is newly created.
+      hit.matched_field = 'youtube_search'
+      hit.score = 0
+      hit.searched_at = Time.current
+    end
+  end
+end
diff --git a/backend/app/services/post_importer.rb b/backend/app/services/post_importer.rb
new file mode 100644
index 0000000..6d86287
--- /dev/null
+++ b/backend/app/services/post_importer.rb
@@ -0,0 +1,64 @@
+# Imports videos fetched from an external provider as Post records.
+class PostImporter
+  # provider: provider name as a String; only 'youtube' is supported so far.
+  def initialize provider:
+    @provider = provider
+  end
+
+  # Upserts one Post per entry of +source_videos+ inside one transaction, so a
+  # failure on any entry rolls the whole batch back.  Each entry is a Hash
+  # read with the symbol keys :code, :title, :uploaded_at and :user_code
+  # (:user_code may be missing or blank).
+  def import_posts source_videos
+    ApplicationRecord.transaction do
+      source_videos.each do |source_video|
+        upsert_post(source_video, deerjikist_tag_of(source_video))
+      end
+    end
+  end
+
+  private
+
+  attr_reader :provider
+
+  # Returns the tag of the Deerjikist registered under the video's user code
+  # for this provider, or nil when there is no user code or no match.
+  def deerjikist_tag_of source_video
+    user_code = source_video[:user_code]
+    return nil if user_code.blank?
+
+    Deerjikist.find_by(platform: provider, code: user_code)&.tag
+  end
+
+  # Builds the watch URL in the same form as Youtube::VideoItem#url so that
+  # both import paths resolve to the same Post.  Returns nil for providers
+  # that have no URL scheme defined here.
+  def url_of code
+    case provider
+    when 'youtube'
+      "https://www.youtube.com/watch?v=#{ code }"
+    end
+  end
+
+  # Finds the Post by URL (or initialises a new one), refreshes its metadata,
+  # adds the standard tags and saves it.  Raises when the provider is unknown.
+  def upsert_post source_video, deerjikist_tag
+    url = url_of(source_video.fetch(:code))
+    raise "unsupported provider: #{ provider.inspect }" if url.blank?
+
+    Post.find_or_initialize_by(url:).tap do |post|
+      post.title = source_video.fetch(:title)
+      post.uploaded_user_id = nil
+      post.original_created_from = source_video.fetch(:uploaded_at)
+      post.original_created_before = post.original_created_from + 1.minute
+
+      # A re-import finds an existing post, so only add tags it lacks.
+      [deerjikist_tag || Tag.no_deerjikist,
+       Tag.youtube, Tag.video, Tag.bot, Tag.tagme].each do |tag|
+        post.tags << tag unless post.tags.include?(tag)
+      end
+
+      post.save!
+    end
+  end
+end
diff --git a/backend/app/services/video_sources/youtube/client.rb b/backend/app/services/video_sources/youtube/client.rb
new file mode 100644
index 0000000..ac797f2
--- /dev/null
+++ b/backend/app/services/video_sources/youtube/client.rb
@@ -0,0 +1,105 @@
+module VideoSources
+  module Youtube
+    # Faraday-based reader for the YouTube Data API v3 that returns videos and
+    # comments as plain Hashes.
+    class Client
+      API_BASE = 'https://www.googleapis.com/youtube/v3'
+      # Batch size for #videos; the same size Youtube::Sync uses.
+      MAX_IDS_PER_REQUEST = 50
+
+      def initialize api_key: ENV.fetch('YOUTUBE_API_KEY')
+        @api_key = api_key
+      end
+
+      # Fetches snippet and statistics for +video_ids+ and returns one Hash per
+      # item in the responses (see #build_video).  IDs are sent in batches so
+      # that long lists are not packed into a single request.
+      def videos video_ids
+        return [] if video_ids.empty?
+
+        video_ids.each_slice(MAX_IDS_PER_REQUEST).flat_map do |ids|
+          response = connection.get('videos', part: 'snippet,statistics',
+                                              id: ids.join(','),
+                                              key: @api_key)
+
+          JSON.parse(response.body).fetch('items', []).map do |item|
+            build_video(item)
+          end
+        end
+      end
+
+      # Returns every top-level comment of +video_id+ (see #build_comment),
+      # following nextPageToken until the last page.  A 403 response yields an
+      # empty list instead of an error.
+      def comments video_id
+        comments = []
+        page_token = nil
+
+        loop do
+          response = connection.get('commentThreads', {
+            part: 'snippet',
+            videoId: video_id,
+            maxResults: 100,
+            textFormat: 'plainText',
+            pageToken: page_token,
+            key: @api_key }.compact)
+
+          body = JSON.parse(response.body)
+
+          comments.concat(body.fetch('items', []).map { |item| build_comment(item) })
+
+          page_token = body['nextPageToken']
+          break if page_token.blank?
+        end
+
+        comments
+      rescue Faraday::ForbiddenError
+        # NOTE(review): presumably raised when comments are disabled for the
+        # video; any pages already fetched are discarded as well.
+        []
+      end
+
+      private
+
+      def connection
+        @connection ||= Faraday.new(url: API_BASE) do |faraday|
+          # Turn 4xx/5xx responses into Faraday exceptions.
+          faraday.response :raise_error
+        end
+      end
+
+      # Maps a videos.list item to the Hash shape used for imported videos.
+      def build_video item
+        snippet = item.fetch('snippet')
+        statistics = item.fetch('statistics', { })
+
+        { provider: 'youtube',
+          code: item.fetch('id'),
+          user_code: snippet['channelId'],
+          title: snippet['title'].to_s,
+          description: snippet['description'].to_s,
+          tag_names: snippet.fetch('tags', []),
+          views_count: statistics.fetch('viewCount', 0).to_i,
+          uploaded_at: Time.zone.parse(snippet.fetch('publishedAt')) }
+      end
+
+      # Maps a commentThreads.list item to a Hash describing its top-level
+      # comment; comment_no and vpos_ms are always nil here.
+      def build_comment item
+        snippet =
+          item
+            .fetch('snippet')
+            .fetch('topLevelComment')
+            .fetch('snippet')
+
+        { provider_comment_id: item.fetch('id'),
+          user_code: snippet['authorChannelId']&.fetch('value', nil),
+          content: snippet['textDisplay'].to_s,
+          posted_at: Time.zone.parse(snippet.fetch('publishedAt')),
+          reaction_count: snippet.fetch('likeCount', 0).to_i,
+          comment_no: nil,
+          vpos_ms: nil }
+      end
+    end
+  end
+end
diff --git a/backend/app/services/youtube/api_client.rb b/backend/app/services/youtube/api_client.rb
new file mode 100644
index 0000000..8ab0b04
--- /dev/null
+++ b/backend/app/services/youtube/api_client.rb
@@ -0,0 +1,79 @@
+require 'json'
+require 'net/http'
+require 'uri'
+
+
+module Youtube
+  # Minimal Net::HTTP client for the YouTube Data API v3.  Every public method
+  # returns the parsed JSON response body as a Hash.
+  class ApiClient
+    ENDPOINT = 'https://www.googleapis.com/youtube/v3'
+
+    def initialize api_key: ENV.fetch('YOUTUBE_API_KEY')
+      @api_key = api_key
+    end
+
+    # Searches videos matching +q+ published after +published_after+ (anything
+    # responding to #iso8601), newest first, up to 50 per page.
+    def search_videos q:, published_after:, page_token: nil
+      get_json('/search', {
+        part: 'snippet',
+        type: 'video',
+        q: q,
+        order: 'date',
+        maxResults: 50,
+        publishedAfter: published_after.iso8601,
+        pageToken: page_token }.compact)
+    end
+
+    # Looks up the videos with the given +ids+; skips the request entirely
+    # when +ids+ is empty.
+    def videos ids
+      return { 'items' => [] } if ids.empty?
+
+      get_json('/videos', part: 'snippet,status,contentDetails', id: ids.join(','))
+    end
+
+    # Returns one page (up to 50 entries) of the playlist's items.
+    def playlist_items playlist_id:, page_token: nil
+      get_json('/playlistItems', {
+        part: 'snippet,contentDetails,status',
+        playlistId: playlist_id,
+        maxResults: 50,
+        pageToken: page_token }.compact)
+    end
+
+    # Looks up a channel by +id+ and/or +handle+.
+    def channel id: nil, handle: nil
+      params = { part: 'snippet,contentDetails' }
+      params[:id] = id if id
+      params[:forHandle] = handle if handle
+
+      get_json('/channels', params)
+    end
+
+    private
+
+    # GETs +path+ with +params+ plus the API key and parses the JSON body.
+    # Raises a RuntimeError carrying the status code and body on any non-2xx
+    # response.
+    def get_json path, params
+      uri = URI(ENDPOINT + path)
+      uri.query = URI.encode_www_form(params.merge(key: @api_key))
+
+      response = Net::HTTP.start(uri.host,
+                                 uri.port,
+                                 use_ssl: true,
+                                 open_timeout: 10,
+                                 read_timeout: 30) do |http|
+        http.get(uri)
+      end
+
+      unless response.is_a?(Net::HTTPSuccess)
+        raise "YouTube API error: #{ response.code } #{ response.body }"
+      end
+
+      JSON.parse(response.body)
+    end
+  end
+end
diff --git a/backend/app/services/youtube/search_client.rb b/backend/app/services/youtube/search_client.rb
new file mode 100644
index 0000000..66bed09
--- /dev/null
+++ b/backend/app/services/youtube/search_client.rb
@@ -0,0 +1,38 @@
+module Youtube
+  # Faraday client for the YouTube Data API v3 search endpoint.
+  class SearchClient
+    API_BASE = 'https://www.googleapis.com/youtube/v3'
+
+    def initialize api_key: ENV.fetch('YOUTUBE_API_KEY')
+      @api_key = api_key
+    end
+
+    # Searches videos matching +query+, newest first, restricted to region JP
+    # and biased towards Japanese.  The optional time bounds and page token
+    # are left out of the request when nil.  Returns the parsed JSON body.
+    def search_videos query:, published_after: nil, published_before: nil, page_token: nil
+      response = connection.get('search', {
+        part: 'snippet',
+        q: query,
+        type: 'video',
+        order: 'date',
+        maxResults: 50,
+        regionCode: 'JP',
+        relevanceLanguage: 'ja',
+        publishedAfter: published_after&.iso8601,
+        publishedBefore: published_before&.iso8601,
+        pageToken: page_token,
+        key: @api_key }.compact)
+
+      JSON.parse(response.body)
+    end
+
+    private
+
+    def connection
+      @connection ||= Faraday.new(url: API_BASE) do |faraday|
+        faraday.response :raise_error
+      end
+    end
+  end
+end
diff --git a/backend/app/services/youtube/sync.rb b/backend/app/services/youtube/sync.rb
new file mode 100644
index 0000000..146441b
--- /dev/null
+++ b/backend/app/services/youtube/sync.rb
@@ -0,0 +1,170 @@
+require 'open-uri'
+require 'set'
+require 'time'
+
+
+module Youtube
+  # Mirrors YouTube videos found via fixed search terms and playlists into
+  # Post records, keeping their tags and version history up to date.
+  class Sync
+    def initialize client: ApiClient.new
+      @client = client
+    end
+
+    # Discovers video IDs, fetches their details 50 at a time and syncs each
+    # video into a Post.
+    def sync!
+      video_ids = discover_video_ids
+      return if video_ids.empty?
+
+      video_ids.each_slice(50) do |ids|
+        @client.videos(ids).fetch('items', []).each do |item|
+          sync_video!(VideoItem.new(item))
+        end
+      end
+    end
+
+    private
+
+    # Collects unique video IDs from the first result page of every search
+    # term and every playlist.
+    def discover_video_ids
+      ids = Set.new
+
+      query_terms.each do |q|
+        response = @client.search_videos(q:, published_after: sync_since)
+
+        response.fetch('items', []).each do |item|
+          video_id = item.dig('id', 'videoId')
+          ids << video_id if video_id.present?
+        end
+      end
+
+      playlist_ids.each do |playlist_id|
+        response = @client.playlist_items(playlist_id:)
+        response.fetch('items', []).each do |item|
+          video_id = item.dig('contentDetails', 'videoId')
+          ids << video_id if video_id.present?
+        end
+      end
+
+      ids.to_a
+    end
+
+    # Creates or updates the Post for +video+, attaches a thumbnail when none
+    # is attached yet, adds the deerjikist (or "no deerjikist") tag and records
+    # a post version when anything changed.
+    def sync_video! video
+      post = Post.where('url REGEXP ?', youtube_url_regexp(video.id)).first
+
+      # Creation time is stored as a one-minute window.
+      original_created_from = video.published_at.change(sec: 0)
+      original_created_before = original_created_from + 1.minute
+
+      post_created = false
+      post_changed = false
+
+      if post
+        post.assign_attributes(
+          title: video.title,
+          original_created_from:,
+          original_created_before:,
+          thumbnail_base: video.thumbnail_url)
+
+        post_changed = post.changed?
+        post.save! if post_changed
+
+        attach_thumbnail_if_needed!(post, video.thumbnail_url)
+      else
+        post_created = true
+        post = Post.create!(
+          title: video.title,
+          url: video.url,
+          thumbnail_base: video.thumbnail_url,
+          uploaded_user: nil,
+          original_created_from:,
+          original_created_before:)
+
+        attach_thumbnail_if_needed!(post, video.thumbnail_url)
+
+        sync_post_tags!(post, [Tag.tagme.id, Tag.bot.id, Tag.youtube.id, Tag.video.id])
+      end
+
+      kept_tag_ids = post.tags.pluck(:id).to_set
+      desired_tag_ids = kept_tag_ids.to_a
+
+      deerjikist = Deerjikist.find_by(platform: :youtube, code: video.channel_id)
+      if deerjikist
+        desired_tag_ids << deerjikist.tag_id
+      elsif post.tags.where(category: :deerjikist).none?
+        desired_tag_ids << Tag.no_deerjikist.id
+      end
+
+      desired_tag_ids.uniq!
+
+      sync_post_tags!(post, desired_tag_ids, current_tag_ids: kept_tag_ids)
+
+      if post_created
+        PostVersionRecorder.record!(post:, event_type: :create, created_by_user: nil)
+      elsif post_changed || kept_tag_ids != desired_tag_ids.to_set
+        PostVersionRecorder.ensure_snapshot!(post, created_by_user: nil)
+        PostVersionRecorder.record!(post:, event_type: :update, created_by_user: nil)
+      end
+    end
+
+    # Makes the post's tags equal to +desired_tag_ids+: creates the missing
+    # PostTag rows and discards the kept ones that are no longer wanted.
+    # +current_tag_ids+ defaults to the post's kept PostTag tag IDs.
+    def sync_post_tags! post, desired_tag_ids, current_tag_ids: nil
+      current_tag_ids ||= PostTag.kept.where(post_id: post.id).pluck(:tag_id).to_set
+      desired_tag_ids = desired_tag_ids.compact.to_set
+
+      to_add = desired_tag_ids - current_tag_ids
+      to_remove = current_tag_ids - desired_tag_ids
+
+      Tag.where(id: to_add.to_a).find_each do |tag|
+        PostTag.create!(post:, tag:)
+      rescue ActiveRecord::RecordNotUnique
+        # The row already exists, so there is nothing left to do.
+      end
+
+      PostTag.where(post_id: post.id, tag_id: to_remove.to_a).kept.find_each do |pt|
+        pt.discard_by!(nil)
+      end
+    end
+
+    # Downloads +thumbnail_url+ and attaches it as the post's thumbnail unless
+    # one is already attached or the URL is blank.
+    def attach_thumbnail_if_needed! post, thumbnail_url
+      return if post.thumbnail.attached?
+      return if thumbnail_url.blank?
+
+      # No space before the argument parentheses: `URI.parse (url).path` would
+      # bind #path to the String instead of to the parsed URI.
+      post.thumbnail.attach(
+        io: URI.open(thumbnail_url),
+        filename: File.basename(URI.parse(thumbnail_url).path),
+        content_type: 'image/jpeg')
+
+      post.resized_thumbnail!
+    end
+
+    # Returns a regexp source (for SQL REGEXP) matching watch?v= and youtu.be
+    # URLs of exactly this video ID.
+    def youtube_url_regexp id
+      escaped = Regexp.escape(id)
+      "(youtube\\.com/watch\\?v=#{ escaped }|youtu\\.be/#{ escaped })([^A-Za-z0-9_-]|$)"
+    end
+
+    def query_terms = ['ぼざろクリーチャーシリーズ', '伊地知ニジカ', '伊地知虹鹿']
+
+    def playlist_ids
+      ['PLrOch4zHkI5vu29b-f9umUQQ4tQkuWLPX',
+       'PLrOch4zHkI5vOK0RaytQq6PbucxQkkL0K',
+       'PLrOch4zHkI5tdwm9vSegiDQJOM-hgpcOC']
+    end
+
+    # Search only looks back this far on each run.
+    def sync_since = 14.days.ago
+  end
+end
diff --git a/backend/app/services/youtube/video_item.rb b/backend/app/services/youtube/video_item.rb
new file mode 100644
index 0000000..b8bad47
--- /dev/null
+++ b/backend/app/services/youtube/video_item.rb
@@ -0,0 +1,32 @@
+module Youtube
+  # Read-only view over one item of a videos.list response.
+  class VideoItem
+    attr_reader :id, :title, :channel_id, :published_at, :thumbnail_url, :raw_tags
+
+    def initialize item
+      snippet = item.fetch('snippet')
+
+      @id = item.fetch('id')
+      @title = snippet['title']
+      @channel_id = snippet['channelId']
+      @published_at = Time.iso8601(snippet['publishedAt'])
+      @thumbnail_url = pick_thumbnail(snippet['thumbnails'] || { })
+      @raw_tags = snippet['tags'] || []
+    end
+
+    def url = "https://www.youtube.com/watch?v=#{ @id }"
+
+    private
+
+    # Returns the URL of the first available thumbnail, trying the keys in the
+    # order listed below, or nil when none has a URL.
+    def pick_thumbnail thumbnails
+      ['maxres', 'standard', 'high', 'medium', 'default'].each do |key|
+        url = thumbnails.dig(key, 'url')
+        return url if url.present?
+      end
+
+      nil
+    end
+  end
+end
diff --git a/backend/lib/tasks/sync_posts.rake b/backend/lib/tasks/sync_posts.rake
new file mode 100644
index 0000000..267d474
--- /dev/null
+++ b/backend/lib/tasks/sync_posts.rake
@@ -0,0 +1,6 @@
+namespace :post do
+  desc '投稿同期(ニコニコ以外)'
+  task sync: :environment do
+    Youtube::Sync.new.sync!
+  end
+end