From 39101ca422b8144c46e682118c741ab3a7af090d Mon Sep 17 00:00:00 2001 From: miteruzo Date: Sat, 2 May 2026 02:48:40 +0900 Subject: [PATCH 1/5] #314 --- backend/app/models/tag.rb | 1 + .../discovery/youtube_video_discoverer.rb | 46 ++++++ backend/app/services/post_importer.rb | 51 ++++++ .../services/video_sources/youtube/client.rb | 87 ++++++++++ backend/app/services/youtube/api_client.rb | 68 ++++++++ backend/app/services/youtube/search_client.rb | 34 ++++ backend/app/services/youtube/sync.rb | 152 ++++++++++++++++++ backend/app/services/youtube/video_item.rb | 29 ++++ backend/lib/tasks/sync_posts.rake | 6 + 9 files changed, 474 insertions(+) create mode 100644 backend/app/services/discovery/youtube_video_discoverer.rb create mode 100644 backend/app/services/post_importer.rb create mode 100644 backend/app/services/video_sources/youtube/client.rb create mode 100644 backend/app/services/youtube/api_client.rb create mode 100644 backend/app/services/youtube/search_client.rb create mode 100644 backend/app/services/youtube/sync.rb create mode 100644 backend/app/services/youtube/video_item.rb create mode 100644 backend/lib/tasks/sync_posts.rake diff --git a/backend/app/models/tag.rb b/backend/app/models/tag.rb index 54c3d68..acdc5c7 100644 --- a/backend/app/models/tag.rb +++ b/backend/app/models/tag.rb @@ -84,6 +84,7 @@ class Tag < ApplicationRecord def self.no_deerjikist = find_or_create_by_tag_name!('ニジラー情報不詳', category: :meta) def self.video = find_or_create_by_tag_name!('動画', category: :meta) def self.niconico = find_or_create_by_tag_name!('ニコニコ', category: :meta) + def self.youtube = find_or_create_by_tag_name!('YouTube', category: :meta) def self.normalise_tags tag_names, with_tagme: true, with_no_deerjikist: true, diff --git a/backend/app/services/discovery/youtube_video_discoverer.rb b/backend/app/services/discovery/youtube_video_discoverer.rb new file mode 100644 index 0000000..70eac89 --- /dev/null +++ b/backend/app/services/discovery/youtube_video_discoverer.rb @@ -0,0 +1,46 @@ +class Discovery::YoutubeVideoDiscoverer + def initialize(client: Youtube::SearchClient.new) + @client = client + end + + def call discovery_query:, published_after:, published_before: + body = @client.search_videos( + query: discovery_query.query, + published_after: published_after, + published_before: published_before) + + body.fetch('items', []).each do |item| + next unless item.dig('id', 'kind') == 'youtube#video' + + upsert_candidate!(discovery_query, item) + end + end + + private + + def upsert_candidate! discovery_query, item + snippet = item.fetch('snippet') + code = item.fetch('id').fetch('videoId') + + candidate = VideoCandidate.find_or_initialize_by(provider: 'youtube', code:) + + candidate.title = snippet['title'].to_s + candidate.description = snippet['description'].to_s + candidate.channel_code = snippet['channelId'] + candidate.channel_title = snippet['channelTitle'] + candidate.published_at = Time.zone.parse(snippet['publishedAt']) + candidate.thumbnail_url = snippet.dig('thumbnails', 'high', 'url') + candidate.raw_data = item + candidate.last_discovered_at = Time.current + candidate.save! + + VideoCandidateHit.find_or_create_by!( + video_candidate: candidate, + discovery_query: discovery_query + ) do |hit| + hit.matched_field = 'youtube_search' + hit.score = 0 + hit.searched_at = Time.current + end + end +end diff --git a/backend/app/services/post_importer.rb b/backend/app/services/post_importer.rb new file mode 100644 index 0000000..6d86287 --- /dev/null +++ b/backend/app/services/post_importer.rb @@ -0,0 +1,51 @@ +class PostImporter + def initialize provider: + @provider = provider + end + + def import_posts source_videos + alive_codes = [] + + ApplicationRecord.transaction do + source_videos.each do |source_video| + alive_codes << source_video.fetch(:code) + + deerjikist_tag = deerjikist_tag_of(source_video) + video = upsert_post(source_video, deerjikist_tag) + end + end + end + + private + + attr_reader :provider + + def deerjikist_tag_of source_video + user_code = source_video[:user_code] + return nil if user_code.blank? + + deerjikist&.tag + end + + def upsert_post source_video, deerjikist_tag + url = + case provider + when 'youtube' + "https://youtube.com/watch/#{ source_video.fetch(:code) }" + end + raise 'ちんぽ!' if url.blank? + + Post.find_or_initialize_by(url:).tap do |post| + post.title = source_viedo.fetch(:title) + post.uploaded_user_id = nil + post.original_created_from = source_video.fetch(:uploaded_at) + post.original_created_before = post.original_created_from + 1.min + post.tags << (deerjikist_tag || no_deerjikist) + post.tags << Tag.youtube + post.tags << Tag.video + post.tags << Tag.bot + post.tags << Tag.tagme + post.save! + end + end +end diff --git a/backend/app/services/video_sources/youtube/client.rb b/backend/app/services/video_sources/youtube/client.rb new file mode 100644 index 0000000..ac797f2 --- /dev/null +++ b/backend/app/services/video_sources/youtube/client.rb @@ -0,0 +1,87 @@ +module VideoSources + module Youtube + class Client + API_BASE = 'https://www.googleapis.com/youtube/v3' + + def initialize api_key: ENV.fetch('YOUTUBE_API_KEY') + @api_key = api_key + end + + def videos video_ids + return [] if video_ids.empty? + + response = connection.get('videos', part: 'snippet,statistics', + id: video_ids.join(','), + key: @api_key) + + JSON.parse(response.body).fetch('items', []).map do |item| + build_video(item) + end + end + + def comments video_id + comments = [] + page_token = nil + + loop do + response = connection.get('commentThreads', { + part: 'snippet', + videoId: video_id, + maxResults: 100, + textFormat: 'plainText', + pageToken: page_token, + key: @api_key }.compact) + + body = JSON.parse(response.body) + + comments.concat(body.fetch('items', []).map { |item| build_comment(item) }) + + page_token = body['nextPageToken'] + break if page_token.blank? + end + + comments + rescue Faraday::ForbiddenError + [] + end + + private + + def connection + @connection ||= Faraday.new(url: API_BASE) do |faraday| + faraday.response :raise_error + end + end + + def build_video item + snippet = item.fetch('snippet') + statistics = item.fetch('statistics', { }) + + { provider: 'youtube', + code: item.fetch('id'), + user_code: snippet['channelId'], + title: snippet['title'].to_s, + description: snippet['description'].to_s, + tag_names: snippet.fetch('tags', []), + views_count: statistics.fetch('viewCount', 0).to_i, + uploaded_at: Time.zone.parse(snippet.fetch('publishedAt')) } + end + + def build_comment item + snippet = + item + .fetch('snippet') + .fetch('topLevelComment') + .fetch('snippet') + + { provider_comment_id: item.fetch('id'), + user_code: snippet['authorChannelId']&.fetch('value', nil), + content: snippet['textDisplay'].to_s, + posted_at: Time.zone.parse(snippet.fetch('publishedAt')), + reaction_count: snippet.fetch('likeCount', 0).to_i, + comment_no: nil, + vpos_ms: nil } + end + end + end +end diff --git a/backend/app/services/youtube/api_client.rb b/backend/app/services/youtube/api_client.rb new file mode 100644 index 0000000..8ab0b04 --- /dev/null +++ b/backend/app/services/youtube/api_client.rb @@ -0,0 +1,68 @@ +require 'json' +require 'net/http' +require 'uri' + + +module Youtube + class ApiClient + ENDPOINT = 'https://www.googleapis.com/youtube/v3' + + def initialize api_key: ENV.fetch('YOUTUBE_API_KEY') + @api_key = api_key + end + + def search_videos q:, published_after:, page_token: nil + get_json('/search', { + part: 'snippet', + type: 'video', + q: q, + order: 'date', + maxResults: 50, + publishedAfter: published_after.iso8601, + pageToken: page_token }.compact) + end + + def videos ids + return { 'items' => [] } if ids.empty? + + get_json('/videos', part: 'snippet,status,contentDetails', id: ids.join (',')) + end + + def playlist_items playlist_id:, page_token: nil + get_json('/playlistItems', { + part: 'snippet,contentDetails,status', + playlistId: playlist_id, + maxResults: 50, + pageToken: page_token }.compact) + end + + def channel id: nil, handle: nil + params = { part: 'snippet,contentDetails' } + params[:id] = id if id + params[:forHandle] = handle if handle + + get_json('/channels', params) + end + + private + + def get_json path, params + uri = URI(ENDPOINT + path) + uri.query = URI.encode_www_form(params.merge(key: @api_key)) + + response = Net::HTTP.start(uri.host, + uri.port, + use_ssl: true, + open_timeout: 10, + read_timeout: 30) do |http| + http.get(uri) + end + + unless response.is_a?(Net::HTTPSuccess) + raise "YouTube API error: #{ response.code } #{ response.body }" + end + + JSON.parse(response.body) + end + end +end diff --git a/backend/app/services/youtube/search_client.rb b/backend/app/services/youtube/search_client.rb new file mode 100644 index 0000000..66bed09 --- /dev/null +++ b/backend/app/services/youtube/search_client.rb @@ -0,0 +1,34 @@ +module Youtube + class SearchClient + API_BASE = 'https://www.googleapis.com/youtube/v3' + + def initialize api_key: ENV.fetch('YOUTUBE_API_KEY') + @api_key = api_key + end + + def search_videos query:, published_after: nil, published_before: nil, page_token: nil + response = connection.get('search', { + part: 'snippet', + q: query, + type: 'video', + order: 'date', + maxResults: 50, + regionCode: 'JP', + relevanceLanguage: 'ja', + publishedAfter: published_after&.iso8601, + publishedBefore: published_before&.iso8601, + pageToken: page_token, + key: @api_key }.compact) + + JSON.parse(response.body) + end + + private + + def connection + @connection ||= Faraday.new(url: API_BASE) do |faraday| + faraday.response :raise_error + end + end + end +end diff --git a/backend/app/services/youtube/sync.rb b/backend/app/services/youtube/sync.rb new file mode 100644 index 0000000..146441b --- /dev/null +++ b/backend/app/services/youtube/sync.rb @@ -0,0 +1,152 @@ +require 'open-uri' +require 'set' +require 'time' + + +module Youtube + class Sync + def initialize client: ApiClient.new + @client = client + end + + def sync! + video_ids = discover_video_ids + return if video_ids.empty? + + video_ids.each_slice(50) do |ids| + @client.videos(ids).fetch('items', []).each do |item| + sync_video!(VideoItem.new (item)) + end + end + end + + private + + def discover_video_ids + ids = Set.new + + query_terms.each do |q| + response = @client.search_videos(q:, published_after: sync_since) + + response.fetch('items', []).each do |item| + video_id = item.dig('id', 'videoId') + ids << video_id if video_id.present? + end + end + + playlist_ids.each do |playlist_id| + response = @client.playlist_items(playlist_id:) + response.fetch('items', []).each do |item| + video_id = item.dig('contentDetails', 'videoId') + ids << video_id if video_id.present? + end + end + + ids.to_a + end + + def sync_video! video + post = Post.where('url REGEXP ?', youtube_url_regexp(video.id)).first + + original_created_from = video.published_at.change(sec: 0) + original_created_before = original_created_from + 1.minute + + post_created = false + post_changed = false + + if post + post.assign_attributes( + title: video.title, + original_created_from:, + original_created_before:, + thumbnail_base: video.thumbnail_url) + + post_changed = post.changed? + post.save! if post_changed + + attach_thumbnail_if_needed!(post, video.thumbnail_url) + else + post_created = true + post = Post.create!( + title: video.title, + url: video.url, + thumbnail_base: video.thumbnail_url, + uploaded_user: nil, + original_created_from:, + original_created_before:) + + attach_thumbnail_if_needed!(post, video.thumbnail_url) + + sync_post_tags!(post, [Tag.tagme.id, Tag.bot.id, Tag.youtube.id, Tag.video.id]) + end + + kept_tag_ids = post.tags.pluck(:id).to_set + desired_tag_ids = kept_tag_ids.to_a + + deerjikist = Deerjikist.find_by(platform: :youtube, code: video.channel_id) + if deerjikist + desired_tag_ids << deerjikist.tag_id + elsif post.tags.where(category: :deerjikist).none? + desired_tag_ids << Tag.no_deerjikist.id + end + + desired_tag_ids.uniq! + + sync_post_tags!(post, desired_tag_ids, current_tag_ids: kept_tag_ids) + + if post_created + PostVersionRecorder.record!(post:, event_type: :create, created_by_user: nil) + elsif post_changed || kept_tag_ids != desired_tag_ids.to_set + PostVersionRecorder.ensure_snapshot!(post, created_by_user: nil) + PostVersionRecorder.record!(post:, event_type: :update, created_by_user: nil) + end + end + + def sync_post_tags! post, desired_tag_ids, current_tag_ids: nil + current_tag_ids ||= PostTag.kept.where(post_id: post.id).pluck(:tag_id).to_set + desired_tag_ids = desired_tag_ids.compact.to_set + + to_add = desired_tag_ids - current_tag_ids + to_remove = current_tag_ids - desired_tag_ids + + Tag.where(id: to_add.to_a).find_each do |tag| + begin + PostTag.create!(post:, tag:) + rescue ActiveRecord::RecordNotUnique + ; + end + end + + PostTag.where(post_id: post.id, tag_id: to_remove.to_a).kept.find_each do |pt| + pt.discard_by!(nil) + end + end + + def attach_thumbnail_if_needed! post, thumbnail_url + return if post.thumbnail.attached? + return if thumbnail_url.blank? + + post.thumbnail.attach( + io: URI.open (thumbnail_url), + filename: File.basename (URI.parse (thumbnail_url).path), + content_type: 'image/jpeg') + + post.resized_thumbnail! + end + + def youtube_url_regexp id + escaped = Regexp.escape(id) + "(youtube\\.com/watch\\?v=#{ escaped }|youtu\\.be/#{ escaped })([^A-Za-z0-9_-]|$)" + end + + def query_terms = ['ぼざろクリーチャーシリーズ', '伊地知ニジカ', '伊地知虹鹿'] + + def playlist_ids + ['PLrOch4zHkI5vu29b-f9umUQQ4tQkuWLPX', + 'PLrOch4zHkI5vOK0RaytQq6PbucxQkkL0K', + 'PLrOch4zHkI5tdwm9vSegiDQJOM-hgpcOC'] + end + + def sync_since = 14.days.ago + end +end diff --git a/backend/app/services/youtube/video_item.rb b/backend/app/services/youtube/video_item.rb new file mode 100644 index 0000000..b8bad47 --- /dev/null +++ b/backend/app/services/youtube/video_item.rb @@ -0,0 +1,29 @@ +module Youtube + class VideoItem + attr_reader :id, :title, :channel_id, :published_at, :thumbnail_url, :raw_tags + + def initialize item + snippet = item.fetch('snippet') + + @id = item.fetch('id') + @title = snippet['title'] + @channel_id = snippet['channelId'] + @published_at = Time.iso8601(snippet['publishedAt']) + @thumbnail_url = pick_thumbnail(snippet['thumbnails'] || { }) + @raw_tags = snippet['tags'] || [] + end + + def url = "https://www.youtube.com/watch?v=#{ @id }" + + private + + def pick_thumbnail thumbnails + ['maxres', 'standard', 'high', 'medium', 'default'].each do |key| + url = thumbnails.dig(key, 'url') + return url if url.present? + end + + nil + end + end +end diff --git a/backend/lib/tasks/sync_posts.rake b/backend/lib/tasks/sync_posts.rake new file mode 100644 index 0000000..267d474 --- /dev/null +++ b/backend/lib/tasks/sync_posts.rake @@ -0,0 +1,6 @@ +namespace :post do + desc '投稿同期(ニコニコ以外)' + task sync: :environment do + Youtube::Sync.new.sync! + end +end -- 2.34.1 From e90c4d612e079258562b74f30818b63eae0f017f Mon Sep 17 00:00:00 2001 From: miteruzo Date: Sat, 2 May 2026 03:38:09 +0900 Subject: [PATCH 2/5] #314 --- backend/app/services/post_importer.rb | 51 ----------- .../services/video_sources/youtube/client.rb | 87 ------------------- backend/app/services/youtube/api_client.rb | 2 +- backend/app/services/youtube/search_client.rb | 34 -------- backend/app/services/youtube/sync.rb | 23 +++-- 5 files changed, 12 insertions(+), 185 deletions(-) delete mode 100644 backend/app/services/post_importer.rb delete mode 100644 backend/app/services/video_sources/youtube/client.rb delete mode 100644 backend/app/services/youtube/search_client.rb diff --git a/backend/app/services/post_importer.rb b/backend/app/services/post_importer.rb deleted file mode 100644 index 6d86287..0000000 --- a/backend/app/services/post_importer.rb +++ /dev/null @@ -1,51 +0,0 @@ -class PostImporter - def initialize provider: - @provider = provider - end - - def import_posts source_videos - alive_codes = [] - - ApplicationRecord.transaction do - source_videos.each do |source_video| - alive_codes << source_video.fetch(:code) - - deerjikist_tag = deerjikist_tag_of(source_video) - video = upsert_post(source_video, deerjikist_tag) - end - end - end - - private - - attr_reader :provider - - def deerjikist_tag_of source_video - user_code = source_video[:user_code] - return nil if user_code.blank? - - deerjikist&.tag - end - - def upsert_post source_video, deerjikist_tag - url = - case provider - when 'youtube' - "https://youtube.com/watch/#{ source_video.fetch(:code) }" - end - raise 'ちんぽ!' if url.blank? - - Post.find_or_initialize_by(url:).tap do |post| - post.title = source_viedo.fetch(:title) - post.uploaded_user_id = nil - post.original_created_from = source_video.fetch(:uploaded_at) - post.original_created_before = post.original_created_from + 1.min - post.tags << (deerjikist_tag || no_deerjikist) - post.tags << Tag.youtube - post.tags << Tag.video - post.tags << Tag.bot - post.tags << Tag.tagme - post.save! - end - end -end diff --git a/backend/app/services/video_sources/youtube/client.rb b/backend/app/services/video_sources/youtube/client.rb deleted file mode 100644 index ac797f2..0000000 --- a/backend/app/services/video_sources/youtube/client.rb +++ /dev/null @@ -1,87 +0,0 @@ -module VideoSources - module Youtube - class Client - API_BASE = 'https://www.googleapis.com/youtube/v3' - - def initialize api_key: ENV.fetch('YOUTUBE_API_KEY') - @api_key = api_key - end - - def videos video_ids - return [] if video_ids.empty? - - response = connection.get('videos', part: 'snippet,statistics', - id: video_ids.join(','), - key: @api_key) - - JSON.parse(response.body).fetch('items', []).map do |item| - build_video(item) - end - end - - def comments video_id - comments = [] - page_token = nil - - loop do - response = connection.get('commentThreads', { - part: 'snippet', - videoId: video_id, - maxResults: 100, - textFormat: 'plainText', - pageToken: page_token, - key: @api_key }.compact) - - body = JSON.parse(response.body) - - comments.concat(body.fetch('items', []).map { |item| build_comment(item) }) - - page_token = body['nextPageToken'] - break if page_token.blank? - end - - comments - rescue Faraday::ForbiddenError - [] - end - - private - - def connection - @connection ||= Faraday.new(url: API_BASE) do |faraday| - faraday.response :raise_error - end - end - - def build_video item - snippet = item.fetch('snippet') - statistics = item.fetch('statistics', { }) - - { provider: 'youtube', - code: item.fetch('id'), - user_code: snippet['channelId'], - title: snippet['title'].to_s, - description: snippet['description'].to_s, - tag_names: snippet.fetch('tags', []), - views_count: statistics.fetch('viewCount', 0).to_i, - uploaded_at: Time.zone.parse(snippet.fetch('publishedAt')) } - end - - def build_comment item - snippet = - item - .fetch('snippet') - .fetch('topLevelComment') - .fetch('snippet') - - { provider_comment_id: item.fetch('id'), - user_code: snippet['authorChannelId']&.fetch('value', nil), - content: snippet['textDisplay'].to_s, - posted_at: Time.zone.parse(snippet.fetch('publishedAt')), - reaction_count: snippet.fetch('likeCount', 0).to_i, - comment_no: nil, - vpos_ms: nil } - end - end - end -end diff --git a/backend/app/services/youtube/api_client.rb b/backend/app/services/youtube/api_client.rb index 8ab0b04..0f81eb8 100644 --- a/backend/app/services/youtube/api_client.rb +++ b/backend/app/services/youtube/api_client.rb @@ -25,7 +25,7 @@ module Youtube def videos ids return { 'items' => [] } if ids.empty? - get_json('/videos', part: 'snippet,status,contentDetails', id: ids.join (',')) + get_json('/videos', part: 'snippet,status,contentDetails', id: ids.join(',')) end def playlist_items playlist_id:, page_token: nil diff --git a/backend/app/services/youtube/search_client.rb b/backend/app/services/youtube/search_client.rb deleted file mode 100644 index 66bed09..0000000 --- a/backend/app/services/youtube/search_client.rb +++ /dev/null @@ -1,34 +0,0 @@ -module Youtube - class SearchClient - API_BASE = 'https://www.googleapis.com/youtube/v3' - - def initialize api_key: ENV.fetch('YOUTUBE_API_KEY') - @api_key = api_key - end - - def search_videos query:, published_after: nil, published_before: nil, page_token: nil - response = connection.get('search', { - part: 'snippet', - q: query, - type: 'video', - order: 'date', - maxResults: 50, - regionCode: 'JP', - relevanceLanguage: 'ja', - publishedAfter: published_after&.iso8601, - publishedBefore: published_before&.iso8601, - pageToken: page_token, - key: @api_key }.compact) - - JSON.parse(response.body) - end - - private - - def connection - @connection ||= Faraday.new(url: API_BASE) do |faraday| - faraday.response :raise_error - end - end - end -end diff --git a/backend/app/services/youtube/sync.rb b/backend/app/services/youtube/sync.rb index 146441b..da8dc88 100644 --- a/backend/app/services/youtube/sync.rb +++ b/backend/app/services/youtube/sync.rb @@ -15,7 +15,7 @@ module Youtube video_ids.each_slice(50) do |ids| @client.videos(ids).fetch('items', []).each do |item| - sync_video!(VideoItem.new (item)) + sync_video!(VideoItem.new(item)) end end end @@ -55,11 +55,10 @@ module Youtube post_changed = false if post - post.assign_attributes( - title: video.title, - original_created_from:, - original_created_before:, - thumbnail_base: video.thumbnail_url) + post.assign_attributes(title: video.title, + original_created_from:, + original_created_before:, + thumbnail_base: video.thumbnail_url) post_changed = post.changed? post.save! if post_changed @@ -68,10 +67,10 @@ module Youtube else post_created = true post = Post.create!( - title: video.title, - url: video.url, - thumbnail_base: video.thumbnail_url, - uploaded_user: nil, + title: video.title, + url: video.url, + thumbnail_base: video.thumbnail_url, + uploaded_user: nil, original_created_from:, original_created_before:) @@ -127,8 +126,8 @@ module Youtube return if thumbnail_url.blank? post.thumbnail.attach( - io: URI.open (thumbnail_url), - filename: File.basename (URI.parse (thumbnail_url).path), + io: URI.open(thumbnail_url), + filename: File.basename(URI.parse(thumbnail_url).path), content_type: 'image/jpeg') post.resized_thumbnail! -- 2.34.1 From 43122a49c3837b9373202aa35e56e25c917192d2 Mon Sep 17 00:00:00 2001 From: miteruzo Date: Sat, 2 May 2026 03:39:31 +0900 Subject: [PATCH 3/5] #314 --- .../discovery/youtube_video_discoverer.rb | 46 ------------------- 1 file changed, 46 deletions(-) delete mode 100644 backend/app/services/discovery/youtube_video_discoverer.rb diff --git a/backend/app/services/discovery/youtube_video_discoverer.rb b/backend/app/services/discovery/youtube_video_discoverer.rb deleted file mode 100644 index 70eac89..0000000 --- a/backend/app/services/discovery/youtube_video_discoverer.rb +++ /dev/null @@ -1,46 +0,0 @@ -class Discovery::YoutubeVideoDiscoverer - def initialize(client: Youtube::SearchClient.new) - @client = client - end - - def call discovery_query:, published_after:, published_before: - body = @client.search_videos( - query: discovery_query.query, - published_after: published_after, - published_before: published_before) - - body.fetch('items', []).each do |item| - next unless item.dig('id', 'kind') == 'youtube#video' - - upsert_candidate!(discovery_query, item) - end - end - - private - - def upsert_candidate! discovery_query, item - snippet = item.fetch('snippet') - code = item.fetch('id').fetch('videoId') - - candidate = VideoCandidate.find_or_initialize_by(provider: 'youtube', code:) - - candidate.title = snippet['title'].to_s - candidate.description = snippet['description'].to_s - candidate.channel_code = snippet['channelId'] - candidate.channel_title = snippet['channelTitle'] - candidate.published_at = Time.zone.parse(snippet['publishedAt']) - candidate.thumbnail_url = snippet.dig('thumbnails', 'high', 'url') - candidate.raw_data = item - candidate.last_discovered_at = Time.current - candidate.save! - - VideoCandidateHit.find_or_create_by!( - video_candidate: candidate, - discovery_query: discovery_query - ) do |hit| - hit.matched_field = 'youtube_search' - hit.score = 0 - hit.searched_at = Time.current - end - end -end -- 2.34.1 From 6ee621e565e94c97d4669d886bf5579a83321527 Mon Sep 17 00:00:00 2001 From: miteruzo Date: Sat, 2 May 2026 04:00:32 +0900 Subject: [PATCH 4/5] #314 --- backend/app/services/youtube/api_client.rb | 11 ++++++++--- backend/app/services/youtube/sync.rb | 23 +++++++++++++++++++--- backend/app/services/youtube/video_item.rb | 3 +++ 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/backend/app/services/youtube/api_client.rb b/backend/app/services/youtube/api_client.rb index 0f81eb8..e572b6f 100644 --- a/backend/app/services/youtube/api_client.rb +++ b/backend/app/services/youtube/api_client.rb @@ -11,14 +11,17 @@ module Youtube @api_key = api_key end - def search_videos q:, published_after:, page_token: nil + def search_videos q:, published_after: nil, published_before: nil, page_token: nil get_json('/search', { part: 'snippet', type: 'video', - q: q, + q:, order: 'date', maxResults: 50, - publishedAfter: published_after.iso8601, + regionCode: 'JP', + relevanceLanguage: 'ja', + publishedAfter: published_after&.iso8601, + publishedBefore: published_before&.iso8601, pageToken: page_token }.compact) end @@ -37,6 +40,8 @@ module Youtube end def channel id: nil, handle: nil + raise ArgumentError, 'id or handle is required' if id.present? == handle.present? + params = { part: 'snippet,contentDetails' } params[:id] = id if id params[:forHandle] = handle if handle diff --git a/backend/app/services/youtube/sync.rb b/backend/app/services/youtube/sync.rb index da8dc88..2056dc2 100644 --- a/backend/app/services/youtube/sync.rb +++ b/backend/app/services/youtube/sync.rb @@ -35,9 +35,10 @@ module Youtube end playlist_ids.each do |playlist_id| - response = @client.playlist_items(playlist_id:) - response.fetch('items', []).each do |item| + each_playlist_item(playlist_id) do |item| video_id = item.dig('contentDetails', 'videoId') + video_id ||= item.dig('snippet', 'resourceId', 'videoId') + ids << video_id if video_id.present? end end @@ -70,7 +71,7 @@ module Youtube title: video.title, url: video.url, thumbnail_base: video.thumbnail_url, - uploaded_user: nil, + uploaded_user_id: nil, original_created_from:, original_created_before:) @@ -84,6 +85,7 @@ module Youtube deerjikist = Deerjikist.find_by(platform: :youtube, code: video.channel_id) if deerjikist + desired_tag_ids.delete(Tag.no_deerjikist.id) desired_tag_ids << deerjikist.tag_id elsif post.tags.where(category: :deerjikist).none? desired_tag_ids << Tag.no_deerjikist.id @@ -147,5 +149,20 @@ module Youtube end def sync_since = 14.days.ago + + def each_playlist_item playlist_id + page_token = nil + + loop do + response = @client.playlist_items(playlist_id:, page_token:) + + response.fetch('items', []).each do |item| + yield item + end + + page_token = response['nextPageToken'] + break if page_token.blank? + end + end end end diff --git a/backend/app/services/youtube/video_item.rb b/backend/app/services/youtube/video_item.rb index b8bad47..fea2b15 100644 --- a/backend/app/services/youtube/video_item.rb +++ b/backend/app/services/youtube/video_item.rb @@ -1,3 +1,6 @@ +require 'time' + + module Youtube class VideoItem attr_reader :id, :title, :channel_id, :published_at, :thumbnail_url, :raw_tags -- 2.34.1 From fd479cd93a3df23414e3c75c00864e6561a70fd0 Mon Sep 17 00:00:00 2001 From: miteruzo Date: Sat, 2 May 2026 04:26:25 +0900 Subject: [PATCH 5/5] #314 --- backend/app/services/youtube/api_client.rb | 4 +- backend/config/schedule.rb | 8 + .../spec/services/youtube/api_client_spec.rb | 130 ++++++++ backend/spec/services/youtube/sync_spec.rb | 310 ++++++++++++++++++ .../spec/services/youtube/video_item_spec.rb | 93 ++++++ backend/spec/tasks/post_sync_spec.rb | 25 ++ 6 files changed, 568 insertions(+), 2 deletions(-) create mode 100644 backend/spec/services/youtube/api_client_spec.rb create mode 100644 backend/spec/services/youtube/sync_spec.rb create mode 100644 backend/spec/services/youtube/video_item_spec.rb create mode 100644 backend/spec/tasks/post_sync_spec.rb diff --git a/backend/app/services/youtube/api_client.rb b/backend/app/services/youtube/api_client.rb index e572b6f..e38ca57 100644 --- a/backend/app/services/youtube/api_client.rb +++ b/backend/app/services/youtube/api_client.rb @@ -43,8 +43,8 @@ module Youtube raise ArgumentError, 'id or handle is required' if id.present? == handle.present? params = { part: 'snippet,contentDetails' } - params[:id] = id if id - params[:forHandle] = handle if handle + params[:id] = id if id.present? + params[:forHandle] = handle if handle.present? get_json('/channels', params) end diff --git a/backend/config/schedule.rb b/backend/config/schedule.rb index 23b6c73..1209ab1 100644 --- a/backend/config/schedule.rb +++ b/backend/config/schedule.rb @@ -17,3 +17,11 @@ every 1.day, at: '0:00 am' do rake 'post_similarity:calc', environment: 'production' rake 'tag_similarity:calc', environment: 'production' end + +every 1.day, at: '7:50 am' do + rake 'nico:export', environment: 'production' +end + +every :hour do + rake 'post:sync', environment: 'production' +end diff --git a/backend/spec/services/youtube/api_client_spec.rb b/backend/spec/services/youtube/api_client_spec.rb new file mode 100644 index 0000000..5fb9298 --- /dev/null +++ b/backend/spec/services/youtube/api_client_spec.rb @@ -0,0 +1,130 @@ +require 'rails_helper' + +RSpec.describe Youtube::ApiClient do + let(:api_key) { 'test-api-key' } + let(:client) { described_class.new(api_key:) } + + describe '#search_videos' do + it 'calls YouTube search API with expected params' do + published_after = Time.zone.parse('2026-05-01 00:00:00') + published_before = Time.zone.parse('2026-05-02 00:00:00') + + expect(client).to receive(:get_json).with( + '/search', + { + part: 'snippet', + type: 'video', + q: 'ぼざろクリーチャー', + order: 'date', + maxResults: 50, + regionCode: 'JP', + relevanceLanguage: 'ja', + publishedAfter: published_after.iso8601, + publishedBefore: published_before.iso8601, + pageToken: 'NEXT' + } + ).and_return({ 'items' => [] }) + + client.search_videos( + q: 'ぼざろクリーチャー', + published_after:, + published_before:, + page_token: 'NEXT' + ) + end + + it 'omits nil optional params' do + expect(client).to receive(:get_json).with( + '/search', + hash_excluding(:publishedAfter, :publishedBefore, :pageToken) + ).and_return({ 'items' => [] }) + + client.search_videos(q: 'ぼざろクリーチャー') + end + end + + describe '#videos' do + it 'returns empty items when ids are empty' do + expect(client).not_to receive(:get_json) + + expect(client.videos([])).to eq({ 'items' => [] }) + end + + it 'calls videos API with comma separated ids' do + expect(client).to receive(:get_json).with( + '/videos', + { + part: 'snippet,status,contentDetails', + id: 'video-1,video-2' + } + ).and_return({ 'items' => [] }) + + client.videos(['video-1', 'video-2']) + end + end + + describe '#playlist_items' do + it 'calls playlistItems API with page token' do + expect(client).to receive(:get_json).with( + '/playlistItems', + { + part: 'snippet,contentDetails,status', + playlistId: 'PL123', + maxResults: 50, + pageToken: 'NEXT' + } + ).and_return({ 'items' => [] }) + + client.playlist_items(playlist_id: 'PL123', page_token: 'NEXT') + end + + it 'omits page token when nil' do + expect(client).to receive(:get_json).with( + '/playlistItems', + { + part: 'snippet,contentDetails,status', + playlistId: 'PL123', + maxResults: 50 + } + ).and_return({ 'items' => [] }) + + client.playlist_items(playlist_id: 'PL123') + end + end + + describe '#channel' do + it 'calls channels API by id' do + expect(client).to receive(:get_json).with( + '/channels', + { + part: 'snippet,contentDetails', + id: 'UC123' + } + ).and_return({ 'items' => [] }) + + client.channel(id: 'UC123') + end + + it 'calls channels API by handle' do + expect(client).to receive(:get_json).with( + '/channels', + { + part: 'snippet,contentDetails', + forHandle: '@some_handle' + } + ).and_return({ 'items' => [] }) + + client.channel(handle: '@some_handle') + end + + it 'raises when neither id nor handle is given' do + expect { client.channel }.to raise_error(ArgumentError, 'id or handle is required') + end + + it 'raises when both id and handle are given' do + expect do + client.channel(id: 'UC123', handle: '@some_handle') + end.to raise_error(ArgumentError, 'id or handle is required') + end + end +end diff --git a/backend/spec/services/youtube/sync_spec.rb b/backend/spec/services/youtube/sync_spec.rb new file mode 100644 index 0000000..df8009a --- /dev/null +++ b/backend/spec/services/youtube/sync_spec.rb @@ -0,0 +1,310 @@ +require 'rails_helper' + +RSpec.describe Youtube::Sync do + let(:client) { instance_double(Youtube::ApiClient) } + let(:sync) { described_class.new(client:) } + + before do + allow(PostVersionRecorder).to receive(:record!) + allow(PostVersionRecorder).to receive(:ensure_snapshot!) + allow(sync).to receive(:attach_thumbnail_if_needed!) + end + + describe '#sync!' do + it 'returns without fetching video details when no video ids are discovered' do + allow(sync).to receive(:query_terms).and_return([]) + allow(sync).to receive(:playlist_ids).and_return([]) + + expect(client).not_to receive(:videos) + + sync.sync! + end + + it 'discovers ids from search and all playlist pages' do + allow(sync).to receive(:query_terms).and_return(['ぼざろクリーチャー']) + allow(sync).to receive(:playlist_ids).and_return(['PL123']) + allow(sync).to receive(:sync_since).and_return(Time.zone.parse('2026-05-01 00:00:00')) + + allow(client).to receive(:search_videos).with( + q: 'ぼざろクリーチャー', + published_after: Time.zone.parse('2026-05-01 00:00:00') + ).and_return({ + 'items' => [ + { + 'id' => { + 'videoId' => 'search-video-1' + } + } + ] + }) + + allow(client).to receive(:playlist_items).with( + playlist_id: 'PL123', + page_token: nil + ).and_return({ + 'items' => [ + { + 'contentDetails' => { + 'videoId' => 'playlist-video-1' + } + } + ], + 'nextPageToken' => 'NEXT' + }) + + allow(client).to receive(:playlist_items).with( + playlist_id: 'PL123', + page_token: 'NEXT' + ).and_return({ + 'items' => [ + { + 'snippet' => { + 'resourceId' => { + 'videoId' => 'playlist-video-2' + } + } + } + ] + }) + + expect(client).to receive(:videos).with( + satisfy do |ids| + ids.sort == ['playlist-video-1', 'playlist-video-2', 'search-video-1'] + end + ).and_return({ 'items' => [] }) + + sync.sync! + end + + it 'creates a YouTube post with default tags and no_deerjikist when no deerjikist mapping exists' do + Tag.tagme + Tag.bot + Tag.youtube + Tag.video + Tag.no_deerjikist + + allow(sync).to receive(:query_terms).and_return([]) + allow(sync).to receive(:playlist_ids).and_return(['PL123']) + + allow(client).to receive(:playlist_items).with( + playlist_id: 'PL123', + page_token: nil + ).and_return({ + 'items' => [ + { + 'contentDetails' => { + 'videoId' => 'video-1' + } + } + ] + }) + + allow(client).to receive(:videos).with(['video-1']).and_return({ + 'items' => [ + youtube_video_item( + id: 'video-1', + title: 'YouTube テスト動画', + channel_id: 'UC_NO_MAPPING' + ) + ] + }) + + expect do + sync.sync! + end.to change(Post, :count).by(1) + + post = Post.find_by!(url: 'https://www.youtube.com/watch?v=video-1') + tag_ids = post.tags.pluck(:id) + + expect(post.title).to eq('YouTube テスト動画') + expect(post.uploaded_user_id).to be_nil + expect(post.original_created_from).to eq(Time.zone.parse('2026-05-01 12:34:00')) + expect(post.original_created_before).to eq(Time.zone.parse('2026-05-01 12:35:00')) + + expect(tag_ids).to include(Tag.tagme.id) + expect(tag_ids).to include(Tag.bot.id) + expect(tag_ids).to include(Tag.youtube.id) + expect(tag_ids).to include(Tag.video.id) + expect(tag_ids).to include(Tag.no_deerjikist.id) + + expect(PostVersionRecorder).to have_received(:record!).with( + post:, + event_type: :create, + created_by_user: nil + ) + end + + it 'uses deerjikist tag when channel id is mapped' do + Tag.tagme + Tag.bot + Tag.youtube + Tag.video + Tag.no_deerjikist + + deerjikist_tag = Tag.find_or_create_by_tag_name!('テスト投稿者', category: :deerjikist) + Deerjikist.create!( + platform: 'youtube', + code: 'UC_MAPPED', + tag: deerjikist_tag + ) + + allow(sync).to receive(:query_terms).and_return([]) + allow(sync).to receive(:playlist_ids).and_return(['PL123']) + + allow(client).to receive(:playlist_items).with( + playlist_id: 'PL123', + page_token: nil + ).and_return({ + 'items' => [ + { + 'contentDetails' => { + 'videoId' => 'video-1' + } + } + ] + }) + + allow(client).to receive(:videos).with(['video-1']).and_return({ + 'items' => [ + youtube_video_item( + id: 'video-1', + title: 'YouTube テスト動画', + channel_id: 'UC_MAPPED' + ) + ] + }) + + sync.sync! + + post = Post.find_by!(url: 'https://www.youtube.com/watch?v=video-1') + tag_ids = post.tags.pluck(:id) + + expect(tag_ids).to include(deerjikist_tag.id) + expect(tag_ids).not_to include(Tag.no_deerjikist.id) + end + + it 'removes no_deerjikist when deerjikist mapping is added later' do + Tag.no_deerjikist + + post = Post.create!( + title: '旧タイトル', + url: 'https://www.youtube.com/watch?v=video-1', + uploaded_user_id: nil, + original_created_from: Time.zone.parse('2026-05-01 00:00:00'), + original_created_before: Time.zone.parse('2026-05-01 00:01:00') + ) + PostTag.create!(post:, tag: Tag.no_deerjikist) + + deerjikist_tag = Tag.find_or_create_by_tag_name!('後から判明した投稿者', category: :deerjikist) + Deerjikist.create!( + platform: 'youtube', + code: 'UC_MAPPED_LATER', + tag: deerjikist_tag + ) + + allow(sync).to receive(:query_terms).and_return([]) + allow(sync).to receive(:playlist_ids).and_return(['PL123']) + + allow(client).to receive(:playlist_items).with( + playlist_id: 'PL123', + page_token: nil + ).and_return({ + 'items' => [ + { + 'contentDetails' => { + 'videoId' => 'video-1' + } + } + ] + }) + + allow(client).to receive(:videos).with(['video-1']).and_return({ + 'items' => [ + youtube_video_item( + id: 'video-1', + title: '新タイトル', + channel_id: 'UC_MAPPED_LATER' + ) + ] + }) + + sync.sync! + + post.reload + tag_ids = post.tags.pluck(:id) + + expect(post.title).to eq('新タイトル') + expect(tag_ids).to include(deerjikist_tag.id) + expect(tag_ids).not_to include(Tag.no_deerjikist.id) + + expect(PostVersionRecorder).to have_received(:ensure_snapshot!).with( + post, + created_by_user: nil + ) + expect(PostVersionRecorder).to have_received(:record!).with( + post:, + event_type: :update, + created_by_user: nil + ) + end + + it 'matches existing youtu.be URL and does not create duplicate post' do + post = Post.create!( + title: '旧タイトル', + url: 'https://youtu.be/video-1', + uploaded_user_id: nil, + original_created_from: Time.zone.parse('2026-05-01 00:00:00'), + original_created_before: Time.zone.parse('2026-05-01 00:01:00') + ) + + allow(sync).to receive(:query_terms).and_return([]) + allow(sync).to receive(:playlist_ids).and_return(['PL123']) + + allow(client).to receive(:playlist_items).with( + playlist_id: 'PL123', + page_token: nil + ).and_return({ + 'items' => [ + { + 'contentDetails' => { + 'videoId' => 'video-1' + } + } + ] + }) + + allow(client).to receive(:videos).with(['video-1']).and_return({ + 'items' => [ + youtube_video_item( + id: 'video-1', + title: '新タイトル', + channel_id: 'UC_NO_MAPPING' + ) + ] + }) + + expect do + sync.sync! + end.not_to change(Post, :count) + + expect(post.reload.title).to eq('新タイトル') + end + end + + def youtube_video_item(id:, title:, channel_id:) + { + 'id' => id, + 'snippet' => { + 'title' => title, + 'channelId' => channel_id, + 'publishedAt' => '2026-05-01T12:34:56Z', + 'thumbnails' => { + 'high' => { + 'url' => "https://img.youtube.com/#{id}.jpg" + } + }, + 'tags' => ['tag-a', 'tag-b'] + } + } + end +end diff --git a/backend/spec/services/youtube/video_item_spec.rb b/backend/spec/services/youtube/video_item_spec.rb new file mode 100644 index 0000000..4db52da --- /dev/null +++ b/backend/spec/services/youtube/video_item_spec.rb @@ -0,0 +1,93 @@ +require 'rails_helper' + +RSpec.describe Youtube::VideoItem do + describe '#initialize' do + it 'extracts fields from YouTube video API item' do + item = { + 'id' => 'video-1', + 'snippet' => { + 'title' => 'テスト動画', + 'channelId' => 'UC123', + 'publishedAt' => '2026-05-01T12:34:56Z', + 'tags' => ['tag-a', 'tag-b'], + 'thumbnails' => { + 'high' => { + 'url' => 'https://img.youtube.com/high.jpg' + }, + 'medium' => { + 'url' => 'https://img.youtube.com/medium.jpg' + } + } + } + } + + video = described_class.new(item) + + expect(video.id).to eq('video-1') + expect(video.title).to eq('テスト動画') + expect(video.channel_id).to eq('UC123') + expect(video.published_at).to eq(Time.iso8601('2026-05-01T12:34:56Z')) + expect(video.thumbnail_url).to eq('https://img.youtube.com/high.jpg') + expect(video.raw_tags).to eq(['tag-a', 'tag-b']) + expect(video.url).to eq('https://www.youtube.com/watch?v=video-1') + end + + it 'uses highest priority thumbnail' do + item = { + 'id' => 'video-1', + 'snippet' => { + 'title' => 'テスト動画', + 'channelId' => 'UC123', + 'publishedAt' => '2026-05-01T12:34:56Z', + 'thumbnails' => { + 'default' => { + 'url' => 'https://img.youtube.com/default.jpg' + }, + 'standard' => { + 'url' => 'https://img.youtube.com/standard.jpg' + }, + 'maxres' => { + 'url' => 'https://img.youtube.com/maxres.jpg' + } + } + } + } + + video = described_class.new(item) + + expect(video.thumbnail_url).to eq('https://img.youtube.com/maxres.jpg') + end + + it 'falls back to empty raw tags' do + item = { + 'id' => 'video-1', + 'snippet' => { + 'title' => 'テスト動画', + 'channelId' => 'UC123', + 'publishedAt' => '2026-05-01T12:34:56Z', + 'thumbnails' => {} + } + } + + video = described_class.new(item) + + expect(video.raw_tags).to eq([]) + end + + it 'returns nil thumbnail when no thumbnail exists' do + item = { + 'id' => 'video-1', + 'snippet' => { + 'title' => 'テスト動画', + 'channelId' => 'UC123', + 'publishedAt' => '2026-05-01T12:34:56Z', + 'thumbnails' => {} + } + } + + video = described_class.new(item) + + expect(video.thumbnail_url).to be_nil + end + end +end diff --git a/backend/spec/tasks/post_sync_spec.rb b/backend/spec/tasks/post_sync_spec.rb new file mode 100644 index 0000000..c9ce486 --- /dev/null +++ b/backend/spec/tasks/post_sync_spec.rb @@ -0,0 +1,25 @@ +require 'rails_helper' +require 'rake' + +RSpec.describe 'post:sync' do + around do |example| + original_application = Rake.application + Rake.application = Rake::Application.new + + Rake::Task.define_task(:environment) + load Rails.root.join('lib/tasks/sync_posts.rake') + + example.run + ensure + Rake.application = original_application + end + + it 'runs Youtube::Sync' do + sync = instance_double(Youtube::Sync) + + expect(Youtube::Sync).to receive(:new).once.and_return(sync) + expect(sync).to receive(:sync!).once + + Rake::Task['post:sync'].invoke + end +end -- 2.34.1