| @@ -84,6 +84,7 @@ class Tag < ApplicationRecord | |||||
# Returns the :meta tag named 'ニジラー情報不詳', obtained through
# find_or_create_by_tag_name!.
def self.no_deerjikist
  find_or_create_by_tag_name!('ニジラー情報不詳', category: :meta)
end
# Returns the :meta tag named '動画', obtained through
# find_or_create_by_tag_name!.
def self.video
  find_or_create_by_tag_name!('動画', category: :meta)
end
# Returns the :meta tag named 'ニコニコ', obtained through
# find_or_create_by_tag_name!.
def self.niconico
  find_or_create_by_tag_name!('ニコニコ', category: :meta)
end
# Returns the :meta tag named 'YouTube', obtained through
# find_or_create_by_tag_name!.
def self.youtube
  find_or_create_by_tag_name!('YouTube', category: :meta)
end
| def self.normalise_tags tag_names, with_tagme: true, | def self.normalise_tags tag_names, with_tagme: true, | ||||
| with_no_deerjikist: true, | with_no_deerjikist: true, | ||||
| @@ -0,0 +1,46 @@ | |||||
# Runs one YouTube search for a discovery query and stores every returned
# video as a VideoCandidate, linked to the query through a VideoCandidateHit.
class Discovery::YoutubeVideoDiscoverer
  # @param client search client responding to
  #   search_videos(query:, published_after:, published_before:)
  def initialize(client: Youtube::SearchClient.new)
    @client = client
  end

  # Searches with discovery_query.query inside the given publish window and
  # upserts a candidate for every item whose id.kind is 'youtube#video'.
  #
  # @return the 'items' list of the search response (empty array when absent)
  def call(discovery_query:, published_after:, published_before:)
    response = @client.search_videos(
      query: discovery_query.query,
      published_after: published_after,
      published_before: published_before
    )

    response.fetch('items', []).each do |item|
      upsert_candidate!(discovery_query, item) if item.dig('id', 'kind') == 'youtube#video'
    end
  end

  private

  # Creates or refreshes the candidate for one search item, then makes sure a
  # hit row exists for this (candidate, query) pair.
  def upsert_candidate!(discovery_query, item)
    snippet = item.fetch('snippet')
    video_id = item.fetch('id').fetch('videoId')

    candidate = VideoCandidate.find_or_initialize_by(provider: 'youtube', code: video_id)
    candidate.assign_attributes(
      title: snippet['title'].to_s,
      description: snippet['description'].to_s,
      channel_code: snippet['channelId'],
      channel_title: snippet['channelTitle'],
      published_at: Time.zone.parse(snippet['publishedAt']),
      thumbnail_url: snippet.dig('thumbnails', 'high', 'url'),
      raw_data: item,
      last_discovered_at: Time.current
    )
    candidate.save!

    # The block only runs when the hit is newly created; an existing hit is
    # returned untouched.
    VideoCandidateHit.find_or_create_by!(video_candidate: candidate,
                                         discovery_query: discovery_query) do |new_hit|
      new_hit.matched_field = 'youtube_search'
      new_hit.score = 0
      new_hit.searched_at = Time.current
    end
  end
end
| @@ -0,0 +1,51 @@ | |||||
# Imports videos, given as provider-neutral hashes, as Post records.
class PostImporter
  # @param provider [String] source provider; only 'youtube' has a URL scheme
  def initialize(provider:)
    @provider = provider
  end

  # Creates or updates one Post per source video inside a single transaction.
  #
  # @param source_videos [Enumerable<Hash>] hashes with :code, :title and
  #   :uploaded_at, plus an optional :user_code
  # @return [Array] the :code of every video that was imported
  # @raise [KeyError] when a required key is missing
  # @raise [RuntimeError] when the provider has no URL scheme
  def import_posts(source_videos)
    ApplicationRecord.transaction do
      source_videos.map do |source_video|
        code = source_video.fetch(:code)
        upsert_post(source_video, deerjikist_tag_of(source_video))
        code
      end
    end
  end

  private

  attr_reader :provider

  # Tag of the Deerjikist registered for the video's uploader, or nil when the
  # video has no :user_code or no Deerjikist matches.
  # NOTE(review): assumes Deerjikist#platform accepts the provider string and
  # that Deerjikist exposes a `tag` association — confirm against the model.
  def deerjikist_tag_of(source_video)
    user_code = source_video[:user_code]
    return nil if user_code.blank?

    Deerjikist.find_by(platform: provider, code: user_code)&.tag
  end

  # Canonical watch URL for a video code, or nil for an unknown provider.
  def video_url(code)
    case provider
    when 'youtube'
      "https://www.youtube.com/watch?v=#{code}"
    end
  end

  # Finds or builds the Post for the video URL, refreshes its attributes,
  # adds any missing default tags and saves it.
  def upsert_post(source_video, deerjikist_tag)
    url = video_url(source_video.fetch(:code))
    raise "unsupported provider: #{provider.inspect}" if url.blank?

    Post.find_or_initialize_by(url: url).tap do |post|
      post.title = source_video.fetch(:title)
      post.uploaded_user_id = nil
      post.original_created_from = source_video.fetch(:uploaded_at)
      post.original_created_before = post.original_created_from + 1.minute

      wanted_tags = [deerjikist_tag || Tag.no_deerjikist,
                     Tag.youtube, Tag.video, Tag.bot, Tag.tagme]
      # Re-importing an existing post must not attach the same tag twice.
      wanted_tags.each { |tag| post.tags << tag unless post.tags.include?(tag) }

      post.save!
    end
  end
end
| @@ -0,0 +1,87 @@ | |||||
module VideoSources
  module Youtube
    # Faraday-based reader for the YouTube Data API v3 `videos` and
    # `commentThreads` endpoints. Results are returned as plain hashes.
    class Client
      API_BASE = 'https://www.googleapis.com/youtube/v3'

      # Number of ids sent per `videos` request; larger id lists are split
      # into batches of this size.
      MAX_IDS_PER_REQUEST = 50

      # @param api_key [String] defaults to ENV['YOUTUBE_API_KEY'] (KeyError when unset)
      def initialize(api_key: ENV.fetch('YOUTUBE_API_KEY'))
        @api_key = api_key
      end

      # Fetches snippet and statistics for the given video ids.
      #
      # @param video_ids [Array<String>]
      # @return [Array<Hash>] one hash per returned item (see #build_video),
      #   in request order across batches
      def videos(video_ids)
        return [] if video_ids.empty?

        video_ids.each_slice(MAX_IDS_PER_REQUEST).flat_map do |ids|
          response = connection.get('videos', { part: 'snippet,statistics',
                                                id: ids.join(','),
                                                key: @api_key })
          JSON.parse(response.body).fetch('items', []).map { |item| build_video(item) }
        end
      end

      # Fetches every top-level comment thread of a video, following
      # nextPageToken until the last page.
      #
      # @param video_id [String]
      # @return [Array<Hash>] one hash per thread (see #build_comment); an
      #   empty array when the API answers 403 — in that case comments read
      #   from earlier pages are dropped as well
      def comments(video_id)
        comments = []
        page_token = nil

        loop do
          response = connection.get('commentThreads', {
            part: 'snippet',
            videoId: video_id,
            maxResults: 100,
            textFormat: 'plainText',
            pageToken: page_token,
            key: @api_key }.compact)
          body = JSON.parse(response.body)
          comments.concat(body.fetch('items', []).map { |item| build_comment(item) })

          page_token = body['nextPageToken']
          break if page_token.blank?
        end

        comments
      rescue Faraday::ForbiddenError
        []
      end

      private

      # Memoized connection; the :raise_error middleware turns HTTP error
      # statuses into Faraday exceptions.
      def connection
        @connection ||= Faraday.new(url: API_BASE) do |faraday|
          faraday.response :raise_error
        end
      end

      # Maps one `videos` item to the provider-neutral video hash.
      def build_video(item)
        snippet = item.fetch('snippet')
        statistics = item.fetch('statistics', {})

        { provider: 'youtube',
          code: item.fetch('id'),
          user_code: snippet['channelId'],
          title: snippet['title'].to_s,
          description: snippet['description'].to_s,
          tag_names: snippet.fetch('tags', []),
          views_count: statistics.fetch('viewCount', 0).to_i,
          uploaded_at: Time.zone.parse(snippet.fetch('publishedAt')) }
      end

      # Maps one `commentThreads` item to a comment hash built from its
      # top-level comment; comment_no and vpos_ms are always nil here.
      def build_comment(item)
        snippet = item.fetch('snippet').fetch('topLevelComment').fetch('snippet')

        { provider_comment_id: item.fetch('id'),
          user_code: snippet['authorChannelId']&.fetch('value', nil),
          content: snippet['textDisplay'].to_s,
          posted_at: Time.zone.parse(snippet.fetch('publishedAt')),
          reaction_count: snippet.fetch('likeCount', 0).to_i,
          comment_no: nil,
          vpos_ms: nil }
      end
    end
  end
end
| @@ -0,0 +1,68 @@ | |||||
| require 'json' | |||||
| require 'net/http' | |||||
| require 'uri' | |||||
module Youtube
  # Net::HTTP-based reader for the YouTube Data API v3. Every public method
  # returns the parsed JSON response body.
  class ApiClient
    ENDPOINT = 'https://www.googleapis.com/youtube/v3'

    # @param api_key [String] defaults to ENV['YOUTUBE_API_KEY'] (KeyError when unset)
    def initialize(api_key: ENV.fetch('YOUTUBE_API_KEY'))
      @api_key = api_key
    end

    # Requests one page of /search for videos matching q, newest first,
    # published after the given time.
    def search_videos(q:, published_after:, page_token: nil)
      params = {
        part: 'snippet',
        type: 'video',
        q: q,
        order: 'date',
        maxResults: 50,
        publishedAfter: published_after.iso8601,
        pageToken: page_token
      }
      get_json('/search', params.compact)
    end

    # Requests /videos for the given ids; an empty id list short-circuits to
    # an empty 'items' response without any HTTP call.
    def videos(ids)
      if ids.empty?
        { 'items' => [] }
      else
        get_json('/videos', { part: 'snippet,status,contentDetails', id: ids.join(',') })
      end
    end

    # Requests one page of /playlistItems for a playlist.
    def playlist_items(playlist_id:, page_token: nil)
      params = {
        part: 'snippet,contentDetails,status',
        playlistId: playlist_id,
        maxResults: 50,
        pageToken: page_token
      }
      get_json('/playlistItems', params.compact)
    end

    # Requests /channels filtered by id and/or handle, whichever is given.
    def channel(id: nil, handle: nil)
      filters = { id: id, forHandle: handle }.select { |_name, value| value }
      get_json('/channels', { part: 'snippet,contentDetails' }.merge(filters))
    end

    private

    # Performs a GET against ENDPOINT + path with the API key appended to the
    # query string, and parses the JSON body.
    #
    # @raise [RuntimeError] when the response is not a Net::HTTPSuccess
    def get_json(path, params)
      target = URI(ENDPOINT + path)
      target.query = URI.encode_www_form(params.merge(key: @api_key))

      http_options = { use_ssl: true, open_timeout: 10, read_timeout: 30 }
      response = Net::HTTP.start(target.host, target.port, **http_options) { |http| http.get(target) }

      return JSON.parse(response.body) if response.is_a?(Net::HTTPSuccess)

      raise "YouTube API error: #{ response.code } #{ response.body }"
    end
  end
end
| @@ -0,0 +1,34 @@ | |||||
module Youtube
  # Faraday-based wrapper around the YouTube Data API v3 `search` endpoint.
  class SearchClient
    API_BASE = 'https://www.googleapis.com/youtube/v3'

    # @param api_key [String] defaults to ENV['YOUTUBE_API_KEY'] (KeyError when unset)
    def initialize(api_key: ENV.fetch('YOUTUBE_API_KEY'))
      @api_key = api_key
    end

    # Requests one page of video search results, newest first, with
    # regionCode 'JP' and relevanceLanguage 'ja'. Parameters whose value is
    # nil are left out of the request.
    #
    # @return [Hash] the parsed JSON response body
    def search_videos(query:, published_after: nil, published_before: nil, page_token: nil)
      params = {
        part: 'snippet',
        q: query,
        type: 'video',
        order: 'date',
        maxResults: 50,
        regionCode: 'JP',
        relevanceLanguage: 'ja',
        publishedAfter: published_after&.iso8601,
        publishedBefore: published_before&.iso8601,
        pageToken: page_token,
        key: @api_key
      }
      request_params = params.reject { |_name, value| value.nil? }

      JSON.parse(connection.get('search', request_params).body)
    end

    private

    # Memoized connection; the :raise_error middleware turns HTTP error
    # statuses into Faraday exceptions.
    def connection
      @connection ||= Faraday.new(url: API_BASE) { |faraday| faraday.response :raise_error }
    end
  end
end
| @@ -0,0 +1,152 @@ | |||||
| require 'open-uri' | |||||
| require 'set' | |||||
| require 'time' | |||||
module Youtube
  # Discovers YouTube videos through fixed search terms and playlists, then
  # mirrors each video into a Post together with its tags, thumbnail and
  # version history.
  class Sync
    # @param client API client responding to search_videos, playlist_items and videos
    def initialize(client: ApiClient.new)
      @client = client
    end

    # Runs one full synchronisation pass. Does nothing when no video id is
    # discovered. Video details are requested in batches of 50 ids.
    def sync!
      video_ids = discover_video_ids
      return if video_ids.empty?

      video_ids.each_slice(50) do |ids|
        @client.videos(ids).fetch('items', []).each do |item|
          sync_video!(VideoItem.new(item))
        end
      end
    end

    private

    # Collects the distinct video ids found by every search term and playlist.
    # NOTE(review): page_token is never passed, so only the first response
    # page of each search and playlist is read.
    #
    # @return [Array<String>]
    def discover_video_ids
      ids = Set.new

      query_terms.each do |q|
        response = @client.search_videos(q: q, published_after: sync_since)
        response.fetch('items', []).each do |item|
          video_id = item.dig('id', 'videoId')
          ids << video_id if video_id.present?
        end
      end

      playlist_ids.each do |playlist_id|
        response = @client.playlist_items(playlist_id: playlist_id)
        response.fetch('items', []).each do |item|
          video_id = item.dig('contentDetails', 'videoId')
          ids << video_id if video_id.present?
        end
      end

      ids.to_a
    end

    # Creates or updates the Post for one video, reconciles its tags and
    # records a post version when anything changed.
    #
    # @param video [VideoItem]
    def sync_video!(video)
      post = Post.where('url REGEXP ?', youtube_url_regexp(video.id)).first

      # The publish time is stored as a one-minute window starting at the
      # publish minute.
      original_created_from = video.published_at.change(sec: 0)
      original_created_before = original_created_from + 1.minute

      post_created = false
      post_changed = false

      if post
        post.assign_attributes(
          title: video.title,
          original_created_from: original_created_from,
          original_created_before: original_created_before,
          thumbnail_base: video.thumbnail_url)
        post_changed = post.changed?
        post.save! if post_changed
        attach_thumbnail_if_needed!(post, video.thumbnail_url)
      else
        post_created = true
        post = Post.create!(
          title: video.title,
          url: video.url,
          thumbnail_base: video.thumbnail_url,
          uploaded_user: nil,
          original_created_from: original_created_from,
          original_created_before: original_created_before)
        attach_thumbnail_if_needed!(post, video.thumbnail_url)
        sync_post_tags!(post, [Tag.tagme.id, Tag.bot.id, Tag.youtube.id, Tag.video.id])
      end

      kept_tag_ids = post.tags.pluck(:id).to_set
      desired_tag_ids = kept_tag_ids.to_a

      # Add the channel's deerjikist tag when one is registered; otherwise add
      # the "no deerjikist" tag unless the post already has a deerjikist tag.
      deerjikist = Deerjikist.find_by(platform: :youtube, code: video.channel_id)
      if deerjikist
        desired_tag_ids << deerjikist.tag_id
      elsif post.tags.where(category: :deerjikist).none?
        desired_tag_ids << Tag.no_deerjikist.id
      end
      desired_tag_ids.uniq!

      sync_post_tags!(post, desired_tag_ids, current_tag_ids: kept_tag_ids)

      if post_created
        PostVersionRecorder.record!(post: post, event_type: :create, created_by_user: nil)
      elsif post_changed || kept_tag_ids != desired_tag_ids.to_set
        PostVersionRecorder.ensure_snapshot!(post, created_by_user: nil)
        PostVersionRecorder.record!(post: post, event_type: :update, created_by_user: nil)
      end
    end

    # Makes the post's kept tags equal to desired_tag_ids: creates missing
    # PostTag rows and discards kept rows that are no longer desired.
    #
    # @param current_tag_ids [Set, nil] kept tag ids if already known;
    #   loaded from PostTag.kept when nil
    def sync_post_tags!(post, desired_tag_ids, current_tag_ids: nil)
      current_tag_ids ||= PostTag.kept.where(post_id: post.id).pluck(:tag_id).to_set
      desired_tag_ids = desired_tag_ids.compact.to_set

      to_add = desired_tag_ids - current_tag_ids
      to_remove = current_tag_ids - desired_tag_ids

      Tag.where(id: to_add.to_a).find_each do |tag|
        begin
          PostTag.create!(post: post, tag: tag)
        rescue ActiveRecord::RecordNotUnique
          # The row already exists (e.g. created concurrently); nothing to do.
          nil
        end
      end

      PostTag.where(post_id: post.id, tag_id: to_remove.to_a).kept.find_each do |pt|
        pt.discard_by!(nil)
      end
    end

    # Downloads the thumbnail and attaches it to the post, unless one is
    # already attached or no URL is known, then regenerates the resized
    # thumbnail.
    def attach_thumbnail_if_needed!(post, thumbnail_url)
      return if post.thumbnail.attached?
      return if thumbnail_url.blank?

      # Block form closes the downloaded IO as soon as the attach call returns.
      # NOTE(review): assumes the attachment is fully read inside `attach`
      # (no enclosing transaction deferring the upload) — confirm.
      URI.open(thumbnail_url) do |io|
        post.thumbnail.attach(
          io: io,
          # No space before the parentheses: `URI.parse (url).path` would bind
          # `.path` to the String argument instead of the parsed URI.
          filename: File.basename(URI.parse(thumbnail_url).path),
          content_type: 'image/jpeg')
      end
      post.resized_thumbnail!
    end

    # SQL REGEXP source matching a youtube.com/watch?v=<id> or youtu.be/<id>
    # URL whose id is not followed by another id character.
    def youtube_url_regexp(id)
      escaped = Regexp.escape(id)
      "(youtube\\.com/watch\\?v=#{ escaped }|youtu\\.be/#{ escaped })([^A-Za-z0-9_-]|$)"
    end

    # Search terms used for discovery.
    def query_terms
      ['ぼざろクリーチャーシリーズ', '伊地知ニジカ', '伊地知虹鹿']
    end

    # Playlists whose items are always included in discovery.
    def playlist_ids
      ['PLrOch4zHkI5vu29b-f9umUQQ4tQkuWLPX',
       'PLrOch4zHkI5vOK0RaytQq6PbucxQkkL0K',
       'PLrOch4zHkI5tdwm9vSegiDQJOM-hgpcOC']
    end

    # Lower bound of the publish time used for searches.
    def sync_since
      14.days.ago
    end
  end
end
| @@ -0,0 +1,29 @@ | |||||
module Youtube
  # Read-only view over one item of a YouTube `videos` API response.
  class VideoItem
    attr_reader :id, :title, :channel_id, :published_at, :thumbnail_url, :raw_tags

    # @param item [Hash] API item with 'id' and 'snippet' keys
    # @raise [KeyError] when 'id' or 'snippet' is missing
    def initialize(item)
      snippet = item.fetch('snippet')

      @id = item.fetch('id')
      @title = snippet['title']
      @channel_id = snippet['channelId']
      @published_at = Time.iso8601(snippet['publishedAt'])
      @thumbnail_url = pick_thumbnail(snippet['thumbnails'] || {})
      @raw_tags = snippet['tags'] || []
    end

    # Watch-page URL of the video.
    def url
      "https://www.youtube.com/watch?v=#{ @id }"
    end

    private

    # URL of the first thumbnail size, from 'maxres' down to 'default', that
    # has a non-blank 'url'; nil when none does.
    def pick_thumbnail(thumbnails)
      %w[maxres standard high medium default]
        .lazy
        .map { |size| thumbnails.dig(size, 'url') }
        .find(&:present?)
    end
  end
end
| @@ -0,0 +1,6 @@ | |||||
namespace :post do
  # post:sync — runs the :environment task first, then one
  # Youtube::Sync#sync! pass.
  desc '投稿同期(ニコニコ以外)'
  task :sync => [:environment] do
    Youtube::Sync.new.sync!
  end
end