| @@ -84,6 +84,7 @@ class Tag < ApplicationRecord | |||
# Meta tag named 'ニジラー情報不詳', looked up (or created) by name.
def self.no_deerjikist
  find_or_create_by_tag_name!('ニジラー情報不詳', category: :meta)
end

# Meta tag named '動画', looked up (or created) by name.
def self.video
  find_or_create_by_tag_name!('動画', category: :meta)
end

# Meta tag named 'ニコニコ', looked up (or created) by name.
def self.niconico
  find_or_create_by_tag_name!('ニコニコ', category: :meta)
end

# Meta tag named 'YouTube', looked up (or created) by name.
def self.youtube
  find_or_create_by_tag_name!('YouTube', category: :meta)
end
| def self.normalise_tags tag_names, with_tagme: true, | |||
| with_no_deerjikist: true, | |||
| @@ -0,0 +1,46 @@ | |||
# Runs one YouTube search for a discovery query and records every returned
# video as a VideoCandidate plus a VideoCandidateHit linking it to the query.
class Discovery::YoutubeVideoDiscoverer
  # @param client [#search_videos] search client; defaults to a new
  #   Youtube::SearchClient
  def initialize(client: Youtube::SearchClient.new)
    @client = client
  end

  # Searches with the query's text inside the given publication window and
  # upserts a candidate for each result whose id kind is 'youtube#video'.
  #
  # @param discovery_query [#query] the query record being executed
  # @param published_after passed through to the client
  # @param published_before passed through to the client
  # @return the 'items' collection that was iterated
  def call discovery_query:, published_after:, published_before:
    response = @client.search_videos(query: discovery_query.query,
                                     published_after:,
                                     published_before:)

    response.fetch('items', []).each do |item|
      upsert_candidate!(discovery_query, item) if item.dig('id', 'kind') == 'youtube#video'
    end
  end

  private

  # Creates or refreshes the candidate for one search result, then makes sure
  # a hit row exists for (candidate, query). The hit's fields are only filled
  # in when the hit is newly created.
  def upsert_candidate! discovery_query, item
    snippet = item.fetch('snippet')
    code = item.fetch('id').fetch('videoId')

    candidate = VideoCandidate.find_or_initialize_by(provider: 'youtube', code:)
    candidate.assign_attributes(
      title: snippet['title'].to_s,
      description: snippet['description'].to_s,
      channel_code: snippet['channelId'],
      channel_title: snippet['channelTitle'],
      published_at: Time.zone.parse(snippet['publishedAt']),
      thumbnail_url: snippet.dig('thumbnails', 'high', 'url'),
      raw_data: item,
      last_discovered_at: Time.current)
    candidate.save!

    VideoCandidateHit.find_or_create_by!(video_candidate: candidate, discovery_query:) do |hit|
      hit.matched_field = 'youtube_search'
      hit.score = 0
      hit.searched_at = Time.current
    end
  end
end
| @@ -0,0 +1,51 @@ | |||
# Imports videos fetched from an external provider as Post records,
# creating the post when its URL is new and updating it otherwise.
class PostImporter
  # @param provider [String] provider identifier; only 'youtube' is handled
  def initialize provider:
    @provider = provider
  end

  # Upserts one Post per source video inside a single transaction, so a
  # failure on any video rolls back the whole batch.
  #
  # @param source_videos [Enumerable<Hash>] hashes with :code, :title,
  #   :uploaded_at and optionally :user_code
  # @return the given source_videos
  # @raise [RuntimeError] when the provider is not supported
  # @raise [KeyError] when a required key is missing from a video hash
  def import_posts source_videos
    ApplicationRecord.transaction do
      source_videos.each do |source_video|
        upsert_post(source_video, deerjikist_tag_of(source_video))
      end
    end
  end

  private

  attr_reader :provider

  # Tag of the Deerjikist registered for the video's uploader, or nil when
  # the video has no user code or no matching Deerjikist exists.
  # NOTE(review): assumes Deerjikist exposes a `tag` association alongside
  # the `tag_id` column — confirm against the model.
  def deerjikist_tag_of source_video
    user_code = source_video[:user_code]
    return nil if user_code.blank?

    Deerjikist.find_by(platform: provider, code: user_code)&.tag
  end

  # Canonical watch URL for the video, or nil for an unknown provider.
  # The YouTube form matches the `watch?v=` URLs used elsewhere for lookup,
  # so both import paths resolve to the same Post.
  def post_url_of source_video
    case provider
    when 'youtube'
      "https://www.youtube.com/watch?v=#{ source_video.fetch(:code) }"
    end
  end

  # Finds or builds the Post for the video's URL, refreshes its attributes,
  # attaches any missing default tags and saves it.
  def upsert_post source_video, deerjikist_tag
    url = post_url_of(source_video)
    raise "unsupported provider: #{ provider.inspect }" if url.blank?

    Post.find_or_initialize_by(url:).tap do |post|
      post.title = source_video.fetch(:title)
      post.uploaded_user_id = nil
      post.original_created_from = source_video.fetch(:uploaded_at)
      post.original_created_before = post.original_created_from + 1.minute

      # Re-importing an existing post must not append the same tags again.
      # NOTE(review): an existing post keeps whatever deerjikist-related tag
      # it already has; this only adds, never replaces.
      [deerjikist_tag || Tag.no_deerjikist,
       Tag.youtube,
       Tag.video,
       Tag.bot,
       Tag.tagme].each do |tag|
        post.tags << tag unless post.tags.include?(tag)
      end

      post.save!
    end
  end
end
| @@ -0,0 +1,87 @@ | |||
module VideoSources
  module Youtube
    # Client for the YouTube Data API v3 that returns videos and top-level
    # comments as plain hashes.
    class Client
      API_BASE = 'https://www.googleapis.com/youtube/v3'

      # Upper bound on ids sent in one `videos` request.
      # NOTE(review): 50 is the commonly cited per-request cap for
      # videos.list — confirm against the API reference.
      MAX_IDS_PER_REQUEST = 50

      # @param api_key [String] API key; defaults to ENV['YOUTUBE_API_KEY']
      #   and raises KeyError when that variable is unset
      def initialize api_key: ENV.fetch('YOUTUBE_API_KEY')
        @api_key = api_key
      end

      # Fetches snippet and statistics for the given ids, issuing one request
      # per batch of MAX_IDS_PER_REQUEST so long id lists are not sent in a
      # single oversized request.
      #
      # @param video_ids [Array<String>]
      # @return [Array<Hash>] one hash per returned item (see #build_video)
      def videos video_ids
        return [] if video_ids.empty?

        video_ids.each_slice(MAX_IDS_PER_REQUEST).flat_map do |ids|
          response = connection.get('videos', { part: 'snippet,statistics',
                                                id: ids.join(','),
                                                key: @api_key })
          JSON.parse(response.body).fetch('items', []).map { |item| build_video(item) }
        end
      end

      # Fetches every page of top-level comment threads for a video.
      #
      # @param video_id [String]
      # @return [Array<Hash>] one hash per thread (see #build_comment);
      #   an empty array when the API answers 403
      def comments video_id
        comments = []
        page_token = nil

        loop do
          response = connection.get('commentThreads', {
            part: 'snippet',
            videoId: video_id,
            maxResults: 100,
            textFormat: 'plainText',
            pageToken: page_token,
            key: @api_key }.compact)
          body = JSON.parse(response.body)
          comments.concat(body.fetch('items', []).map { |item| build_comment(item) })

          page_token = body['nextPageToken']
          break if page_token.blank?
        end

        comments
      rescue Faraday::ForbiddenError
        # NOTE(review): presumably 403 means comments are disabled for the
        # video. Pages collected before the 403 are discarded as well.
        []
      end

      private

      # Memoized Faraday connection that raises on 4xx/5xx responses.
      def connection
        @connection ||= Faraday.new(url: API_BASE) do |faraday|
          faraday.response :raise_error
        end
      end

      # Converts a videos.list item into the provider-neutral video hash.
      def build_video item
        snippet = item.fetch('snippet')
        statistics = item.fetch('statistics', { })

        { provider: 'youtube',
          code: item.fetch('id'),
          user_code: snippet['channelId'],
          title: snippet['title'].to_s,
          description: snippet['description'].to_s,
          tag_names: snippet.fetch('tags', []),
          views_count: statistics.fetch('viewCount', 0).to_i,
          uploaded_at: Time.zone.parse(snippet.fetch('publishedAt')) }
      end

      # Converts a commentThreads item into the provider-neutral comment
      # hash, reading the fields of its top-level comment.
      def build_comment item
        snippet =
          item
            .fetch('snippet')
            .fetch('topLevelComment')
            .fetch('snippet')

        { provider_comment_id: item.fetch('id'),
          user_code: snippet['authorChannelId']&.fetch('value', nil),
          content: snippet['textDisplay'].to_s,
          posted_at: Time.zone.parse(snippet.fetch('publishedAt')),
          reaction_count: snippet.fetch('likeCount', 0).to_i,
          comment_no: nil,
          vpos_ms: nil }
      end
    end
  end
end
| @@ -0,0 +1,68 @@ | |||
| require 'json' | |||
| require 'net/http' | |||
| require 'uri' | |||
module Youtube
  # Net::HTTP based client for the YouTube Data API v3. Every public method
  # returns the parsed JSON response body as a Hash.
  class ApiClient
    ENDPOINT = 'https://www.googleapis.com/youtube/v3'

    # @param api_key [String] API key; defaults to ENV['YOUTUBE_API_KEY']
    #   and raises KeyError when that variable is unset
    def initialize api_key: ENV.fetch('YOUTUBE_API_KEY')
      @api_key = api_key
    end

    # Searches videos matching q, newest first, at most 50 per page.
    #
    # @param q [String] search terms
    # @param published_after [#iso8601] lower bound for the publish time
    # @param page_token [String, nil] token of the page to fetch
    def search_videos q:, published_after:, page_token: nil
      get_json('/search', {
        part: 'snippet',
        type: 'video',
        q: q,
        order: 'date',
        maxResults: 50,
        publishedAfter: published_after.iso8601,
        pageToken: page_token }.compact)
    end

    # Fetches details for the given video ids in one request.
    # Returns an empty item list without calling the API when ids is empty.
    #
    # @param ids [Array<String>]
    def videos ids
      return { 'items' => [] } if ids.empty?

      # No space before the argument parenthesis: `join (',')` would be read
      # as a grouped expression instead of an argument list.
      get_json('/videos', { part: 'snippet,status,contentDetails', id: ids.join(',') })
    end

    # Fetches one page (at most 50 entries) of a playlist's items.
    def playlist_items playlist_id:, page_token: nil
      get_json('/playlistItems', {
        part: 'snippet,contentDetails,status',
        playlistId: playlist_id,
        maxResults: 50,
        pageToken: page_token }.compact)
    end

    # Looks up a channel by id and/or handle; whichever is given is sent.
    def channel id: nil, handle: nil
      params = { part: 'snippet,contentDetails' }
      params[:id] = id if id
      params[:forHandle] = handle if handle

      get_json('/channels', params)
    end

    private

    # Performs a GET against ENDPOINT + path with the API key appended to
    # the query string and parses the JSON body.
    #
    # @raise [RuntimeError] when the response status is not 2xx
    def get_json path, params
      uri = URI(ENDPOINT + path)
      uri.query = URI.encode_www_form(params.merge(key: @api_key))

      response = Net::HTTP.start(uri.host,
                                 uri.port,
                                 use_ssl: true,
                                 open_timeout: 10,
                                 read_timeout: 30) do |http|
        http.get(uri)
      end

      unless response.is_a?(Net::HTTPSuccess)
        raise "YouTube API error: #{ response.code } #{ response.body }"
      end

      JSON.parse(response.body)
    end
  end
end
| @@ -0,0 +1,34 @@ | |||
module Youtube
  # Faraday based client for the YouTube Data API v3 `search` endpoint.
  class SearchClient
    API_BASE = 'https://www.googleapis.com/youtube/v3'

    # @param api_key [String] API key; defaults to ENV['YOUTUBE_API_KEY']
    #   and raises KeyError when that variable is unset
    def initialize api_key: ENV.fetch('YOUTUBE_API_KEY')
      @api_key = api_key
    end

    # Searches videos for the query, newest first, with region 'JP' and
    # relevance language 'ja', at most 50 results per page. Parameters whose
    # value is nil are left out of the request.
    #
    # @param query [String] search terms
    # @param published_after [#iso8601, nil] lower bound for the publish time
    # @param published_before [#iso8601, nil] upper bound for the publish time
    # @param page_token [String, nil] token of the page to fetch
    # @return [Hash] parsed JSON response body
    def search_videos query:, published_after: nil, published_before: nil, page_token: nil
      request_params = {
        part: 'snippet',
        q: query,
        type: 'video',
        order: 'date',
        maxResults: 50,
        regionCode: 'JP',
        relevanceLanguage: 'ja',
        publishedAfter: published_after&.iso8601,
        publishedBefore: published_before&.iso8601,
        pageToken: page_token,
        key: @api_key }

      raw = connection.get('search', request_params.compact)
      JSON.parse(raw.body)
    end

    private

    # Memoized Faraday connection that raises on 4xx/5xx responses.
    def connection
      @connection ||= Faraday.new(url: API_BASE) { |faraday| faraday.response :raise_error }
    end
  end
end
| @@ -0,0 +1,152 @@ | |||
| require 'open-uri' | |||
| require 'set' | |||
| require 'time' | |||
module Youtube
  # Discovers YouTube videos through keyword searches and fixed playlists and
  # mirrors each one into a Post, including its tags and thumbnail.
  class Sync
    # @param client [#search_videos, #playlist_items, #videos] API client;
    #   defaults to a new ApiClient
    def initialize client: ApiClient.new
      @client = client
    end

    # Discovers candidate video ids, fetches their details in batches of 50
    # and syncs every returned video. Does nothing when no id was found.
    def sync!
      video_ids = discover_video_ids
      return if video_ids.empty?

      video_ids.each_slice(50) do |ids|
        @client.videos(ids).fetch('items', []).each do |item|
          sync_video!(VideoItem.new(item))
        end
      end
    end

    private

    # Collects unique video ids from the first result page of every search
    # term and of every configured playlist.
    #
    # @return [Array<String>]
    def discover_video_ids
      ids = Set.new

      query_terms.each do |q|
        response = @client.search_videos(q:, published_after: sync_since)
        response.fetch('items', []).each do |item|
          video_id = item.dig('id', 'videoId')
          ids << video_id if video_id.present?
        end
      end

      playlist_ids.each do |playlist_id|
        response = @client.playlist_items(playlist_id:)
        response.fetch('items', []).each do |item|
          video_id = item.dig('contentDetails', 'videoId')
          ids << video_id if video_id.present?
        end
      end

      ids.to_a
    end

    # Creates or updates the Post for one video, reconciles its tags and
    # records a post version when something was created or changed.
    def sync_video! video
      post = Post.where('url REGEXP ?', youtube_url_regexp(video.id)).first

      # The creation time is stored as a one-minute window starting at the
      # publish time truncated to the minute.
      original_created_from = video.published_at.change(sec: 0)
      original_created_before = original_created_from + 1.minute

      post_created = false
      post_changed = false

      if post
        post.assign_attributes(
          title: video.title,
          original_created_from:,
          original_created_before:,
          thumbnail_base: video.thumbnail_url)
        post_changed = post.changed?
        post.save! if post_changed
        attach_thumbnail_if_needed!(post, video.thumbnail_url)
      else
        post_created = true
        post = Post.create!(
          title: video.title,
          url: video.url,
          thumbnail_base: video.thumbnail_url,
          uploaded_user: nil,
          original_created_from:,
          original_created_before:)
        attach_thumbnail_if_needed!(post, video.thumbnail_url)
        sync_post_tags!(post, [Tag.tagme.id, Tag.bot.id, Tag.youtube.id, Tag.video.id])
      end

      kept_tag_ids = post.tags.pluck(:id).to_set
      desired_tag_ids = kept_tag_ids.to_a

      # Add the channel owner's tag when known; otherwise fall back to the
      # "unknown" tag unless the post already carries a deerjikist tag.
      deerjikist = Deerjikist.find_by(platform: :youtube, code: video.channel_id)
      if deerjikist
        desired_tag_ids << deerjikist.tag_id
      elsif post.tags.where(category: :deerjikist).none?
        desired_tag_ids << Tag.no_deerjikist.id
      end
      desired_tag_ids.uniq!

      sync_post_tags!(post, desired_tag_ids, current_tag_ids: kept_tag_ids)

      if post_created
        PostVersionRecorder.record!(post:, event_type: :create, created_by_user: nil)
      elsif post_changed || kept_tag_ids != desired_tag_ids.to_set
        PostVersionRecorder.ensure_snapshot!(post, created_by_user: nil)
        PostVersionRecorder.record!(post:, event_type: :update, created_by_user: nil)
      end
    end

    # Brings the post's kept tag links in line with desired_tag_ids: creates
    # the missing links and discards the ones no longer wanted.
    #
    # @param current_tag_ids [Set, nil] tag ids currently linked; looked up
    #   from the kept PostTag rows when omitted
    def sync_post_tags! post, desired_tag_ids, current_tag_ids: nil
      current_tag_ids ||= PostTag.kept.where(post_id: post.id).pluck(:tag_id).to_set
      desired_tag_ids = desired_tag_ids.compact.to_set

      to_add = desired_tag_ids - current_tag_ids
      to_remove = current_tag_ids - desired_tag_ids

      Tag.where(id: to_add.to_a).find_each do |tag|
        begin
          PostTag.create!(post:, tag:)
        rescue ActiveRecord::RecordNotUnique
          # The link already exists; nothing to do.
        end
      end

      PostTag.where(post_id: post.id, tag_id: to_remove.to_a).kept.find_each do |pt|
        pt.discard_by!(nil)
      end
    end

    # Downloads and attaches the thumbnail unless one is already attached or
    # no URL is available, then regenerates the resized thumbnail.
    def attach_thumbnail_if_needed! post, thumbnail_url
      return if post.thumbnail.attached?
      return if thumbnail_url.blank?

      # The argument parentheses must follow the method name directly:
      # `URI.parse (url).path` would call `path` on the String instead of on
      # the parsed URI.
      post.thumbnail.attach(
        io: URI.open(thumbnail_url),
        filename: File.basename(URI.parse(thumbnail_url).path),
        content_type: 'image/jpeg')
      post.resized_thumbnail!
    end

    # Pattern (used with SQL REGEXP) matching watch and youtu.be URLs of
    # exactly this video id, i.e. not followed by another id character.
    def youtube_url_regexp id
      escaped = Regexp.escape(id)
      "(youtube\\.com/watch\\?v=#{ escaped }|youtu\\.be/#{ escaped })([^A-Za-z0-9_-]|$)"
    end

    # Search terms used for discovery.
    def query_terms = ['ぼざろクリーチャーシリーズ', '伊地知ニジカ', '伊地知虹鹿']

    # Playlists whose items are always included in discovery.
    def playlist_ids
      ['PLrOch4zHkI5vu29b-f9umUQQ4tQkuWLPX',
       'PLrOch4zHkI5vOK0RaytQq6PbucxQkkL0K',
       'PLrOch4zHkI5tdwm9vSegiDQJOM-hgpcOC']
    end

    # Lower bound for the publish time of searched videos.
    def sync_since = 14.days.ago
  end
end
| @@ -0,0 +1,29 @@ | |||
module Youtube
  # Read-only view over one item of a YouTube `videos` API response.
  class VideoItem
    attr_reader :id, :title, :channel_id, :published_at, :thumbnail_url, :raw_tags

    # @param item [Hash] API item with string keys; 'id' and 'snippet' are
    #   required (KeyError otherwise)
    def initialize item
      @id = item.fetch('id')

      details = item.fetch('snippet')
      @title = details['title']
      @channel_id = details['channelId']
      @published_at = Time.iso8601(details['publishedAt'])
      @thumbnail_url = pick_thumbnail(details['thumbnails'] || { })
      @raw_tags = details['tags'] || []
    end

    # Watch-page URL of the video.
    def url
      "https://www.youtube.com/watch?v=#{ @id }"
    end

    private

    # URL of the first thumbnail variant that has one, trying the variants
    # in the order listed; nil when none is present.
    def pick_thumbnail thumbnails
      %w[maxres standard high medium default]
        .lazy
        .map { |variant| thumbnails.dig(variant, 'url') }
        .find(&:present?)
    end
  end
end
| @@ -0,0 +1,6 @@ | |||
namespace :post do
  # Runs the YouTube post synchronisation with the Rails environment loaded.
  desc '投稿同期(ニコニコ以外)'
  task(sync: :environment) { Youtube::Sync.new.sync! }
end