|
- # pylint: disable = missing-class-docstring
- # pylint: disable = missing-function-docstring
-
- """
- 日次で実行し,ぼざクリ DB を最新に更新する.
- """
-
- from __future__ import annotations
-
- import json
- import logging
- import random
- import string
- import time
- import unicodedata
- from datetime import date, datetime, timedelta
- from typing import Any, TypedDict, cast
-
- import jaconv
- import requests
-
- from db.config import DB
- from db.models import (Comment,
- Tag,
- TrackedVideo,
- User,
- Video,
- VideoHistory,
- VideoTag)
-
# Module-level logger. basicConfig is invoked at import time because this
# module is a standalone daily batch script (see the __main__ guard).
logger = logging.getLogger (__name__)
logging.basicConfig (
    level = logging.INFO,
    format = '%(asctime)s %(levelname)s %(message)s')
-
-
class SearchNicoResult (TypedDict):
    """Aggregated result of the snapshot search plus tracked-video fetch."""
    # All fetched videos, one entry per video code.
    videos: list['VideoResult']
    # False when any snapshot window or tracked-video lookup failed;
    # update_tables skips soft-deletion in that case.
    is_complete: bool
-
-
class UpdateContext (TypedDict):
    """Everything update_tables needs to refresh the DB in one pass."""
    # Per-video metadata fetched from the niconico APIs.
    api_data: list['VideoResult']
    # Comments keyed by video code; videos whose fetch failed map to [].
    comments_by_video_code: dict[str, list['CommentResult']]
    # True only when the fetch was complete; gates the soft-delete pass.
    deletable: bool
-
-
class VideoSearchParam (TypedDict):
    """Query parameters for the niconico snapshot search API v2."""
    # Search query; built as 'tagA OR tagB ...' by search_nico_by_tags.
    q: str
    # Search target field(s); 'tagsExact' is used for exact tag matches.
    targets: str
    # Sort order; '-viewCounter' sorts by view count, descending.
    _sort: str
    # Comma-separated list of response fields to return.
    fields: str
    # Maximum number of hits per request (API page size).
    _limit: int
    # JSON-encoded filter (here: a startTime range per fetch window).
    jsonFilter: str
-
-
class VideoResult (TypedDict):
    """One video, as returned by the snapshot search API (or synthesized
    from the watch API for tracked videos in search_nico_by_tags)."""
    # Video code; primary lookup key throughout this module.
    contentId: str
    # Uploader's numeric id; None when the uploader is unavailable.
    userId: int | None
    title: str
    # Space-separated tag names in a single string (split in update_tables).
    tags: str
    # May be None; coerced to '' before persisting.
    description: str | None
    viewCounter: int
    # ISO-8601 upload timestamp; parsed with datetime.fromisoformat.
    startTime: str
-
-
class CommentResult (TypedDict):
    """One comment, as returned by the nvComment /v1/threads API."""
    # Comment number within the thread; used as the de-duplication and
    # sort key, and stored as Comment.comment_no.
    no: int
    # Commenter's user code (a string, unlike VideoResult.userId).
    userId: str
    body: str
    # ISO-8601 timestamp; parsed with datetime.fromisoformat.
    postedAt: str
    # 'Nicoru' reaction count; may be absent in practice — update_tables
    # reads it via .get with default 0.
    nicoruCount: int
    # Comment position; presumably the playback offset in milliseconds —
    # TODO confirm. Also read via .get with default 0.
    vposMs: int
-
-
def main (
) -> None:
    """Entry point: fetch the latest video/comment data, then refresh the
    DB inside a single transaction (rolled back on any failure)."""
    started_at = datetime.now ()
    run_date = started_at.date ()

    result = search_nico_by_tags (['伊地知ニジカ',
                                   'ぼざろクリーチャーシリーズ',
                                   'ぼざろクリーチャーシリーズ外伝'])
    fetched_comments = fetch_comments_by_video_code (result['videos'])

    update_context: UpdateContext = {
        'api_data': result['videos'],
        'comments_by_video_code': fetched_comments,
        'deletable': result['is_complete'],
    }

    db_connection = DB.connection ()
    db_connection.begin_transaction ()
    try:
        update_tables (update_context, started_at, run_date)
        db_connection.commit ()
    except Exception:
        db_connection.rollback ()
        raise
-
-
def update_tables (
    context: UpdateContext,
    now: datetime,
    today: date,
) -> None:
    """
    Upsert users, videos, view histories, tags and comments from *context*,
    then soft-delete videos that disappeared from the fetch.

    Expected to run inside a transaction (main() opens one). ``now`` is the
    soft-deletion timestamp; ``today`` is the history/tagging date.
    """
    # Codes of every video present in this fetch; anything else gets
    # soft-deleted at the end (when context['deletable'] allows it).
    alive_video_codes: list[str] = []

    for datum in context['api_data']:
        tag_names = datum['tags'].split ()
        # Normalised forms are used only to decide which existing tags to
        # detach; Tag rows themselves are created with the raw names.
        normalised_tag_names = {normalise (tag_name) for tag_name in tag_names}

        # Find-or-create the uploader by user code (stringified numeric id).
        user: User | None = None
        if datum['userId'] is not None:
            user = User.where ('code', str (datum['userId'])).first ()
            if user is None:
                user = User ()
                user.code = str (datum['userId'])
                user.save ()

        # Upsert the video — presumably keyed on the unique `code` column,
        # and presumably populating `video.id` afterwards; TODO confirm
        # against the model definitions. deleted_at is cleared so a
        # previously soft-deleted video that reappears is revived.
        video = Video ()
        video.code = datum['contentId']
        video.user_id = user.id if user else None
        video.title = datum['title']
        video.description = datum['description'] or ''
        video.uploaded_at = datetime.fromisoformat (datum['startTime'])
        video.deleted_at = None
        video.upsert ()
        alive_video_codes.append (video.code)

        # One view-count snapshot per (video, date).
        video_history = VideoHistory ()
        video_history.video_id = video.id
        video_history.fetched_at = today
        video_history.views_count = datum['viewCounter']
        video_history.upsert ()

        # Detach currently-attached tags that no longer appear on the
        # video (compared in normalised form).
        video_tags = [video_tag for video_tag in video.video_tags
                      if video_tag.untagged_at is None]
        for video_tag in video_tags:
            tag = video_tag.tag
            if tag is None:
                continue
            if normalise (tag.name) in normalised_tag_names:
                continue
            video_tag.untagged_at = today
            video_tag.save ()

        # Attach (or re-attach) every tag currently on the video.
        for tag_name in tag_names:
            tag = Tag.where ('name', tag_name).first ()
            if tag is None:
                tag = Tag ()
                tag.name = tag_name
                tag.save ()

            video_tag = (VideoTag.where ('video_id', video.id)
                         .where ('tag_id', tag.id)
                         .first ())
            if video_tag is None:
                video_tag = VideoTag ()
                video_tag.video_id = video.id
                video_tag.tag_id = tag.id

            # Keep the original tagged_at when re-attaching; clear any
            # previous untagged_at.
            video_tag.tagged_at = video_tag.tagged_at or today
            video_tag.untagged_at = None
            video_tag.upsert ()

        # Upsert comments; each commenter becomes a User row keyed by the
        # string user code from the comment API.
        for com in context['comments_by_video_code'].get (video.code, []):
            user = User.where ('code', com['userId']).first ()
            if user is None:
                user = User ()
                user.code = com['userId']
                user.save ()

            comment = Comment ()
            comment.video_id = video.id
            comment.comment_no = com['no']
            comment.user_id = user.id
            comment.content = com['body']
            comment.posted_at = datetime.fromisoformat (com['postedAt'])
            # NOTE(review): model column is nico_count while the API key is
            # nicoruCount — presumably intentional naming; confirm schema.
            comment.nico_count = com.get ('nicoruCount', 0)
            comment.vpos_ms = com.get ('vposMs', 0)
            comment.upsert ()

    # Soft-deletion is only safe when the fetch saw the complete picture.
    if not context['deletable']:
        logger.warning ('skip soft-delete because the latest fetch was incomplete')
        return

    if not alive_video_codes:
        logger.warning ('skip soft-delete because no alive videos were fetched')
        return

    videos = (Video.where_not_in ('code', alive_video_codes)
              .where_null ('deleted_at')
              .get ())
    for video in videos:
        video.deleted_at = now
        video.save ()
-
-
def fetch_video_data (
    video_code: str,
) -> dict[str, Any]:
    """
    Fetch watch-page metadata for one video from the guest watch API.

    Sleeps ~1.2 s before the request as crude rate limiting.

    Raises:
        requests.RequestException: on network failure or a non-2xx response.
        ValueError: when the response body is not valid JSON.
    """
    time.sleep (1.2)

    headers = { 'X-Frontend-Id': '6',
                'X-Frontend-Version': '0' }

    # actionTrackId of the form '<10 alnum chars>_<13-digit number>';
    # presumably any random value is accepted for guest access — TODO
    # confirm against the watch API behaviour.
    action_track_id = (
        ''.join (random.choice (string.ascii_letters + string.digits)
                 for _ in range (10))
        + '_'
        + str (random.randrange (10 ** 12, 10 ** 13)))

    url = (f'https://www.nicovideo.jp/api/watch/v3_guest/{ video_code }'
           + f'?actionTrackId={ action_track_id }')

    response = requests.post (url, headers = headers, timeout = 60)
    # Fail fast on HTTP errors instead of attempting to JSON-decode an
    # error page; every caller already catches requests.RequestException.
    response.raise_for_status ()
    return response.json ()
-
-
def fetch_comments_by_video_code (
    videos: list[VideoResult],
) -> dict[str, list[CommentResult]]:
    """Fetch comments for every video; a failed fetch yields an empty list
    (best-effort — the failure is logged, never raised)."""
    result: dict[str, list[CommentResult]] = {}

    for entry in videos:
        code = entry['contentId']
        try:
            fetched = fetch_comments (code)
        except (KeyError,
                TypeError,
                ValueError,
                requests.RequestException) as exc:
            logger.warning ('failed to fetch comments: %s (%s)', code, exc)
            fetched = []
        result[code] = fetched

    return result
-
-
def fetch_comments (
    video_code: str,
) -> list[CommentResult]:
    """Fetch the comment threads for one video via the nvComment API and
    return the comments of the most relevant thread."""
    watch_data = fetch_video_data (video_code)
    comment_section = watch_data.get ('data', {}).get ('comment', {})
    nv_comment = comment_section.get ('nvComment')
    if nv_comment is None:
        return []

    request_headers = { 'X-Frontend-Id': '6',
                        'X-Frontend-Version': '0',
                        'Content-Type': 'application/json' }

    request_body = { 'params': nv_comment['params'],
                     'additionals': {},
                     'threadKey': nv_comment['threadKey'] }

    threads_response = requests.post (nv_comment['server'] + '/v1/threads',
                                      json = request_body,
                                      headers = request_headers,
                                      timeout = 60)
    threads_response.raise_for_status ()

    return select_comments_from_threads (threads_response.json ())
-
-
def select_comments_from_threads (
    response: dict[str, Any],
) -> list[CommentResult]:
    """
    Pick the best comment list out of a /v1/threads response.

    Prefers a thread whose fork equals 'main' or whose label/id contains
    'main'; otherwise falls back to the longest thread.  Comments are
    de-duplicated by comment number (last occurrence wins) and returned
    sorted by it; entries without an int 'no' are dropped.
    """
    raw_threads = response.get ('data', {}).get ('threads', [])
    if not isinstance (raw_threads, list):
        return []

    chosen: list[CommentResult] = []
    longest: list[CommentResult] = []

    for entry in raw_threads:
        if not isinstance (entry, dict):
            continue
        comment_list = entry.get ('comments')
        if not isinstance (comment_list, list):
            continue

        comment_list = cast (list[CommentResult], comment_list)
        # Track the longest thread as a fallback when no main is found.
        if len (comment_list) > len (longest):
            longest = comment_list

        fork_marker = str (entry.get ('fork', '')).lower ()
        label_marker = str (entry.get ('label', '')).lower ()
        id_marker = str (entry.get ('id', '')).lower ()
        if (fork_marker == 'main'
                or 'main' in label_marker
                or 'main' in id_marker):
            chosen = comment_list

    by_number: dict[int, CommentResult] = {}
    for item in (chosen or longest):
        number = item.get ('no')
        if isinstance (number, int):
            by_number[number] = item

    return [by_number[number] for number in sorted (by_number)]
-
-
def search_nico_by_tags (
    tags: list[str],
) -> SearchNicoResult:
    """
    Search the snapshot API for videos with any of *tags* (exact tag
    match), scanning in 15-day upload-date windows from 2022-12-03 up to
    now, then backfill tracked videos the search missed via the watch API.

    Returns the merged videos plus an is_complete flag that is False if
    any window or tracked-video fetch failed.
    """
    today = datetime.now ()

    url = ('https://snapshot.search.nicovideo.jp'
           + '/api/v2/snapshot/video/contents/search')

    result_by_video_code: dict[str, VideoResult] = {}
    is_complete = True
    # 2022-12-03 — presumably the earliest relevant upload date for these
    # tags; TODO confirm.
    to = datetime (2022, 12, 3)
    while to <= today:
        time.sleep (1.2)
        # Window covers [to 00:00, to+14 23:59:59] JST; the next window
        # starts at to+15, so windows are contiguous and non-overlapping.
        until = to + timedelta (days = 14)
        # pylint: disable = consider-using-f-string
        query_filter = json.dumps ({ 'type': 'or',
                                     'filters': [
                                         { 'type': 'range',
                                           'field': 'startTime',
                                           'from': ('%04d-%02d-%02dT00:00:00+09:00'
                                                    % (to.year, to.month, to.day)),
                                           'to': ('%04d-%02d-%02dT23:59:59+09:00'
                                                  % (until.year, until.month, until.day)),
                                           'include_lower': True }] })
        # NOTE(review): _limit is 100 with a -viewCounter sort — a window
        # containing more than 100 matches silently drops the least-viewed
        # ones. Consider paging with _offset if that becomes a problem.
        params: VideoSearchParam = { 'q': ' OR '.join (tags),
                                     'targets': 'tagsExact',
                                     '_sort': '-viewCounter',
                                     'fields': ('contentId,'
                                                'userId,'
                                                'title,'
                                                'tags,'
                                                'description,'
                                                'viewCounter,'
                                                'startTime'),
                                     '_limit': 100,
                                     'jsonFilter': query_filter }
        try:
            response = requests.get (
                url,
                params = cast (dict[str, int | str], params),
                timeout = 60)
            response.raise_for_status ()
            res = response.json ()
            for datum in cast (list[VideoResult], res.get ('data', [])):
                result_by_video_code[datum['contentId']] = datum
        except (ValueError, requests.RequestException) as exc:
            # A failed window only degrades completeness; the scan goes on.
            logger.warning ('snapshot fetch failed: %s - %s (%s)',
                            to.date (),
                            until.date (),
                            exc)
            is_complete = False
        to = until + timedelta (days = 1)

    # Backfill tracked videos the tag search did not return, shaping the
    # watch-API payload into the same VideoResult structure.
    for video in TrackedVideo.get ():
        if video.code in result_by_video_code:
            continue
        try:
            video_data = fetch_video_data (video.code)['data']
            result_by_video_code[video.code] = {
                'contentId': video.code,
                'userId': video_data['video']['userId'],
                'title': video_data['video']['title'],
                'tags': ' '.join (map (lambda t: t['name'],
                                       video_data['tag']['items'])),
                'description': video_data['video']['description'],
                'viewCounter': video_data['video']['count']['view'],
                'startTime': video_data['video']['registeredAt'] }
        except (KeyError,
                TypeError,
                ValueError,
                requests.RequestException) as exc:
            logger.warning ('tracked video fetch failed: %s (%s)', video.code, exc)
            is_complete = False

    return { 'videos': list (result_by_video_code.values ()),
             'is_complete': is_complete }
-
-
def normalise (
    text: str,
) -> str:
    """Canonicalise a tag name for comparison: strip surrounding space,
    NFKC-normalise, convert hiragana to katakana, then lowercase."""
    stripped = text.strip ()
    folded = unicodedata.normalize ('NFKC', stripped)
    return jaconv.hira2kata (folded).lower ()
-
-
# Run as a standalone daily batch script.
if __name__ == '__main__':
    main ()
|