|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406 |
- """
- 日次で実行し,ぼざクリ DB を最新に更新する.
- """
-
- from __future__ import annotations
-
- import json
- import os
- import random
- import string
- import time
- from dataclasses import dataclass
- from datetime import date, datetime, timedelta
- from typing import Any, Type, TypedDict, cast
-
- import requests
- from eloquent import DatabaseManager, Model
- from eloquent.orm.relations.dynamic_property import DynamicProperty
-
# Settings for the single 'mysql' connection.  Both credentials are read from
# the environment when the module is loaded, so a missing MYSQL_USER or
# MYSQL_PASS raises KeyError at that point.
_mysql_settings: DbConfig = { 'driver': 'mysql',
                              'host': 'localhost',
                              'database': 'nizika_nico',
                              'user': os.environ['MYSQL_USER'],
                              'password': os.environ['MYSQL_PASS'],
                              'prefix': '' }
config: dict[str, DbConfig] = { 'mysql': _mysql_settings }

# Build the database manager and register it as the connection resolver on
# the eloquent `Model` base class.
db = DatabaseManager (config)
Model.set_connection_resolver (db)
-
-
def main () -> None:
    """Search for the tracked tags and write the results to the database.

    The timestamp is taken once, before the search starts, and handed to
    `update_tables` together with the search results.
    """
    started_at = datetime.now ()
    tracked_tags = ['伊地知ニジカ', 'ぼざろクリーチャーシリーズ']
    update_tables (search_nico_by_tags (tracked_tags), started_at)
-
-
def update_tables (
        api_data: list[VideoResult],
        now: datetime,
) -> None:
    """Synchronise the database with one batch of search results.

    For every entry in `api_data` the video row is upserted, a view-count
    history row is saved, the video's tag links are reconciled and its
    comments are fetched and stored.  Afterwards every stored video whose
    code is absent from `api_data` and that is not flagged yet gets
    `deleted_at = now`.

    Args:
        api_data: Video entries as returned by `search_nico_by_tags`.
        now: Timestamp written to `fetched_at`, `tagged_at`, `untagged_at`
            and `deleted_at` during this run.
    """
    alive_video_codes: list[str] = []

    for datum in api_data:
        # `tags` is a single whitespace-separated string.
        tag_names: list[str] = datum['tags'].split ()

        video = Video ()
        video.code = datum['contentId']
        video.title = datum['title']
        video.description = datum['description'] or ''
        video.uploaded_at = datetime.fromisoformat (datum['startTime'])
        # The video showed up in the search, so clear any deletion flag.
        video.deleted_at = None
        video.upsert ()
        alive_video_codes.append (video.code)

        video_history = VideoHistory ()
        video_history.video_id = video.id
        video_history.fetched_at = now
        video_history.views_count = datum['viewCounter']
        video_history.save ()

        _sync_video_tags (video, tag_names, now)
        _store_comments (video)

    _mark_deleted_videos (alive_video_codes, now)


def _sync_video_tags (
        video: Video,
        tag_names: list[str],
        now: datetime,
) -> None:
    """Close tag links that vanished from `tag_names` and open new ones."""
    current_names = { name.upper () for name in tag_names }

    # Only links that are still open (untagged_at IS NULL) can be closed;
    # links that already carry an untagged_at value must keep it.
    open_links = video.video_tags.where_null ('untagged_at').get ()
    for video_tag in open_links:
        tag = video_tag.tag
        if tag is not None and tag.name.upper () not in current_names:
            video_tag.untagged_at = now
            video_tag.save ()

    for tag_name in tag_names:
        tag = Tag.where ('name', tag_name).first ()
        if tag is None:
            tag = Tag ()
            tag.name = tag_name
            tag.save ()

        # The link lives in VideoTag (video_id / tag_id), not in Video.
        video_tag = (VideoTag.where ('video_id', video.id)
                     .where ('tag_id', tag.id)
                     .where_null ('untagged_at')
                     .first ())
        if video_tag is None:
            video_tag = VideoTag ()
            video_tag.video_id = video.id
            video_tag.tag_id = tag.id
            video_tag.tagged_at = now
            video_tag.untagged_at = None
            video_tag.save ()


def _store_comments (
        video: Video,
) -> None:
    """Fetch the comments of `video` and insert or update one row for each."""
    for com in fetch_comments (video.code):
        user = User.where ('code', com['userId']).first ()
        if user is None:
            user = User ()
            user.code = com['userId']
            user.save ()

        # `Comment` defines no `upsert` method, so look the row up here and
        # save either the fetched model or a fresh one.
        # NOTE(review): assumes (video_id, comment_no) identifies a comment
        # -- confirm against the table's unique keys.
        comment = (Comment.where ('video_id', video.id)
                   .where ('comment_no', com['no'])
                   .first ())
        if comment is None:
            comment = Comment ()
        comment.video_id = video.id
        comment.comment_no = com['no']
        comment.user_id = user.id
        comment.content = com['body']
        comment.posted_at = datetime.fromisoformat (com['postedAt'])
        comment.nico_count = com['nicoruCount']
        comment.vpos_ms = com['vposMs']
        comment.save ()


def _mark_deleted_videos (
        alive_video_codes: list[str],
        now: datetime,
) -> None:
    """Stamp `deleted_at` on unflagged videos missing from this run."""
    # The query already excludes every alive code, so no further filtering
    # is needed in Python.
    vanished = (Video.where_not_in ('code', alive_video_codes)
                .where_null ('deleted_at')
                .get ())
    for video in vanished:
        video.deleted_at = now
        video.save ()
-
-
def fetch_comments (
        video_code: str,
) -> list[CommentResult]:
    """Download the comments of one video.

    Two POST requests are made: the guest watch API supplies the
    `nvComment` descriptor (server, params, threadKey), and that server's
    `/v1/threads` endpoint then supplies the threads.

    Args:
        video_code: Video identifier inserted into the watch API URL.

    Returns:
        The `comments` list of the thread at index 1, or an empty list when
        `nvComment` is missing or None, or when the thread reply lacks the
        expected keys or has fewer than two threads.
    """
    # Fixed pause before every fetch (presumably request throttling).
    time.sleep (1.2)

    frontend_headers = { 'X-Frontend-Id': '6',
                         'X-Frontend-Version': '0' }

    # Tracking id: 10 random alphanumerics, '_', then a 13-digit number.
    alphabet = string.ascii_letters + string.digits
    track_prefix = ''.join (random.choice (alphabet) for _ in range (10))
    track_suffix = str (random.randrange (10 ** 12, 10 ** 13))
    action_track_id = track_prefix + '_' + track_suffix

    watch_url = (f"https://www.nicovideo.jp/api/watch/v3_guest/{ video_code }"
                 f"?actionTrackId={ action_track_id }")
    watch = requests.post (watch_url,
                           headers = frontend_headers,
                           timeout = 60).json ()

    try:
        nv_comment = watch['data']['comment']['nvComment']
    except KeyError:
        return []
    if nv_comment is None:
        return []

    thread_headers = { 'X-Frontend-Id': '6',
                       'X-Frontend-Version': '0',
                       'Content-Type': 'application/json' }
    payload = { 'params': nv_comment['params'],
                'additionals': { },
                'threadKey': nv_comment['threadKey'] }
    threads_url = nv_comment['server'] + '/v1/threads'

    reply = requests.post (threads_url,
                           data = json.dumps (payload),
                           headers = thread_headers,
                           timeout = 60).json ()

    try:
        return reply['data']['threads'][1]['comments']
    except (IndexError, KeyError):
        return []
-
-
def search_nico_by_tag (
        tag: str,
) -> list[VideoResult]:
    """Search for a single tag; shorthand for `search_nico_by_tags ([tag])`."""
    single_tag_list = [tag]
    return search_nico_by_tags (single_tag_list)
-
-
def search_nico_by_tags (
        tags: list[str],
) -> list[VideoResult]:
    """Collect videos carrying any of `tags` from the snapshot search API.

    The period from 2022-12-03 up to the moment of the call is walked in
    consecutive windows of 15 calendar days (start day plus 14 days).  One
    request is sent per window, after a 1.2 second pause, and the `data`
    entries of every response are concatenated.

    Args:
        tags: Tag names, joined with ' OR ' into the query string.

    Returns:
        All entries gathered across the windows.  A response without a
        `data` key contributes nothing.
    """
    finished_at = datetime.now ()

    endpoint = ('https://snapshot.search.nicovideo.jp'
                '/api/v2/snapshot/video/contents/search')
    wanted_fields = ('contentId,'
                     'title,'
                     'tags,'
                     'description,'
                     'viewCounter,'
                     'startTime')

    collected: list[VideoResult] = []
    window_start = datetime (2022, 12, 3)
    while window_start <= finished_at:
        time.sleep (1.2)

        window_end = window_start + timedelta (days = 14)
        lower_bound = (f"{ window_start.year :04d}-{ window_start.month :02d}"
                       f"-{ window_start.day :02d}T00:00:00+09:00")
        upper_bound = (f"{ window_end.year :04d}-{ window_end.month :02d}"
                       f"-{ window_end.day :02d}T23:59:59+09:00")
        range_filter = { 'type': 'range',
                         'field': 'startTime',
                         'from': lower_bound,
                         'to': upper_bound,
                         'include_lower': True }
        query_filter = json.dumps ({ 'type': 'or',
                                     'filters': [range_filter] })

        # NOTE(review): at most 100 entries are requested per window and no
        # further pages are fetched -- confirm a window never holds more.
        params: VideoSearchParam = { 'q': ' OR '.join (tags),
                                     'targets': 'tagsExact',
                                     '_sort': '-viewCounter',
                                     'fields': wanted_fields,
                                     '_limit': 100,
                                     'jsonFilter': query_filter }
        response = requests.get (endpoint,
                                 params = cast (dict[str, int | str], params),
                                 timeout = 60)
        body = response.json ()
        try:
            collected.extend (body['data'])
        except KeyError:
            # No `data` key in this response: skip the window.
            pass

        # The next window begins the day after this one ends.
        window_start = window_end + timedelta (days = 1)

    return collected
-
-
class Comment (Model):
    """ORM model for one comment posted on a video."""

    # Automatic timestamp columns are switched off for this model.
    __timestamps__ = False

    @property
    def video (self) -> DynamicProperty:
        """Belongs-to relation to the `Video` the comment was posted on."""
        relation = self.belongs_to (Video)
        return relation

    @property
    def user (self) -> DynamicProperty:
        """Belongs-to relation to the `User` who posted the comment."""
        relation = self.belongs_to (User)
        return relation
-
-
class Tag (Model):
    """ORM model for a tag name."""

    # Automatic timestamp columns are switched off for this model.
    __timestamps__ = False

    @property
    def video_tags (self) -> DynamicProperty:
        """Has-many relation to the `VideoTag` links that use this tag."""
        relation = self.has_many (VideoTag)
        return relation
-
-
class User (Model):
    """ORM model for a comment author, identified by `code`."""

    # Automatic timestamp columns are switched off for this model.
    __timestamps__ = False

    @property
    def comments (self) -> DynamicProperty:
        """Has-many relation to the `Comment` rows of this user."""
        relation = self.has_many (Comment)
        return relation
-
-
class Video (Model):
    """ORM model for one video, identified by its `code`."""

    # Automatic timestamp columns are switched off for this model.
    __timestamps__ = False

    @property
    def video_histories (self) -> DynamicProperty:
        """Has-many relation to the `VideoHistory` rows of this video."""
        relation = self.has_many (VideoHistory)
        return relation

    @property
    def video_tags (self) -> DynamicProperty:
        """Has-many relation to the `VideoTag` links of this video."""
        relation = self.has_many (VideoTag)
        return relation

    @property
    def comments (self) -> DynamicProperty:
        """Has-many relation to the `Comment` rows of this video."""
        relation = self.has_many (Comment)
        return relation

    def upsert (self) -> None:
        """Save this video, reusing the id of a stored row with the same code.

        NOTE(review): when `self` is a freshly constructed model, whether
        `save ()` issues an UPDATE after only `id` has been assigned depends
        on how eloquent tracks persisted models -- confirm that this does
        not attempt a second INSERT.
        """
        existing = Video.where ('code', self.code).first ()
        if existing is not None:
            self.id = existing.id
        self.save ()
-
-
class VideoHistory (Model):
    """ORM model for one view-count sample of a video."""

    # Automatic timestamp columns are switched off for this model.
    __timestamps__ = False

    @property
    def video (
            self,
    ) -> DynamicProperty:
        """Belongs-to relation to the sampled `Video`."""
        return self.belongs_to (Video)

    def upsert (
            self,
    ) -> None:
        """Save this sample, reusing the id of a matching stored sample.

        A stored sample matches when it has the same `video_id` and
        `fetched_at`.

        NOTE(review): same "assign id, then save" pattern as `Video.upsert`;
        confirm that eloquent turns this into an UPDATE.
        """
        # Look the sample up in its own table; `video_id` and `fetched_at`
        # are VideoHistory columns, not Video columns.
        row = (VideoHistory
               .where ('video_id', self.video_id)
               .where ('fetched_at', self.fetched_at)
               .first ())
        if row is not None:
            self.id = row.id
        self.save ()
-
-
class VideoTag (Model):
    """ORM model linking a video to a tag for a period of time."""

    # Automatic timestamp columns are switched off for this model.
    __timestamps__ = False

    @property
    def video (
            self,
    ) -> DynamicProperty:
        """Belongs-to relation to the tagged `Video`."""
        return self.belongs_to (Video)

    @property
    def tag (
            self,
    ) -> DynamicProperty:
        """Belongs-to relation to the applied `Tag`."""
        return self.belongs_to (Tag)

    def upsert (
            self,
    ) -> None:
        """Save this link, reusing the id of a matching stored link.

        A stored link matches when it has the same `video_id` and `tag_id`.

        NOTE(review): same "assign id, then save" pattern as `Video.upsert`;
        confirm that eloquent turns this into an UPDATE.
        """
        # Look the link up in its own table; `video_id` and `tag_id` are
        # VideoTag columns, not Video columns.
        row = (VideoTag
               .where ('video_id', self.video_id)
               .where ('tag_id', self.tag_id)
               .first ())
        if row is not None:
            self.id = row.id
        self.save ()
-
-
class DbConfig (TypedDict):
    """Settings of one named connection handed to `DatabaseManager`."""

    driver: str     # e.g. 'mysql'
    host: str       # e.g. 'localhost'
    database: str   # database name
    user: str       # login name
    password: str   # login password
    prefix: str     # '' in this module's configuration
-
-
class VideoSearchParam (TypedDict):
    """Query parameters sent to the snapshot search endpoint."""

    q: str            # search keywords; tag names joined with ' OR '
    targets: str      # field to match against, e.g. 'tagsExact'
    _sort: str        # sort key, e.g. '-viewCounter'
    fields: str       # comma-separated names of the fields to return
    _limit: int       # number of entries requested per call
    jsonFilter: str   # JSON-encoded filter expression
-
-
class VideoResult (TypedDict):
    """One video entry from the `data` list of a search response."""

    contentId: str            # stored as Video.code
    title: str                # stored as Video.title
    tags: str                 # tag names in one whitespace-separated string
    description: str | None   # None is stored as ''
    viewCounter: int          # stored as VideoHistory.views_count
    startTime: str            # ISO 8601 text, parsed into Video.uploaded_at
-
-
class CommentResult (TypedDict):
    """One comment entry as returned by `fetch_comments`.

    Only `no`, `vposMs`, `body`, `userId`, `postedAt` and `nicoruCount` are
    read by this module; the remaining keys are declared but unused here.
    """

    id: str
    no: int               # stored as Comment.comment_no
    vposMs: int           # stored as Comment.vpos_ms
    body: str             # stored as Comment.content
    commands: list[str]
    userId: str           # stored as User.code
    isPremium: bool
    score: int
    postedAt: str         # ISO 8601 text, parsed into Comment.posted_at
    nicoruCount: int      # stored as Comment.nico_count
    nicoruId: Any
    source: str
    isMyPost: bool
-
-
class CommentRow (TypedDict):
    """Dictionary shape with the same attribute names as `Comment`.

    Not referenced elsewhere in this module; presumably mirrors the columns
    of the comments table -- verify against the schema.
    """

    id: int
    video_id: int
    comment_no: int
    user_id: int
    content: str
    posted_at: datetime
    nico_count: int
    vpos_ms: int | None
-
-
class TagRow (TypedDict):
    """Dictionary shape with the same attribute names as `Tag`.

    Not referenced elsewhere in this module; presumably mirrors the columns
    of the tags table -- verify against the schema.
    """

    id: int
    name: str
-
-
class UserRow (TypedDict):
    """Dictionary shape with the same attribute names as `User`.

    Not referenced elsewhere in this module; presumably mirrors the columns
    of the users table -- verify against the schema.
    """

    id: int
    code: str
-
-
class VideoRow (TypedDict):
    """Dictionary shape with the same attribute names as `Video`.

    Not referenced elsewhere in this module; presumably mirrors the columns
    of the videos table -- verify against the schema.
    """

    id: int
    code: str
    title: str
    description: str
    uploaded_at: datetime
    deleted_at: datetime | None
-
-
class VideoHistoryRow (TypedDict):
    """Dictionary shape with the same attribute names as `VideoHistory`.

    Not referenced elsewhere in this module; presumably mirrors the columns
    of the video histories table -- verify against the schema.
    """

    id: int
    video_id: int
    fetched_at: date
    views_count: int
-
-
class VideoTagRow (TypedDict):
    """Dictionary shape with the same attribute names as `VideoTag`.

    Not referenced elsewhere in this module; presumably mirrors the columns
    of the video tags table -- verify against the schema.
    """

    id: int
    video_id: int
    tag_id: int
    tagged_at: date
    untagged_at: date | None
-
-
# Run the update only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main ()
|