ぼざろクリーチャーシリーズ DB 兼 API(自分用)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

277 lines
8.8 KiB

  1. # pylint: disable = missing-class-docstring
  2. # pylint: disable = missing-function-docstring
  3. """
  4. 日次で実行し,ぼざクリ DB を最新に更新する.
  5. """
  6. from __future__ import annotations
  7. import json
  8. import os
  9. import random
  10. import string
  11. import time
  12. import unicodedata
  13. from datetime import datetime, timedelta
  14. from typing import Any, TypedDict, cast
  15. import jaconv
  16. import requests
  17. from eloquent import DatabaseManager, Model
  18. from db.config import DB
  19. from db.models import (Comment,
  20. Tag,
  21. TrackedVideo,
  22. User,
  23. Video,
  24. VideoHistory,
  25. VideoTag)
  26. def main (
  27. ) -> None:
  28. now = datetime.now ()
  29. api_data = search_nico_by_tags (['伊地知ニジカ',
  30. 'ぼざろクリーチャーシリーズ',
  31. 'ぼざろクリーチャーシリーズ外伝'])
  32. DB.begin_transaction ()
  33. try:
  34. update_tables (api_data, now)
  35. DB.commit ()
  36. except Exception:
  37. DB.rollback ()
  38. raise
  39. def update_tables (
  40. api_data: list[VideoResult],
  41. now: datetime,
  42. ) -> None:
  43. alive_video_codes: list[str] = []
  44. for datum in api_data:
  45. tag_names: list[str] = datum['tags'].split ()
  46. video = Video ()
  47. video.code = datum['contentId']
  48. video.title = datum['title']
  49. video.description = datum['description'] or ''
  50. video.uploaded_at = datetime.fromisoformat (datum['startTime'])
  51. video.deleted_at = None
  52. video.upsert ()
  53. alive_video_codes.append (video.code)
  54. video_history = VideoHistory ()
  55. video_history.video_id = video.id
  56. video_history.fetched_at = now
  57. video_history.views_count = datum['viewCounter']
  58. video_history.upsert ()
  59. video_tags = [video_tag for video_tag in video.video_tags
  60. if video_tag.untagged_at is None]
  61. tag: Tag | None
  62. video_tag: VideoTag | None
  63. for video_tag in video_tags:
  64. tag = video_tag.tag
  65. if (tag is not None
  66. and (normalise (tag.name) not in map (normalise, tag_names))):
  67. video_tag.untagged_at = now
  68. video_tag.save ()
  69. for tag_name in tag_names:
  70. tag = Tag.where ('name', tag_name).first ()
  71. if tag is None:
  72. tag = Tag ()
  73. tag.name = tag_name
  74. tag.save ()
  75. video_tag = (VideoTag.where ('video_id', video.id)
  76. .where ('tag_id', tag.id)
  77. .where_null ('untagged_at')
  78. .first ())
  79. if video_tag is None:
  80. video_tag = VideoTag ()
  81. video_tag.video_id = video.id
  82. video_tag.tag_id = tag.id
  83. video_tag.tagged_at = now
  84. video_tag.untagged_at = None
  85. video_tag.save ()
  86. for com in fetch_comments (video.code):
  87. user = User.where ('code', com['userId']).first ()
  88. if user is None:
  89. user = User ()
  90. user.code = com['userId']
  91. user.save ()
  92. comment = Comment ()
  93. comment.video_id = video.id
  94. comment.comment_no = com['no']
  95. comment.user_id = user.id
  96. comment.content = com['body']
  97. comment.posted_at = datetime.fromisoformat (com['postedAt'])
  98. comment.nico_count = com['nicoruCount']
  99. comment.vpos_ms = com['vposMs']
  100. comment.upsert ()
  101. # 削除動画
  102. videos = (Video.where_not_in ('code', alive_video_codes)
  103. .where_null ('deleted_at')
  104. .get ())
  105. for video in videos:
  106. if video.code not in alive_video_codes:
  107. video.deleted_at = now
  108. video.save ()
  109. def fetch_video_data (
  110. video_code: str,
  111. ) -> dict[str, Any]:
  112. time.sleep (1.2)
  113. headers = { 'X-Frontend-Id': '6',
  114. 'X-Frontend-Version': '0' }
  115. action_track_id = (
  116. ''.join (random.choice (string.ascii_letters + string.digits)
  117. for _ in range (10))
  118. + '_'
  119. + str (random.randrange (10 ** 12, 10 ** 13)))
  120. url = (f"https://www.nicovideo.jp/api/watch/v3_guest/{ video_code }"
  121. + f"?actionTrackId={ action_track_id }")
  122. return requests.post (url, headers = headers, timeout = 60).json ()
  123. def fetch_comments (
  124. video_code: str,
  125. ) -> list[CommentResult]:
  126. try:
  127. nv_comment = fetch_video_data (video_code)['data']['comment']['nvComment']
  128. except KeyError:
  129. return []
  130. if nv_comment is None:
  131. return []
  132. headers = { 'X-Frontend-Id': '6',
  133. 'X-Frontend-Version': '0',
  134. 'Content-Type': 'application/json' }
  135. params = { 'params': nv_comment['params'],
  136. 'additionals': { },
  137. 'threadKey': nv_comment['threadKey'] }
  138. url = nv_comment['server'] + '/v1/threads'
  139. res = (requests.post (url, json.dumps (params),
  140. headers = headers,
  141. timeout = 60)
  142. .json ())
  143. try:
  144. return res['data']['threads'][1]['comments']
  145. except (IndexError, KeyError):
  146. return []
  147. def search_nico_by_tags (
  148. tags: list[str],
  149. ) -> list[VideoResult]:
  150. today = datetime.now ()
  151. url = ('https://snapshot.search.nicovideo.jp'
  152. + '/api/v2/snapshot/video/contents/search')
  153. result_data: list[VideoResult] = []
  154. to = datetime (2022, 12, 3)
  155. while to <= today:
  156. time.sleep (1.2)
  157. until = to + timedelta (days = 14)
  158. # pylint: disable = consider-using-f-string
  159. query_filter = json.dumps ({ 'type': 'or',
  160. 'filters': [
  161. { 'type': 'range',
  162. 'field': 'startTime',
  163. 'from': ('%04d-%02d-%02dT00:00:00+09:00'
  164. % (to.year, to.month, to.day)),
  165. 'to': ('%04d-%02d-%02dT23:59:59+09:00'
  166. % (until.year, until.month, until.day)),
  167. 'include_lower': True }] })
  168. params: VideoSearchParam = { 'q': ' OR '.join (tags),
  169. 'targets': 'tagsExact',
  170. '_sort': '-viewCounter',
  171. 'fields': ('contentId,'
  172. 'title,'
  173. 'tags,'
  174. 'description,'
  175. 'viewCounter,'
  176. 'startTime'),
  177. '_limit': 100,
  178. 'jsonFilter': query_filter }
  179. res = requests.get (url, params = cast (dict[str, int | str], params), timeout = 60).json ()
  180. try:
  181. result_data += res['data']
  182. except KeyError:
  183. pass
  184. to = until + timedelta (days = 1)
  185. for video in TrackedVideo.get ():
  186. if video.code in map (lambda v: v['contentId'], result_data):
  187. continue
  188. try:
  189. video_data = fetch_video_data (video.code)['data']
  190. result_data.append ({
  191. 'contentId': video.code,
  192. 'title': video_data['video']['title'],
  193. 'tags': ' '.join (map (lambda t: t['name'],
  194. video_data['tag']['items'])),
  195. 'description': video_data['video']['description'],
  196. 'viewCounter': video_data['video']['count']['view'],
  197. 'startTime': video_data['video']['registeredAt'] })
  198. except Exception:
  199. pass
  200. return result_data
class VideoSearchParam (TypedDict):
    """Query parameters sent to the snapshot video search endpoint."""
    # Search expression; the caller joins the tag names with ' OR '.
    q: str
    # Field(s) the expression is matched against ('tagsExact' in this file).
    targets: str
    # Sort key ('-viewCounter' in this file).
    _sort: str
    # Comma-separated list of the fields to return.
    fields: str
    # Number of results requested per call.
    _limit: int
    # JSON-encoded filter object (a `startTime` range in this file).
    jsonFilter: str
class VideoResult (TypedDict):
    """One video entry as produced by ``search_nico_by_tags``."""
    # Video code; stored as `Video.code`.
    contentId: str
    title: str
    # All tag names in one whitespace-separated string.
    tags: str
    # May be None; stored as an empty string in that case.
    description: str | None
    # View count; stored as `VideoHistory.views_count`.
    viewCounter: int
    # Upload time as a string accepted by `datetime.fromisoformat`.
    startTime: str
class CommentResult (TypedDict):
    """
    One comment entry as returned by ``fetch_comments``.

    Only `no`, `vposMs`, `body`, `userId`, `postedAt` and `nicoruCount`
    are read by the code visible in this file.
    """
    id: str
    # Comment number; stored as `Comment.comment_no`.
    no: int
    # Stored as `Comment.vpos_ms` (presumably the playback position in
    # milliseconds -- TODO confirm).
    vposMs: int
    # Comment text; stored as `Comment.content`.
    body: str
    commands: list[str]
    # Poster's code; looked up / stored as `User.code`.
    userId: str
    isPremium: bool
    score: int
    # Posting time as a string accepted by `datetime.fromisoformat`.
    postedAt: str
    # Stored as `Comment.nico_count`.
    nicoruCount: int
    nicoruId: Any
    source: str
    isMyPost: bool
  229. def normalise (
  230. s: str,
  231. ) -> str:
  232. return jaconv.hira2kata (unicodedata.normalize ('NFKC', s)).lower ()
# Run the update only when this file is executed as a script.
if __name__ == '__main__':
    main ()