ぼざろクリーチャーシリーズ DB 兼 API(自分用)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

257 lines
7.9 KiB

  1. # pylint: disable = missing-class-docstring
  2. # pylint: disable = missing-function-docstring
  3. """
  4. 日次で実行し,ぼざクリ DB を最新に更新する.
  5. """
  6. from __future__ import annotations
  7. import json
  8. import os
  9. import random
  10. import string
  11. import time
  12. import unicodedata
  13. from datetime import datetime, timedelta
  14. from typing import Any, TypedDict, cast
  15. import jaconv
  16. import requests
  17. from eloquent import DatabaseManager, Model
  18. from db.config import DB
  19. from db.models import Comment, Tag, User, Video, VideoHistory, VideoTag
  def main (
  ) -> None:
      """
      Run one full synchronisation of the DB against the search API.

      The timestamp is taken first, then the videos for the three tracked tags
      are fetched.  All DB writes happen inside a single transaction: it is
      committed once `update_tables` has finished, and rolled back (with the
      exception re-raised) if anything in that step fails.
      """
      fetched_at = datetime.now ()

      tracked_tags = ['伊地知ニジカ',
                      'ぼざろクリーチャーシリーズ',
                      'ぼざろクリーチャーシリーズ外伝']
      search_results = search_nico_by_tags (tracked_tags)

      DB.begin_transaction ()
      try:
          update_tables (search_results, fetched_at)
          # Committing inside the `try` keeps a failed commit on the rollback path.
          DB.commit ()
      except Exception:
          DB.rollback ()
          raise
  def update_tables (
          api_data: list[VideoResult],
          now: datetime,
  ) -> None:
      """
      Bring the DB in line with one snapshot of search results.

      For every entry of `api_data` this upserts the video row, records a
      view-count history row stamped with `now`, reconciles the video's tags
      and stores its comments.  Afterwards, stored videos that are not yet
      flagged as deleted and whose code is absent from `api_data` get
      `deleted_at = now`.

      No transaction is opened or committed here; that is left to the caller.

      Args:
          api_data: Search results, one dict per video.
          now: Timestamp written to every row touched by this run.
      """
      alive_video_codes: list[str] = []

      for datum in api_data:
          # The API delivers the tags as one whitespace-separated string.
          tag_names: list[str] = datum['tags'].split ()

          video = Video ()
          video.code = datum['contentId']
          video.title = datum['title']
          video.description = datum['description'] or ''
          video.uploaded_at = datetime.fromisoformat (datum['startTime'])
          # A video that shows up in the search results is not deleted.
          video.deleted_at = None
          video.upsert ()
          alive_video_codes.append (video.code)

          video_history = VideoHistory ()
          video_history.video_id = video.id
          video_history.fetched_at = now
          video_history.views_count = datum['viewCounter']
          video_history.upsert ()

          _untag_missing_tags (video, tag_names, now)
          _tag_current_tags (video, tag_names, now)
          _store_comments (video)

      _flag_deleted_videos (alive_video_codes, now)


  def _untag_missing_tags (
          video: Video,
          tag_names: list[str],
          now: datetime,
  ) -> None:
      """Close every open tag link of `video` whose tag is no longer in `tag_names`."""
      # Normalise the incoming names once per video rather than once per
      # stored tag link.
      current_names = { normalise (name) for name in tag_names }

      # Materialise the open links first so saving does not interfere with
      # iterating the relation.
      open_video_tags = [video_tag for video_tag in video.video_tags
                         if video_tag.untagged_at is None]

      for video_tag in open_video_tags:
          tag: Tag | None = video_tag.tag
          if tag is not None and normalise (tag.name) not in current_names:
              video_tag.untagged_at = now
              video_tag.save ()


  def _tag_current_tags (
          video: Video,
          tag_names: list[str],
          now: datetime,
  ) -> None:
      """Make sure every name in `tag_names` has a tag row and an open link to `video`."""
      for tag_name in tag_names:
          # NOTE(review): this lookup matches the exact name, while untagging
          # compares normalised names - confirm the asymmetry is intended.
          tag: Tag | None = Tag.where ('name', tag_name).first ()
          if tag is None:
              tag = Tag ()
              tag.name = tag_name
              tag.save ()

          video_tag: VideoTag | None = (VideoTag.where ('video_id', video.id)
                                               .where ('tag_id', tag.id)
                                               .where_null ('untagged_at')
                                               .first ())
          if video_tag is None:
              video_tag = VideoTag ()
              video_tag.video_id = video.id
              video_tag.tag_id = tag.id
              video_tag.tagged_at = now
              video_tag.untagged_at = None
              video_tag.save ()


  def _store_comments (
          video: Video,
  ) -> None:
      """Fetch the comments of `video` and upsert them, creating unknown users."""
      for com in fetch_comments (video.code):
          user = User.where ('code', com['userId']).first ()
          if user is None:
              user = User ()
              user.code = com['userId']
              user.save ()

          comment = Comment ()
          comment.video_id = video.id
          comment.comment_no = com['no']
          comment.user_id = user.id
          comment.content = com['body']
          comment.posted_at = datetime.fromisoformat (com['postedAt'])
          comment.nico_count = com['nicoruCount']
          comment.vpos_ms = com['vposMs']
          comment.upsert ()


  def _flag_deleted_videos (
          alive_video_codes: list[str],
          now: datetime,
  ) -> None:
      """Set `deleted_at = now` on stored videos missing from `alive_video_codes`."""
      # NOTE(review): with an empty `alive_video_codes` (e.g. the search
      # returned nothing) this presumably flags every stored video as
      # deleted - confirm that this cannot happen on an API failure.
      alive = set (alive_video_codes)

      videos = (Video.where_not_in ('code', alive_video_codes)
                     .where_null ('deleted_at')
                     .get ())
      for video in videos:
          # The query already excludes alive codes; the membership test is
          # kept as a cheap guard.
          if video.code not in alive:
              video.deleted_at = now
              video.save ()
  def fetch_comments (
          video_code: str,
  ) -> list[CommentResult]:
      """
      Fetch the comments of one video.

      Two requests are made: a POST to the guest watch API to obtain the
      `nvComment` access data (server, params, thread key), then a POST to
      `<server>/v1/threads` with that data.  The function sleeps 1.2 s before
      the first request.

      Args:
          video_code: Video identifier inserted into the watch API URL.

      Returns:
          The `comments` list of the thread at index 1 of the response, or an
          empty list when either response lacks the expected structure.
      """
      time.sleep (1.2)

      headers = { 'X-Frontend-Id': '6',
                  'X-Frontend-Version': '0' }

      # Ten random alphanumerics, an underscore, then a random 13-digit number.
      action_track_id = (
              ''.join (random.choice (string.ascii_letters + string.digits)
                       for _ in range (10))
              + '_'
              + str (random.randrange (10 ** 12, 10 ** 13)))

      url = (f"https://www.nicovideo.jp/api/watch/v3_guest/{ video_code }"
             + f"?actionTrackId={ action_track_id }")

      res = requests.post (url, headers = headers, timeout = 60).json ()

      try:
          nv_comment = res['data']['comment']['nvComment']
      except (KeyError, TypeError):
          # TypeError covers a null intermediate node such as `"data": null`,
          # which would otherwise abort the whole run for one video.
          return []
      if nv_comment is None:
          return []

      headers = { 'X-Frontend-Id': '6',
                  'X-Frontend-Version': '0',
                  'Content-Type': 'application/json' }

      params = { 'params': nv_comment['params'],
                 'additionals': { },
                 'threadKey': nv_comment['threadKey'] }

      url = nv_comment['server'] + '/v1/threads'

      res = (requests.post (url, json.dumps (params),
                            headers = headers,
                            timeout = 60)
             .json ())

      try:
          # NOTE(review): only the thread at index 1 is read; which thread
          # that is cannot be told from this file - confirm.
          return res['data']['threads'][1]['comments']
      except (IndexError, KeyError, TypeError):
          return []
  def search_nico_by_tag (
          tag: str,
  ) -> list[VideoResult]:
      """Search for videos carrying a single tag; delegates to `search_nico_by_tags`."""
      single_tag_list = [tag]
      return search_nico_by_tags (single_tag_list)
  def search_nico_by_tags (
          tags: list[str],
  ) -> list[VideoResult]:
      """
      Collect every video that carries at least one of `tags`.

      The snapshot search API is queried in consecutive 15-day windows of
      `startTime`, beginning at 2022-12-03 and continuing until the window
      start passes the current local time.  Within a window the results are
      paged through with `_offset`, 100 at a time, so windows holding more
      than 100 videos are no longer truncated.  The function sleeps 1.2 s
      before every request.

      Args:
          tags: Tag names; they are joined with ' OR ' and matched against
              the `tagsExact` target.

      Returns:
          The concatenated `data` entries of all responses.  A response
          without a `data` key ends the current window without raising.
      """
      today = datetime.now ()

      url = ('https://snapshot.search.nicovideo.jp'
             + '/api/v2/snapshot/video/contents/search')

      page_size = 100

      result_data: list[VideoResult] = []
      to = datetime (2022, 12, 3)
      while to <= today:
          until = to + timedelta (days = 14)

          query_filter = json.dumps ({ 'type': 'or',
                                       'filters': [
                                           { 'type': 'range',
                                             'field': 'startTime',
                                             'from': f"{ to.date ().isoformat () }T00:00:00+09:00",
                                             'to': f"{ until.date ().isoformat () }T23:59:59+09:00",
                                             'include_lower': True }] })

          params: VideoSearchParam = { 'q': ' OR '.join (tags),
                                       'targets': 'tagsExact',
                                       '_sort': '-viewCounter',
                                       'fields': ('contentId,'
                                                  'title,'
                                                  'tags,'
                                                  'description,'
                                                  'viewCounter,'
                                                  'startTime'),
                                       '_limit': page_size,
                                       'jsonFilter': query_filter }

          offset = 0
          while True:
              time.sleep (1.2)

              # `_offset` is added to a plain copy so the TypedDict stays as declared.
              query = cast (dict[str, int | str], { **params, '_offset': offset })
              res = requests.get (url, params = query, timeout = 60).json ()

              try:
                  page = res['data']
              except KeyError:
                  # Best effort, as before: skip the rest of this window.
                  break

              result_data += page

              # A short page means the window is exhausted.
              if len (page) < page_size:
                  break
              offset += len (page)

          to = until + timedelta (days = 1)

      return result_data
  class VideoSearchParam (TypedDict):
      """Query parameters sent to the snapshot video search endpoint."""

      # Search expression; built by joining tag names with ' OR '.
      q: str
      # Field(s) the expression is matched against.
      targets: str
      # Sort key; a leading '-' is used for the view-counter sort.
      _sort: str
      # Comma-separated list of fields to return.
      fields: str
      # Maximum number of results per request.
      _limit: int
      # JSON-encoded filter object.
      jsonFilter: str
  188. class VideoResult (TypedDict):
  189. contentId: str
  190. title: str
  191. tags: str
  192. description: str | None
  193. viewCounter: int
  194. startTime: str
  class CommentResult (TypedDict):
      """One comment entry as returned by the comment threads endpoint."""

      id: str
      # Comment number within the video.
      no: int
      # Position in the video, in milliseconds.
      vposMs: int
      # Comment text.
      body: str
      commands: list[str]
      # Identifier of the posting user; stored as the user's code.
      userId: str
      isPremium: bool
      score: int
      # ISO 8601 timestamp string.
      postedAt: str
      nicoruCount: int
      nicoruId: Any
      source: str
      isMyPost: bool
  def normalise (
          s: str,
  ) -> str:
      """
      Return a canonical form of `s` for loose comparison.

      The string is NFKC-normalised, passed through `jaconv.hira2kata`
      (hiragana to katakana) and finally lower-cased.
      """
      compat_folded = unicodedata.normalize ('NFKC', s)
      as_katakana = jaconv.hira2kata (compat_folded)
      return as_katakana.lower ()
  # Run the synchronisation only when this file is executed as a script.
  if __name__ == '__main__':
      main ()