ぼざろクリーチャーシリーズ DB 兼 API(自分用)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

288 lines
9.3 KiB

  1. # pylint: disable = missing-class-docstring
  2. # pylint: disable = missing-function-docstring
  3. """
  4. 日次で実行し,ぼざクリ DB を最新に更新する.
  5. """
  6. from __future__ import annotations
  7. import json
  8. import os
  9. import random
  10. import string
  11. import time
  12. import unicodedata
  13. from datetime import datetime, timedelta
  14. from typing import Any, TypedDict, cast
  15. import jaconv
  16. import requests
  17. from eloquent import DatabaseManager, Model
  18. from db.config import DB
  19. from db.models import (Comment,
  20. Tag,
  21. TrackedVideo,
  22. User,
  23. Video,
  24. VideoHistory,
  25. VideoTag)
  26. def main (
  27. ) -> None:
  28. now = datetime.now ()
  29. api_data = search_nico_by_tags (['伊地知ニジカ',
  30. 'ぼざろクリーチャーシリーズ',
  31. 'ぼざろクリーチャーシリーズ外伝'])
  32. DB.begin_transaction ()
  33. try:
  34. update_tables (api_data, now)
  35. DB.commit ()
  36. except Exception:
  37. DB.rollback ()
  38. raise
  39. def update_tables (
  40. api_data: list[VideoResult],
  41. now: datetime,
  42. ) -> None:
  43. alive_video_codes: list[str] = []
  44. for datum in api_data:
  45. tag_names: list[str] = datum['tags'].split ()
  46. user: User | None = None
  47. if datum['userId']:
  48. user = User.where('code', str (datum['userId'])).first ()
  49. if user is None:
  50. user = User ()
  51. user.code = str (datum['userId'])
  52. user.save ()
  53. video = Video ()
  54. video.code = datum['contentId']
  55. video.user_id = user.id if user else None
  56. video.title = datum['title']
  57. video.description = datum['description'] or ''
  58. video.uploaded_at = datetime.fromisoformat (datum['startTime'])
  59. video.deleted_at = None
  60. video.upsert ()
  61. alive_video_codes.append (video.code)
  62. video_history = VideoHistory ()
  63. video_history.video_id = video.id
  64. video_history.fetched_at = now
  65. video_history.views_count = datum['viewCounter']
  66. video_history.upsert ()
  67. video_tags = [video_tag for video_tag in video.video_tags
  68. if video_tag.untagged_at is None]
  69. tag: Tag | None
  70. video_tag: VideoTag | None
  71. for video_tag in video_tags:
  72. tag = video_tag.tag
  73. if (tag is not None
  74. and (normalise (tag.name) not in map (normalise, tag_names))):
  75. video_tag.untagged_at = now
  76. video_tag.save ()
  77. for tag_name in tag_names:
  78. tag = Tag.where ('name', tag_name).first ()
  79. if tag is None:
  80. tag = Tag ()
  81. tag.name = tag_name
  82. tag.save ()
  83. video_tag = (VideoTag.where ('video_id', video.id)
  84. .where ('tag_id', tag.id)
  85. .where_null ('untagged_at')
  86. .first ())
  87. if video_tag is None:
  88. video_tag = VideoTag ()
  89. video_tag.video_id = video.id
  90. video_tag.tag_id = tag.id
  91. video_tag.tagged_at = now
  92. video_tag.untagged_at = None
  93. video_tag.save ()
  94. for com in fetch_comments (video.code):
  95. user = User.where ('code', com['userId']).first ()
  96. if user is None:
  97. user = User ()
  98. user.code = com['userId']
  99. user.save ()
  100. comment = Comment ()
  101. comment.video_id = video.id
  102. comment.comment_no = com['no']
  103. comment.user_id = user.id
  104. comment.content = com['body']
  105. comment.posted_at = datetime.fromisoformat (com['postedAt'])
  106. comment.nico_count = com['nicoruCount']
  107. comment.vpos_ms = com['vposMs']
  108. comment.upsert ()
  109. # 削除動画
  110. videos = (Video.where_not_in ('code', alive_video_codes)
  111. .where_null ('deleted_at')
  112. .get ())
  113. for video in videos:
  114. if video.code not in alive_video_codes:
  115. video.deleted_at = now
  116. video.save ()
  117. def fetch_video_data (
  118. video_code: str,
  119. ) -> dict[str, Any]:
  120. time.sleep (1.2)
  121. headers = { 'X-Frontend-Id': '6',
  122. 'X-Frontend-Version': '0' }
  123. action_track_id = (
  124. ''.join (random.choice (string.ascii_letters + string.digits)
  125. for _ in range (10))
  126. + '_'
  127. + str (random.randrange (10 ** 12, 10 ** 13)))
  128. url = (f"https://www.nicovideo.jp/api/watch/v3_guest/{ video_code }"
  129. + f"?actionTrackId={ action_track_id }")
  130. return requests.post (url, headers = headers, timeout = 60).json ()
  131. def fetch_comments (
  132. video_code: str,
  133. ) -> list[CommentResult]:
  134. try:
  135. nv_comment = fetch_video_data (video_code)['data']['comment']['nvComment']
  136. except KeyError:
  137. return []
  138. if nv_comment is None:
  139. return []
  140. headers = { 'X-Frontend-Id': '6',
  141. 'X-Frontend-Version': '0',
  142. 'Content-Type': 'application/json' }
  143. params = { 'params': nv_comment['params'],
  144. 'additionals': { },
  145. 'threadKey': nv_comment['threadKey'] }
  146. url = nv_comment['server'] + '/v1/threads'
  147. res = (requests.post (url, json.dumps (params),
  148. headers = headers,
  149. timeout = 60)
  150. .json ())
  151. try:
  152. return res['data']['threads'][1]['comments']
  153. except (IndexError, KeyError):
  154. return []
  155. def search_nico_by_tags (
  156. tags: list[str],
  157. ) -> list[VideoResult]:
  158. today = datetime.now ()
  159. url = ('https://snapshot.search.nicovideo.jp'
  160. + '/api/v2/snapshot/video/contents/search')
  161. result_data: list[VideoResult] = []
  162. to = datetime (2022, 12, 3)
  163. while to <= today:
  164. time.sleep (1.2)
  165. until = to + timedelta (days = 14)
  166. # pylint: disable = consider-using-f-string
  167. query_filter = json.dumps ({ 'type': 'or',
  168. 'filters': [
  169. { 'type': 'range',
  170. 'field': 'startTime',
  171. 'from': ('%04d-%02d-%02dT00:00:00+09:00'
  172. % (to.year, to.month, to.day)),
  173. 'to': ('%04d-%02d-%02dT23:59:59+09:00'
  174. % (until.year, until.month, until.day)),
  175. 'include_lower': True }] })
  176. params: VideoSearchParam = { 'q': ' OR '.join (tags),
  177. 'targets': 'tagsExact',
  178. '_sort': '-viewCounter',
  179. 'fields': ('contentId,'
  180. 'userId,'
  181. 'title,'
  182. 'tags,'
  183. 'description,'
  184. 'viewCounter,'
  185. 'startTime'),
  186. '_limit': 100,
  187. 'jsonFilter': query_filter }
  188. res = requests.get (url, params = cast (dict[str, int | str], params), timeout = 60).json ()
  189. try:
  190. result_data += res['data']
  191. except KeyError:
  192. pass
  193. to = until + timedelta (days = 1)
  194. for video in TrackedVideo.get ():
  195. if video.code in map (lambda v: v['contentId'], result_data):
  196. continue
  197. try:
  198. video_data = fetch_video_data (video.code)['data']
  199. result_data.append ({
  200. 'contentId': video.code,
  201. 'userId': video_data['video']['userId'],
  202. 'title': video_data['video']['title'],
  203. 'tags': ' '.join (map (lambda t: t['name'],
  204. video_data['tag']['items'])),
  205. 'description': video_data['video']['description'],
  206. 'viewCounter': video_data['video']['count']['view'],
  207. 'startTime': video_data['video']['registeredAt'] })
  208. except Exception:
  209. pass
  210. return result_data
  211. class VideoSearchParam (TypedDict):
  212. q: str
  213. targets: str
  214. _sort: str
  215. fields: str
  216. _limit: int
  217. jsonFilter: str
  218. class VideoResult (TypedDict):
  219. contentId: str
  220. userId: int | None
  221. title: str
  222. tags: str
  223. description: str | None
  224. viewCounter: int
  225. startTime: str
  226. class CommentResult (TypedDict):
  227. id: str
  228. no: int
  229. vposMs: int
  230. body: str
  231. commands: list[str]
  232. userId: str
  233. isPremium: bool
  234. score: int
  235. postedAt: str
  236. nicoruCount: int
  237. nicoruId: Any
  238. source: str
  239. isMyPost: bool
  240. def normalise (
  241. s: str,
  242. ) -> str:
  243. return jaconv.hira2kata (unicodedata.normalize ('NFKC', s)).lower ()
  244. if __name__ == '__main__':
  245. main ()