ぼざろクリーチャーシリーズ DB 兼 API(自分用)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

update_db.py 8.2 KiB

2 months ago
2 months ago
2 months ago
1 month ago
1 month ago
1 month ago
1 month ago
1 month ago
1 month ago
1 month ago
1 month ago
2 months ago
2 months ago
2 months ago
2 months ago
2 months ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. # pylint: disable = missing-class-docstring
  2. # pylint: disable = missing-function-docstring
  3. """
  4. 日次で実行し,ぼざクリ DB を最新に更新する.
  5. """
  6. from __future__ import annotations
  7. import json
  8. import os
  9. import random
  10. import string
  11. import time
  12. import unicodedata
  13. from datetime import datetime, timedelta
  14. from typing import Any, TypedDict, cast
  15. import requests
  16. from eloquent import DatabaseManager, Model
  17. from models import Comment, Tag, User, Video, VideoHistory, VideoTag
  18. def main (
  19. ) -> None:
  20. config: dict[str, DbConfig] = { 'mysql': { 'driver': 'mysql',
  21. 'host': 'localhost',
  22. 'database': 'nizika_nico',
  23. 'user': os.environ['MYSQL_USER'],
  24. 'password': os.environ['MYSQL_PASS'],
  25. 'prefix': '' } }
  26. db = DatabaseManager (config)
  27. Model.set_connection_resolver (db)
  28. now = datetime.now ()
  29. api_data = search_nico_by_tags (['伊地知ニジカ', 'ぼざろクリーチャーシリーズ'])
  30. update_tables (api_data, now)
  31. def update_tables (
  32. api_data: list[VideoResult],
  33. now: datetime,
  34. ) -> None:
  35. alive_video_codes: list[str] = []
  36. for datum in api_data:
  37. tag_names: list[str] = datum['tags'].split ()
  38. video = Video ()
  39. video.code = datum['contentId']
  40. video.title = datum['title']
  41. video.description = datum['description'] or ''
  42. video.uploaded_at = datetime.fromisoformat (datum['startTime'])
  43. video.deleted_at = None
  44. video.upsert ()
  45. alive_video_codes.append (video.code)
  46. video_history = VideoHistory ()
  47. video_history.video_id = video.id
  48. video_history.fetched_at = now
  49. video_history.views_count = datum['viewCounter']
  50. video_history.save ()
  51. video_tags = video.video_tags.where_not_null ('untagged_at').get ()
  52. tag: Tag | None
  53. video_tag: VideoTag | None
  54. for video_tag in video_tags:
  55. tag = video_tag.tag
  56. if (tag is not None
  57. and (normalise (tag.name) not in map (normalise, tag_names))):
  58. video_tag.untagged_at = now
  59. video_tag.save ()
  60. for tag_name in tag_names:
  61. tag = Tag.where ('name', tag_name).first ()
  62. if tag is None:
  63. tag = Tag ()
  64. tag.name = tag_name
  65. tag.save ()
  66. video_tag = (VideoTag.where ('video_id', video.id)
  67. .where ('tag_id', tag.id)
  68. .where_null ('untagged_at')
  69. .first ())
  70. if video_tag is None:
  71. video_tag = VideoTag ()
  72. video_tag.video_id = video.id
  73. video_tag.tag_id = tag.id
  74. video_tag.tagged_at = now
  75. video_tag.untagged_at = None
  76. video_tag.save ()
  77. for com in fetch_comments (video.code):
  78. user = User.where ('code', com['userId']).first ()
  79. if user is None:
  80. user = User ()
  81. user.code = com['userId']
  82. user.save ()
  83. comment = Comment ()
  84. comment.video_id = video.id
  85. comment.comment_no = com['no']
  86. comment.user_id = user.id
  87. comment.content = com['body']
  88. comment.posted_at = datetime.fromisoformat (com['postedAt'])
  89. comment.nico_count = com['nicoruCount']
  90. comment.vpos_ms = com['vposMs']
  91. comment.upsert ()
  92. # 削除動画
  93. videos = (Video.where_not_in ('code', alive_video_codes)
  94. .where_null ('deleted_at')
  95. .get ())
  96. for video in videos:
  97. if video.code not in alive_video_codes:
  98. video.deleted_at = now
  99. video.save ()
  100. def fetch_comments (
  101. video_code: str,
  102. ) -> list[CommentResult]:
  103. time.sleep (1.2)
  104. headers = { 'X-Frontend-Id': '6',
  105. 'X-Frontend-Version': '0' }
  106. action_track_id = (
  107. ''.join (random.choice (string.ascii_letters + string.digits)
  108. for _ in range (10))
  109. + '_'
  110. + str (random.randrange (10 ** 12, 10 ** 13)))
  111. url = (f"https://www.nicovideo.jp/api/watch/v3_guest/{ video_code }"
  112. + f"?actionTrackId={ action_track_id }")
  113. res = requests.post (url, headers = headers, timeout = 60).json ()
  114. try:
  115. nv_comment = res['data']['comment']['nvComment']
  116. except KeyError:
  117. return []
  118. if nv_comment is None:
  119. return []
  120. headers = { 'X-Frontend-Id': '6',
  121. 'X-Frontend-Version': '0',
  122. 'Content-Type': 'application/json' }
  123. params = { 'params': nv_comment['params'],
  124. 'additionals': { },
  125. 'threadKey': nv_comment['threadKey'] }
  126. url = nv_comment['server'] + '/v1/threads'
  127. res = (requests.post (url, json.dumps (params),
  128. headers = headers,
  129. timeout = 60)
  130. .json ())
  131. try:
  132. return res['data']['threads'][1]['comments']
  133. except (IndexError, KeyError):
  134. return []
  135. def search_nico_by_tag (
  136. tag: str,
  137. ) -> list[VideoResult]:
  138. return search_nico_by_tags ([tag])
  139. def search_nico_by_tags (
  140. tags: list[str],
  141. ) -> list[VideoResult]:
  142. today = datetime.now ()
  143. url = ('https://snapshot.search.nicovideo.jp'
  144. + '/api/v2/snapshot/video/contents/search')
  145. result_data: list[VideoResult] = []
  146. to = datetime (2022, 12, 3)
  147. while to <= today:
  148. time.sleep (1.2)
  149. until = to + timedelta (days = 14)
  150. # pylint: disable = consider-using-f-string
  151. query_filter = json.dumps ({ 'type': 'or',
  152. 'filters': [
  153. { 'type': 'range',
  154. 'field': 'startTime',
  155. 'from': ('%04d-%02d-%02dT00:00:00+09:00'
  156. % (to.year, to.month, to.day)),
  157. 'to': ('%04d-%02d-%02dT23:59:59+09:00'
  158. % (until.year, until.month, until.day)),
  159. 'include_lower': True }] })
  160. params: VideoSearchParam = { 'q': ' OR '.join (tags),
  161. 'targets': 'tagsExact',
  162. '_sort': '-viewCounter',
  163. 'fields': ('contentId,'
  164. 'title,'
  165. 'tags,'
  166. 'description,'
  167. 'viewCounter,'
  168. 'startTime'),
  169. '_limit': 100,
  170. 'jsonFilter': query_filter }
  171. res = requests.get (url, params = cast (dict[str, int | str], params), timeout = 60).json ()
  172. try:
  173. result_data += res['data']
  174. except KeyError:
  175. pass
  176. to = until + timedelta (days = 1)
  177. return result_data
  178. class DbConfig (TypedDict):
  179. driver: str
  180. host: str
  181. database: str
  182. user: str
  183. password: str
  184. prefix: str
  185. class VideoSearchParam (TypedDict):
  186. q: str
  187. targets: str
  188. _sort: str
  189. fields: str
  190. _limit: int
  191. jsonFilter: str
  192. class VideoResult (TypedDict):
  193. contentId: str
  194. title: str
  195. tags: str
  196. description: str | None
  197. viewCounter: int
  198. startTime: str
  199. class CommentResult (TypedDict):
  200. id: str
  201. no: int
  202. vposMs: int
  203. body: str
  204. commands: list[str]
  205. userId: str
  206. isPremium: bool
  207. score: int
  208. postedAt: str
  209. nicoruCount: int
  210. nicoruId: Any
  211. source: str
  212. isMyPost: bool
  213. def normalise (
  214. s: str,
  215. ) -> str:
  216. return unicodedata.normalize ('NFKC', s).lower ()
# Run the update only when executed as a script, not when imported.
if __name__ == '__main__':
    main ()