ぼざろクリーチャーシリーズ DB 兼 API(自分用)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

update_db.py 11 KiB

2 months ago
2 months ago
2 months ago
1 month ago
1 month ago
1 month ago
1 month ago
1 month ago
1 month ago
1 month ago
1 month ago
2 months ago
2 months ago
2 months ago
2 months ago
2 months ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406
  1. """
  2. 日次で実行し,ぼざクリ DB を最新に更新する.
  3. """
  4. from __future__ import annotations
  5. import json
  6. import os
  7. import random
  8. import string
  9. import time
  10. from dataclasses import dataclass
  11. from datetime import date, datetime, timedelta
  12. from typing import Any, Type, TypedDict, cast
  13. import requests
  14. from eloquent import DatabaseManager, Model
  15. from eloquent.orm.relations.dynamic_property import DynamicProperty
  16. config: dict[str, DbConfig] = { 'mysql': { 'driver': 'mysql',
  17. 'host': 'localhost',
  18. 'database': 'nizika_nico',
  19. 'user': os.environ['MYSQL_USER'],
  20. 'password': os.environ['MYSQL_PASS'],
  21. 'prefix': '' } }
  22. db = DatabaseManager (config)
  23. Model.set_connection_resolver (db)
  24. def main (
  25. ) -> None:
  26. now = datetime.now ()
  27. api_data = search_nico_by_tags (['伊地知ニジカ', 'ぼざろクリーチャーシリーズ'])
  28. update_tables (api_data, now)
  29. def update_tables (
  30. api_data: list[VideoResult],
  31. now: datetime,
  32. ) -> None:
  33. alive_video_codes: list[str] = []
  34. for datum in api_data:
  35. tag_names: list[str] = datum['tags'].split ()
  36. video = Video ()
  37. video.code = datum['contentId']
  38. video.title = datum['title']
  39. video.description = datum['description'] or ''
  40. video.uploaded_at = datetime.fromisoformat (datum['startTime'])
  41. video.deleted_at = None
  42. video.upsert ()
  43. alive_video_codes.append (video.code)
  44. video_history = VideoHistory ()
  45. video_history.video_id = video.id
  46. video_history.fetched_at = now
  47. video_history.views_count = datum['viewCounter']
  48. video_history.save ()
  49. video_tags = video.video_tags.where_not_null ('untagged_at').get ()
  50. for video_tag in video_tags:
  51. tag = video_tag.tag
  52. if (tag is not None
  53. and (tag.name.upper () not in map (str.upper, tag_names))):
  54. video_tag.untagged_at = now
  55. video_tag.save ()
  56. for tag_name in tag_names:
  57. tag = Tag.where ('name', tag_name).first ()
  58. if tag is None:
  59. tag = Tag ()
  60. tag.name = tag_name
  61. tag.save ()
  62. video_tag = (Video.where ('video_id', video.id)
  63. .where ('tag_id', tag.id)
  64. .where_null ('untagged_at')
  65. .first ())
  66. if video_tag is None:
  67. video_tag = VideoTag ()
  68. video_tag.video_id = video.id
  69. video_tag.tag_id = tag.id
  70. video_tag.tagged_at = now
  71. video_tag.untagged_at = None
  72. video_tag.save ()
  73. for com in fetch_comments (video.code):
  74. user = User.where ('code', com['userId']).first ()
  75. if user is None:
  76. user = User ()
  77. user.code = com['userId']
  78. user.save ()
  79. comment = Comment ()
  80. comment.video_id = video.id
  81. comment.comment_no = com['no']
  82. comment.user_id = user.id
  83. comment.content = com['body']
  84. comment.posted_at = datetime.fromisoformat (com['postedAt'])
  85. comment.nico_count = com['nicoruCount']
  86. comment.vpos_ms = com['vposMs']
  87. comment.upsert ()
  88. # 削除動画
  89. videos = (Video.where_not_in ('code', alive_video_codes)
  90. .where_null ('deleted_at')
  91. .get ())
  92. for video in videos:
  93. if video.code not in alive_video_codes:
  94. video.deleted_at = now
  95. video.save ()
  96. def fetch_comments (
  97. video_code: str,
  98. ) -> list[CommentResult]:
  99. time.sleep (1.2)
  100. headers = { 'X-Frontend-Id': '6',
  101. 'X-Frontend-Version': '0' }
  102. action_track_id = (
  103. ''.join (random.choice (string.ascii_letters + string.digits)
  104. for _ in range (10))
  105. + '_'
  106. + str (random.randrange (10 ** 12, 10 ** 13)))
  107. url = (f"https://www.nicovideo.jp/api/watch/v3_guest/{ video_code }"
  108. + f"?actionTrackId={ action_track_id }")
  109. res = requests.post (url, headers = headers, timeout = 60).json ()
  110. try:
  111. nv_comment = res['data']['comment']['nvComment']
  112. except KeyError:
  113. return []
  114. if nv_comment is None:
  115. return []
  116. headers = { 'X-Frontend-Id': '6',
  117. 'X-Frontend-Version': '0',
  118. 'Content-Type': 'application/json' }
  119. params = { 'params': nv_comment['params'],
  120. 'additionals': { },
  121. 'threadKey': nv_comment['threadKey'] }
  122. url = nv_comment['server'] + '/v1/threads'
  123. res = (requests.post (url, json.dumps (params),
  124. headers = headers,
  125. timeout = 60)
  126. .json ())
  127. try:
  128. return res['data']['threads'][1]['comments']
  129. except (IndexError, KeyError):
  130. return []
  131. def search_nico_by_tag (
  132. tag: str,
  133. ) -> list[VideoResult]:
  134. return search_nico_by_tags ([tag])
  135. def search_nico_by_tags (
  136. tags: list[str],
  137. ) -> list[VideoResult]:
  138. today = datetime.now ()
  139. url = ('https://snapshot.search.nicovideo.jp'
  140. + '/api/v2/snapshot/video/contents/search')
  141. result_data: list[VideoResult] = []
  142. to = datetime (2022, 12, 3)
  143. while to <= today:
  144. time.sleep (1.2)
  145. until = to + timedelta (days = 14)
  146. query_filter = json.dumps ({ 'type': 'or',
  147. 'filters': [
  148. { 'type': 'range',
  149. 'field': 'startTime',
  150. 'from': '%04d-%02d-%02dT00:00:00+09:00' % (to.year, to.month, to.day),
  151. 'to': '%04d-%02d-%02dT23:59:59+09:00' % (until.year, until.month, until.day),
  152. 'include_lower': True }] })
  153. params: VideoSearchParam = { 'q': ' OR '.join (tags),
  154. 'targets': 'tagsExact',
  155. '_sort': '-viewCounter',
  156. 'fields': ('contentId,'
  157. 'title,'
  158. 'tags,'
  159. 'description,'
  160. 'viewCounter,'
  161. 'startTime'),
  162. '_limit': 100,
  163. 'jsonFilter': query_filter }
  164. res = requests.get (url, params = cast (dict[str, int | str], params), timeout = 60).json ()
  165. try:
  166. result_data += res['data']
  167. except KeyError:
  168. pass
  169. to = until + timedelta (days = 1)
  170. return result_data
  171. class Comment (Model):
  172. __timestamps__ = False
  173. @property
  174. def video (
  175. self,
  176. ) -> DynamicProperty:
  177. return self.belongs_to (Video)
  178. @property
  179. def user (
  180. self,
  181. ) -> DynamicProperty:
  182. return self.belongs_to (User)
  183. class Tag (Model):
  184. __timestamps__ = False
  185. @property
  186. def video_tags (
  187. self,
  188. ) -> DynamicProperty:
  189. return self.has_many (VideoTag)
  190. class User (Model):
  191. __timestamps__ = False
  192. @property
  193. def comments (
  194. self,
  195. ) -> DynamicProperty:
  196. return self.has_many (Comment)
  197. class Video (Model):
  198. __timestamps__ = False
  199. @property
  200. def video_histories (
  201. self,
  202. ) -> DynamicProperty:
  203. return self.has_many (VideoHistory)
  204. @property
  205. def video_tags (
  206. self,
  207. ) -> DynamicProperty:
  208. return self.has_many (VideoTag)
  209. @property
  210. def comments (
  211. self,
  212. ) -> DynamicProperty:
  213. return self.has_many (Comment)
  214. def upsert (
  215. self,
  216. ) -> None:
  217. row = Video.where ('code', self.code).first ()
  218. if row is not None:
  219. self.id = row.id
  220. self.save ()
  221. class VideoHistory (Model):
  222. __timestamps__ = False
  223. @property
  224. def video (
  225. self,
  226. ) -> DynamicProperty:
  227. return self.belongs_to (Video)
  228. def upsert (
  229. self,
  230. ) -> None:
  231. row = (Video
  232. .where ('video_id', self.video_id)
  233. .where ('fetched_at', self.fetched_at)
  234. .first ())
  235. if row is not None:
  236. self.id = row.id
  237. self.save ()
  238. class VideoTag (Model):
  239. __timestamps__ = False
  240. @property
  241. def video (
  242. self,
  243. ) -> DynamicProperty:
  244. return self.belongs_to (Video)
  245. @property
  246. def tag (
  247. self,
  248. ) -> DynamicProperty:
  249. return self.belongs_to (Tag)
  250. def upsert (
  251. self,
  252. ) -> None:
  253. row = (Video
  254. .where ('video_id', self.video_id)
  255. .where ('tag_id', self.tag_id)
  256. .first ())
  257. if row is not None:
  258. self.id = row.id
  259. self.save ()
  260. class DbConfig (TypedDict):
  261. driver: str
  262. host: str
  263. database: str
  264. user: str
  265. password: str
  266. prefix: str
  267. class VideoSearchParam (TypedDict):
  268. q: str
  269. targets: str
  270. _sort: str
  271. fields: str
  272. _limit: int
  273. jsonFilter: str
  274. class VideoResult (TypedDict):
  275. contentId: str
  276. title: str
  277. tags: str
  278. description: str | None
  279. viewCounter: int
  280. startTime: str
  281. class CommentResult (TypedDict):
  282. id: str
  283. no: int
  284. vposMs: int
  285. body: str
  286. commands: list[str]
  287. userId: str
  288. isPremium: bool
  289. score: int
  290. postedAt: str
  291. nicoruCount: int
  292. nicoruId: Any
  293. source: str
  294. isMyPost: bool
  295. class CommentRow (TypedDict):
  296. id: int
  297. video_id: int
  298. comment_no: int
  299. user_id: int
  300. content: str
  301. posted_at: datetime
  302. nico_count: int
  303. vpos_ms: int | None
  304. class TagRow (TypedDict):
  305. id: int
  306. name: str
  307. class UserRow (TypedDict):
  308. id: int
  309. code: str
  310. class VideoRow (TypedDict):
  311. id: int
  312. code: str
  313. title: str
  314. description: str
  315. uploaded_at: datetime
  316. deleted_at: datetime | None
  317. class VideoHistoryRow (TypedDict):
  318. id: int
  319. video_id: int
  320. fetched_at: date
  321. views_count: int
  322. class VideoTagRow (TypedDict):
  323. id: int
  324. video_id: int
  325. tag_id: int
  326. tagged_at: date
  327. untagged_at: date | None
  328. if __name__ == '__main__':
  329. main ()