ぼざろクリーチャーシリーズ DB 兼 API(自分用)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

update_db.py 11 KiB

2 months ago
2 months ago
2 months ago
2 months ago
1 month ago
1 month ago
1 month ago
1 month ago
1 month ago
1 month ago
1 month ago
1 month ago
2 months ago
2 months ago
2 months ago
2 months ago
2 months ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413
  1. """
  2. 日次で実行し,ぼざクリ DB を最新に更新する.
  3. """
  4. from __future__ import annotations
  5. import json
  6. import os
  7. import random
  8. import string
  9. import time
  10. import unicodedata
  11. from dataclasses import dataclass
  12. from datetime import date, datetime, timedelta
  13. from typing import Any, Type, TypedDict, cast
  14. import requests
  15. from eloquent import DatabaseManager, Model
  16. from eloquent.orm.relations.dynamic_property import DynamicProperty
  # Connection settings handed to the ORM.  The credentials are read from the
  # environment, so a missing MYSQL_USER or MYSQL_PASS raises KeyError here.
  config: dict[str, DbConfig] = {
      'mysql': {
          'driver': 'mysql',
          'host': 'localhost',
          'database': 'nizika_nico',
          'user': os.environ['MYSQL_USER'],
          'password': os.environ['MYSQL_PASS'],
          'prefix': '',
      },
  }

  # Register the manager as the connection resolver of the ORM base class.
  db = DatabaseManager(config)
  Model.set_connection_resolver(db)
  def main() -> None:
      """Search for the tracked tags and write the results to the database.

      The timestamp is taken before the search starts; that single value is
      what ``update_tables`` receives as ``now``.
      """
      started_at = datetime.now()
      search_tags = ['伊地知ニジカ', 'ぼざろクリーチャーシリーズ']
      found_videos = search_nico_by_tags(search_tags)
      update_tables(found_videos, started_at)
  def update_tables(
          api_data: list[VideoResult],
          now: datetime,
  ) -> None:
      """Synchronise the database with one batch of search results.

      For every entry of ``api_data`` the video row is upserted, a view-count
      history row stamped with ``now`` is saved, the tag links are reconciled
      and the comments are stored.  Afterwards every stored video that has no
      ``deleted_at`` yet and whose code is absent from ``api_data`` is marked
      as deleted.

      NOTE(review): with an empty ``api_data`` every not-yet-deleted video is
      presumably marked as deleted (depends on how ``where_not_in`` treats an
      empty list) -- confirm that this is intended.

      Args:
          api_data: Video entries as produced by ``search_nico_by_tags``.
          now: Timestamp written to ``fetched_at``, ``tagged_at``,
              ``untagged_at`` and ``deleted_at``.
      """
      alive_video_codes: list[str] = []

      for datum in api_data:
          video = Video()
          video.code = datum['contentId']
          video.title = datum['title']
          video.description = datum['description'] or ''
          video.uploaded_at = datetime.fromisoformat(datum['startTime'])
          video.deleted_at = None  # the video was found, so it is not deleted
          video.upsert()
          alive_video_codes.append(video.code)

          video_history = VideoHistory()
          video_history.video_id = video.id
          video_history.fetched_at = now
          video_history.views_count = datum['viewCounter']
          video_history.save()

          _sync_video_tags(video, datum['tags'].split(), now)
          _store_comments(video)

      _mark_deleted_videos(alive_video_codes, now)


  def _sync_video_tags(
          video: Video,
          tag_names: list[str],
          now: datetime,
  ) -> None:
      """Close tag links that vanished from ``tag_names`` and open new ones."""
      # Normalise once instead of once per stored link.
      current_names = {normalise(name) for name in tag_names}

      # Only links that are still open (untagged_at IS NULL) can be closed;
      # selecting the already-closed ones would leave removed tags active.
      for video_tag in video.video_tags.where_null('untagged_at').get():
          tag = video_tag.tag
          if tag is not None and normalise(tag.name) not in current_names:
              video_tag.untagged_at = now
              video_tag.save()

      for tag_name in tag_names:
          tag = Tag.where('name', tag_name).first()
          if tag is None:
              tag = Tag()
              tag.name = tag_name
              tag.save()

          # The open link lives on the video/tag link model, not on Video.
          video_tag = (VideoTag.where('video_id', video.id)
                       .where('tag_id', tag.id)
                       .where_null('untagged_at')
                       .first())
          if video_tag is None:
              video_tag = VideoTag()
              video_tag.video_id = video.id
              video_tag.tag_id = tag.id
              video_tag.tagged_at = now
              video_tag.untagged_at = None
              video_tag.save()


  def _store_comments(
          video: Video,
  ) -> None:
      """Fetch the comments of ``video`` and upsert each one with its user."""
      for com in fetch_comments(video.code):
          user = User.where('code', com['userId']).first()
          if user is None:
              user = User()
              user.code = com['userId']
              user.save()

          comment = Comment()
          comment.video_id = video.id
          comment.comment_no = com['no']
          comment.user_id = user.id
          comment.content = com['body']
          comment.posted_at = datetime.fromisoformat(com['postedAt'])
          comment.nico_count = com['nicoruCount']
          comment.vpos_ms = com['vposMs']
          comment.upsert()


  def _mark_deleted_videos(
          alive_video_codes: list[str],
          now: datetime,
  ) -> None:
      """Stamp ``deleted_at`` on stored videos missing from the latest fetch."""
      alive = set(alive_video_codes)
      videos = (Video.where_not_in('code', alive_video_codes)
                .where_null('deleted_at')
                .get())
      for video in videos:
          # Same membership re-check as before, against a set for O(1) lookup.
          if video.code not in alive:
              video.deleted_at = now
              video.save()
  def fetch_comments(
          video_code: str,
  ) -> list[CommentResult]:
      """Download the comments of one video.

      Two POST requests are made: the first to the guest watch endpoint to get
      the ``nvComment`` descriptor, the second to ``<server>/v1/threads`` with
      that descriptor's ``params`` and ``threadKey``.  The function sleeps for
      1.2 seconds before the first request.

      Args:
          video_code: Video identifier inserted into the watch URL.

      Returns:
          The ``comments`` list of the second entry of the returned thread
          list, or an empty list when the descriptor is missing or null, or
          when the thread response lacks the expected keys or that entry.
      """
      time.sleep(1.2)

      # Random tracking id: 10 alphanumeric characters, '_', a 13-digit number.
      alphabet = string.ascii_letters + string.digits
      track_head = ''.join(random.choice(alphabet) for _ in range(10))
      track_tail = str(random.randrange(10 ** 12, 10 ** 13))
      action_track_id = track_head + '_' + track_tail

      watch_headers = {'X-Frontend-Id': '6',
                       'X-Frontend-Version': '0'}
      watch_url = (f"https://www.nicovideo.jp/api/watch/v3_guest/{ video_code }"
                   + f"?actionTrackId={ action_track_id }")
      watch = requests.post(watch_url, headers=watch_headers, timeout=60).json()

      try:
          nv_comment = watch['data']['comment']['nvComment']
      except KeyError:
          return []
      if nv_comment is None:
          return []

      thread_headers = {'X-Frontend-Id': '6',
                        'X-Frontend-Version': '0',
                        'Content-Type': 'application/json'}
      payload = {'params': nv_comment['params'],
                 'additionals': {},
                 'threadKey': nv_comment['threadKey']}
      thread_url = nv_comment['server'] + '/v1/threads'
      threads = requests.post(thread_url,
                              data=json.dumps(payload),
                              headers=thread_headers,
                              timeout=60).json()

      try:
          return threads['data']['threads'][1]['comments']
      except (IndexError, KeyError):
          return []
  def search_nico_by_tag(
          tag: str,
  ) -> list[VideoResult]:
      """Search for videos by a single tag.

      Convenience wrapper that passes ``tag`` to ``search_nico_by_tags`` as a
      one-element list and returns its result unchanged.
      """
      single_tag = [tag]
      return search_nico_by_tags(single_tag)
  def search_nico_by_tags(
          tags: list[str],
  ) -> list[VideoResult]:
      """Collect videos matching any of ``tags`` from the snapshot search API.

      The search walks forward from 2022-12-03 to the current time in windows
      of 15 calendar days (start day plus 14), filtering on ``startTime``, and
      sleeps 1.2 seconds before each request.

      Each request asks for at most 100 results and no further pages are
      requested.  A response without a ``data`` key is skipped silently.

      Args:
          tags: Tag names; they are joined with ``' OR '`` into the query.

      Returns:
          The concatenated ``data`` lists of all responses, in request order.
      """
      endpoint = ('https://snapshot.search.nicovideo.jp'
                  + '/api/v2/snapshot/video/contents/search')
      query = ' OR '.join(tags)
      finished_at = datetime.now()

      collected: list[VideoResult] = []
      window_start = datetime(2022, 12, 3)
      while window_start <= finished_at:
          time.sleep(1.2)
          window_end = window_start + timedelta(days=14)

          lower = ('%04d-%02d-%02dT00:00:00+09:00'
                   % (window_start.year, window_start.month, window_start.day))
          upper = ('%04d-%02d-%02dT23:59:59+09:00'
                   % (window_end.year, window_end.month, window_end.day))
          range_filter = {'type': 'range',
                          'field': 'startTime',
                          'from': lower,
                          'to': upper,
                          'include_lower': True}
          query_filter = json.dumps({'type': 'or',
                                     'filters': [range_filter]})

          params: VideoSearchParam = {'q': query,
                                      'targets': 'tagsExact',
                                      '_sort': '-viewCounter',
                                      'fields': ('contentId,'
                                                 'title,'
                                                 'tags,'
                                                 'description,'
                                                 'viewCounter,'
                                                 'startTime'),
                                      '_limit': 100,
                                      'jsonFilter': query_filter}
          response = requests.get(endpoint,
                                  params=cast(dict[str, int | str], params),
                                  timeout=60).json()
          try:
              collected.extend(response['data'])
          except KeyError:
              pass

          # The next window starts on the day after this one ended.
          window_start = window_end + timedelta(days=1)

      return collected
  class Comment(Model):
      """ORM model for a comment posted on a video."""

      __timestamps__ = False

      @property
      def video(
              self,
      ) -> DynamicProperty:
          """Relation to the video this comment belongs to."""
          return self.belongs_to(Video)

      @property
      def user(
              self,
      ) -> DynamicProperty:
          """Relation to the user who posted this comment."""
          return self.belongs_to(User)

      def upsert(
              self,
      ) -> None:
          """Save this comment, reusing the id of an already stored row.

          A stored row is treated as the same comment when both ``video_id``
          and ``comment_no`` match.  Mirrors the ``upsert`` helpers of the
          sibling models; ``update_tables`` calls it for every fetched comment.
          """
          row = (Comment
                 .where('video_id', self.video_id)
                 .where('comment_no', self.comment_no)
                 .first())
          if row is not None:
              self.id = row.id
          self.save()
  class Tag(Model):
      """ORM model for a tag name."""

      __timestamps__ = False

      @property
      def video_tags(self) -> DynamicProperty:
          """Relation to the video/tag link rows that reference this tag."""
          links = self.has_many(VideoTag)
          return links
  class User(Model):
      """ORM model for a comment author."""

      __timestamps__ = False

      @property
      def comments(self) -> DynamicProperty:
          """Relation to the comments posted by this user."""
          posted = self.has_many(Comment)
          return posted
  class Video(Model):
      """ORM model for a video, identified externally by its ``code``."""

      __timestamps__ = False

      @property
      def video_histories(self) -> DynamicProperty:
          """Relation to the history rows recorded for this video."""
          return self.has_many(VideoHistory)

      @property
      def video_tags(self) -> DynamicProperty:
          """Relation to the video/tag link rows of this video."""
          return self.has_many(VideoTag)

      @property
      def comments(self) -> DynamicProperty:
          """Relation to the comments stored for this video."""
          return self.has_many(Comment)

      def upsert(self) -> None:
          """Save this video, reusing the id of a stored row with the same code.

          NOTE(review): whether assigning ``id`` on a freshly constructed
          instance makes ``save`` update the stored row instead of inserting a
          new one depends on the ORM -- confirm.
          """
          existing = Video.where('code', self.code).first()
          if existing is not None:
              self.id = existing.id
          self.save()
  class VideoHistory(Model):
      """ORM model for one view-count sample of a video."""

      __timestamps__ = False

      @property
      def video(
              self,
      ) -> DynamicProperty:
          """Relation to the video this sample belongs to."""
          return self.belongs_to(Video)

      def upsert(
              self,
      ) -> None:
          """Save this sample, reusing the id of an already stored row.

          A stored row is treated as the same sample when both ``video_id``
          and ``fetched_at`` match.
          """
          # Look the row up on this model; the original queried Video, which
          # is filtered here by a column (video_id) only history rows carry.
          row = (VideoHistory
                 .where('video_id', self.video_id)
                 .where('fetched_at', self.fetched_at)
                 .first())
          if row is not None:
              self.id = row.id
          self.save()
  class VideoTag(Model):
      """ORM model for the link between a video and a tag."""

      __timestamps__ = False

      @property
      def video(
              self,
      ) -> DynamicProperty:
          """Relation to the linked video."""
          return self.belongs_to(Video)

      @property
      def tag(
              self,
      ) -> DynamicProperty:
          """Relation to the linked tag."""
          return self.belongs_to(Tag)

      def upsert(
              self,
      ) -> None:
          """Save this link, reusing the id of an already stored row.

          A stored row is treated as the same link when both ``video_id`` and
          ``tag_id`` match; ``untagged_at`` is not part of the lookup.
          """
          # Look the row up on this model; the original queried Video, which
          # is filtered here by columns (video_id, tag_id) of the link rows.
          row = (VideoTag
                 .where('video_id', self.video_id)
                 .where('tag_id', self.tag_id)
                 .first())
          if row is not None:
              self.id = row.id
          self.save()
  class DbConfig(TypedDict):
      """Settings of one database connection (one value of ``config``)."""

      driver: str    # 'mysql' in this file's configuration
      host: str      # server host name
      database: str  # database (schema) name
      user: str      # login name
      password: str  # login password
      prefix: str    # presumably a table-name prefix; empty here -- confirm
  class VideoSearchParam(TypedDict):
      """Query parameters sent with a video search request."""

      q: str           # search expression
      targets: str     # what the expression is matched against
      _sort: str       # sort key
      fields: str      # comma-separated names of the fields to return
      _limit: int      # maximum number of results per request
      jsonFilter: str  # JSON-encoded filter object
  class VideoResult(TypedDict):
      """One video entry of a search response."""

      contentId: str           # video code
      title: str               # video title
      tags: str                # tag names separated by whitespace
      description: str | None  # may be null in the response
      viewCounter: int         # view count at fetch time
      startTime: str           # ISO 8601 upload time
  class CommentResult(TypedDict):
      """One comment entry as returned by ``fetch_comments``."""

      id: str
      no: int              # comment number
      vposMs: int          # presumably playback position in ms -- confirm
      body: str            # comment text
      commands: list[str]
      userId: str          # code of the posting user
      isPremium: bool
      score: int
      postedAt: str        # ISO 8601 timestamp
      nicoruCount: int
      nicoruId: Any
      source: str
      isMyPost: bool
  class CommentRow(TypedDict):
      """Dictionary shape of a comment record.

      Presumably mirrors the columns of the comment table -- confirm.
      """

      id: int
      video_id: int
      comment_no: int
      user_id: int
      content: str
      posted_at: datetime
      nico_count: int
      vpos_ms: int | None
  class TagRow(TypedDict):
      """Dictionary shape of a tag record (presumably one table row)."""

      id: int
      name: str
  class UserRow(TypedDict):
      """Dictionary shape of a user record (presumably one table row)."""

      id: int
      code: str
  class VideoRow(TypedDict):
      """Dictionary shape of a video record (presumably one table row)."""

      id: int
      code: str
      title: str
      description: str
      uploaded_at: datetime
      deleted_at: datetime | None  # None while the video is not deleted
  class VideoHistoryRow(TypedDict):
      """Dictionary shape of a view-count history record.

      Presumably mirrors one row of the history table -- confirm.
      """

      id: int
      video_id: int
      fetched_at: date
      views_count: int
  class VideoTagRow(TypedDict):
      """Dictionary shape of a video/tag link record.

      Presumably mirrors one row of the link table -- confirm.
      """

      id: int
      video_id: int
      tag_id: int
      tagged_at: date
      untagged_at: date | None  # None while the tag is still attached
  def normalise(
          s: str
  ) -> str:
      """Return ``s`` in NFKC form, lower-cased.

      Used to compare tag names regardless of width variants and letter case.
      """
      compat_form = unicodedata.normalize('NFKC', s)
      return compat_form.lower()
  # Script entry point: run the update only when the file is executed directly.
  if __name__ == '__main__':
      main()