You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

771 lines
20 KiB

  1. <?php
  2. use dokuwiki\File\MediaResolver;
  3. use dokuwiki\File\PageResolver;
  4. use dokuwiki\Utf8\PhpString;
  5. /**
  6. * The MetaData Renderer
  7. *
  8. * Metadata is additional information about a DokuWiki page that gets extracted mainly from the page's content
  9. * but also its own filesystem data (like the creation time). All metadata is stored in the fields $meta and
  10. * $persistent.
  11. *
  12. * Some simplified rendering to $doc is done to gather the page's (text-only) abstract.
  13. *
  14. * @author Esther Brunner <wikidesign@gmail.com>
  15. */
  class Doku_Renderer_metadata extends Doku_Renderer
  {
      /** the approximate byte length to capture for the abstract */
      public const ABSTRACT_LEN = 250;

      /** the maximum UTF8 character length for the abstract */
      public const ABSTRACT_MAX = 500;

      /** @var array transient meta data, will be reset on each rendering */
      public $meta = [];

      /** @var array persistent meta data, will be kept until explicitly deleted */
      public $persistent = [];

      /** @var array the list of headers used to create unique link ids */
      protected $headers = [];

      /** @var string temporary $doc store */
      protected $store = '';

      /** @var string keeps the first image reference */
      protected $firstimage = '';

      /** @var bool whether or not data is being captured for the abstract, public to be accessible by plugins */
      public $capturing = true;

      /** @var bool determines if enough data for the abstract was collected, yet */
      public $capture = true;

      /** @var int number of bytes captured for abstract */
      protected $captured = 0;

      /**
       * Returns the format produced by this renderer.
       *
       * @return string always 'metadata'
       */
      public function getFormat()
      {
          return 'metadata';
      }

      /**
       * Initialize the document
       *
       * Sets up some of the persistent info about the page if it doesn't exist, yet,
       * then resets the transient metadata to the persistent values.
       */
      public function document_start()
      {
          global $ID;

          $this->headers = [];

          // external pages are missing create date
          if (empty($this->persistent['date']['created'])) {
              $this->persistent['date']['created'] = filectime(wikiFN($ID));
          }
          if (!isset($this->persistent['user'])) {
              $this->persistent['user'] = '';
          }
          if (!isset($this->persistent['creator'])) {
              $this->persistent['creator'] = '';
          }

          // reset metadata to persistent values
          $this->meta = $this->persistent;
      }

      /**
       * Finalize the document
       *
       * Stores collected data in the metadata: the renderer info, the abstract (unless one
       * is already set), the first image and the modification date (unless already set).
       */
      public function document_end()
      {
          global $ID;

          // store internal info in metadata (notoc,nocache)
          $this->meta['internal'] = $this->info;

          if (!isset($this->meta['description']['abstract'])) {
              // cut off too long abstracts
              $this->doc = trim($this->doc);
              // ABSTRACT_MAX is a character limit, so measure in UTF-8 characters (not bytes);
              // otherwise short multibyte abstracts would get an ellipsis without being cut
              if (PhpString::strlen($this->doc) > self::ABSTRACT_MAX) {
                  $this->doc = PhpString::substr($this->doc, 0, self::ABSTRACT_MAX) . '…';
              }
              $this->meta['description']['abstract'] = $this->doc;
          }

          $this->meta['relation']['firstimage'] = $this->firstimage;

          if (!isset($this->meta['date']['modified'])) {
              $this->meta['date']['modified'] = filemtime(wikiFN($ID));
          }
      }

      /**
       * Render plain text data
       *
       * This function takes care of the amount captured data and will stop capturing when
       * enough abstract data is available
       *
       * @param string $text
       */
      public function cdata($text)
      {
          if (!$this->capture || !$this->capturing) {
              return;
          }

          $this->doc .= $text;

          // the budget is counted in bytes, see ABSTRACT_LEN
          $this->captured += strlen($text);
          if ($this->captured > self::ABSTRACT_LEN) {
              $this->capture = false;
          }
      }

      /**
       * Add an item to the TOC
       *
       * @param string $id the hash link
       * @param string $text the text to display
       * @param int $level the nesting level
       */
      public function toc_additem($id, $text, $level)
      {
          global $conf;

          // only add items within configured levels
          if ($level < $conf['toptoclevel'] || $level > $conf['maxtoclevel']) {
              return;
          }

          // the TOC is one of our standard ul list arrays ;-)
          $this->meta['description']['tableofcontents'][] = [
              'hid' => $id,
              'title' => $text,
              'type' => 'ul',
              'level' => $level - $conf['toptoclevel'] + 1
          ];
      }

      /**
       * Render a heading
       *
       * The first heading encountered becomes the page title.
       *
       * @param string $text the text to display
       * @param int $level header level
       * @param int $pos byte position in the original source
       */
      public function header($text, $level, $pos)
      {
          if (!isset($this->meta['title'])) {
              $this->meta['title'] = $text;
          }

          // add the header to the TOC
          $hid = $this->_headerToLink($text, true);
          $this->toc_additem($hid, $text, $level);

          // add to summary
          $this->cdata(DOKU_LF . $text . DOKU_LF);
      }

      /**
       * Open a paragraph
       */
      public function p_open()
      {
          $this->cdata(DOKU_LF);
      }

      /**
       * Close a paragraph
       */
      public function p_close()
      {
          $this->cdata(DOKU_LF);
      }

      /**
       * Create a line break
       */
      public function linebreak()
      {
          $this->cdata(DOKU_LF);
      }

      /**
       * Create a horizontal line
       */
      public function hr()
      {
          $this->cdata(DOKU_LF . '----------' . DOKU_LF);
      }

      /**
       * Callback for footnote start syntax
       *
       * All following content will go to the footnote instead of
       * the document. To achieve this the previous rendered content
       * is moved to $store and $doc is cleared
       *
       * @author Andreas Gohr <andi@splitbrain.org>
       */
      public function footnote_open()
      {
          if (!$this->capture) {
              return;
          }

          // move current content to store
          // this is required to ensure safe behaviour of plugins accessed within footnotes
          $this->store = $this->doc;
          $this->doc = '';

          // disable capturing
          $this->capturing = false;
      }

      /**
       * Callback for footnote end syntax
       *
       * All content rendered whilst within footnote syntax mode is discarded,
       * the previously rendered content is restored and capturing is re-enabled.
       *
       * @author Andreas Gohr
       */
      public function footnote_close()
      {
          if (!$this->capture) {
              return;
          }

          // re-enable capturing
          $this->capturing = true;

          // restore previously rendered content
          $this->doc = $this->store;
          $this->store = '';
      }

      /**
       * Open an unordered list
       */
      public function listu_open()
      {
          $this->cdata(DOKU_LF);
      }

      /**
       * Open an ordered list
       */
      public function listo_open()
      {
          $this->cdata(DOKU_LF);
      }

      /**
       * Open a list item
       *
       * @param int $level the nesting level
       * @param bool $node true when a node; false when a leaf
       */
      public function listitem_open($level, $node = false)
      {
          $this->cdata(str_repeat(DOKU_TAB, $level) . '* ');
      }

      /**
       * Close a list item
       */
      public function listitem_close()
      {
          $this->cdata(DOKU_LF);
      }

      /**
       * Output preformatted text
       *
       * @param string $text
       */
      public function preformatted($text)
      {
          $this->cdata($text);
      }

      /**
       * Start a block quote
       */
      public function quote_open()
      {
          $this->cdata(DOKU_LF . DOKU_TAB . '"');
      }

      /**
       * Stop a block quote
       */
      public function quote_close()
      {
          $this->cdata('"' . DOKU_LF);
      }

      /**
       * Display text as file content, optionally syntax highlighted
       *
       * @param string $text text to show
       * @param string $lang programming language to use for syntax highlighting
       * @param string $file file path label
       */
      public function file($text, $lang = null, $file = null)
      {
          $this->cdata(DOKU_LF . $text . DOKU_LF);
      }

      /**
       * Display text as code content, optionally syntax highlighted
       *
       * @param string $text text to show
       * @param string $language programming language to use for syntax highlighting
       * @param string $file file path label
       */
      public function code($text, $language = null, $file = null)
      {
          $this->cdata(DOKU_LF . $text . DOKU_LF);
      }

      /**
       * Format an acronym
       *
       * The acronym is added to the abstract verbatim.
       *
       * @param string $acronym
       */
      public function acronym($acronym)
      {
          $this->cdata($acronym);
      }

      /**
       * Format a smiley
       *
       * The smiley is added to the abstract verbatim.
       *
       * @param string $smiley
       */
      public function smiley($smiley)
      {
          $this->cdata($smiley);
      }

      /**
       * Format an entity
       *
       * Entities are basically small text replacements; here the entity
       * is added to the abstract verbatim.
       *
       * @param string $entity
       */
      public function entity($entity)
      {
          $this->cdata($entity);
      }

      /**
       * Typographically format a multiply sign
       *
       * Example: ($x=640, $y=480) should result in "640×480"
       *
       * @param string|int $x first value
       * @param string|int $y second value
       */
      public function multiplyentity($x, $y)
      {
          $this->cdata($x . '×' . $y);
      }

      /**
       * Render an opening single quote char (language specific)
       */
      public function singlequoteopening()
      {
          global $lang;
          $this->cdata($lang['singlequoteopening']);
      }

      /**
       * Render a closing single quote char (language specific)
       */
      public function singlequoteclosing()
      {
          global $lang;
          $this->cdata($lang['singlequoteclosing']);
      }

      /**
       * Render an apostrophe char (language specific)
       */
      public function apostrophe()
      {
          global $lang;
          $this->cdata($lang['apostrophe']);
      }

      /**
       * Render an opening double quote char (language specific)
       */
      public function doublequoteopening()
      {
          global $lang;
          $this->cdata($lang['doublequoteopening']);
      }

      /**
       * Render a closing double quote char (language specific)
       */
      public function doublequoteclosing()
      {
          global $lang;
          $this->cdata($lang['doublequoteclosing']);
      }

      /**
       * Render a CamelCase link
       *
       * @param string $link The link name
       * @see http://en.wikipedia.org/wiki/CamelCase
       */
      public function camelcaselink($link)
      {
          $this->internallink($link, $link);
      }

      /**
       * Render a page local link
       *
       * Nothing is added to the abstract; only media used as link title is tracked.
       *
       * @param string $hash hash link identifier
       * @param string|array|null $name name for the link, array for media file
       */
      public function locallink($hash, $name = null)
      {
          $this->trackLinkTitleMedia($name);
      }

      /**
       * keep track of internal links in $this->meta['relation']['references']
       *
       * @param string $id page ID to link to. eg. 'wiki:syntax'
       * @param string|array|null $name name for the link, array for media file
       */
      public function internallink($id, $name = null)
      {
          global $ID;

          $this->trackLinkTitleMedia($name);

          // drop everything from the first '?' onwards
          $parts = explode('?', $id, 2);
          if (count($parts) === 2) {
              $id = $parts[0];
          }

          // the fallback title is derived from the ID as written, before resolving
          $default = $this->_simpleTitle($id);

          // first resolve and clean up the $id
          $resolver = new PageResolver($ID);
          $id = $resolver->resolveId($id);
          [$page] = sexplode('#', $id, 2);

          // set metadata
          $this->meta['relation']['references'][$page] = page_exists($page);

          // add link title to summary
          if ($this->capture) {
              $this->doc .= $this->_getLinkTitle($name, $default, $id);
          }
      }

      /**
       * Render an external link
       *
       * @param string $url full URL with scheme
       * @param string|array|null $name name for the link, array for media file
       */
      public function externallink($url, $name = null)
      {
          $this->trackLinkTitleMedia($name);

          if ($this->capture) {
              $this->doc .= $this->_getLinkTitle($name, '<' . $url . '>');
          }
      }

      /**
       * Render an interwiki link
       *
       * @param string $match original link - probably not much use
       * @param string|array $name name for the link, array for media file
       * @param string $wikiName identifier (shortcut) for the remote wiki
       * @param string $wikiUri the fragment parsed from the original link
       */
      public function interwikilink($match, $name, $wikiName, $wikiUri)
      {
          $this->trackLinkTitleMedia($name);

          if ($this->capture) {
              // the part before any '#' serves as fallback title
              [$wikiUri] = explode('#', $wikiUri, 2);
              $this->doc .= $this->_getLinkTitle($name, $wikiUri);
          }
      }

      /**
       * Link to windows share
       *
       * @param string $url the link
       * @param string|array $name name for the link, array for media file
       */
      public function windowssharelink($url, $name = null)
      {
          $this->trackLinkTitleMedia($name);

          if ($this->capture) {
              $this->doc .= $this->plainLinkLabel($name, '<' . $url . '>');
          }
      }

      /**
       * Render a linked E-Mail Address
       *
       * The address is added to the abstract as given when no name is set.
       *
       * @param string $address Email-Address
       * @param string|array $name name for the link, array for media file
       */
      public function emaillink($address, $name = null)
      {
          $this->trackLinkTitleMedia($name);

          if ($this->capture) {
              $this->doc .= $this->plainLinkLabel($name, '<' . $address . '>');
          }
      }

      /**
       * Render an internal media file
       *
       * @param string $src media ID
       * @param string $title descriptive text
       * @param string $align left|center|right
       * @param int $width width of media in pixel
       * @param int $height height of media in pixel
       * @param string $cache cache|recache|nocache
       * @param string $linking linkonly|detail|nolink
       */
      public function internalmedia(
          $src,
          $title = null,
          $align = null,
          $width = null,
          $height = null,
          $cache = null,
          $linking = null
      ) {
          if ($this->capture && $title) {
              $this->doc .= '[' . $title . ']';
          }
          $this->_firstimage($src);
          $this->_recordMediaUsage($src);
      }

      /**
       * Render an external media file
       *
       * @param string $src full media URL
       * @param string $title descriptive text
       * @param string $align left|center|right
       * @param int $width width of media in pixel
       * @param int $height height of media in pixel
       * @param string $cache cache|recache|nocache
       * @param string $linking linkonly|detail|nolink
       */
      public function externalmedia(
          $src,
          $title = null,
          $align = null,
          $width = null,
          $height = null,
          $cache = null,
          $linking = null
      ) {
          if ($this->capture && $title) {
              $this->doc .= '[' . $title . ']';
          }
          $this->_firstimage($src);
      }

      /**
       * Render the output of an RSS feed
       *
       * Records the feed as part of the page and keeps the smallest refresh
       * interval seen so far in $this->meta['date']['valid']['age'].
       *
       * @param string $url URL of the feed
       * @param array $params Finetuning of the output
       */
      public function rss($url, $params)
      {
          $this->meta['relation']['haspart'][$url] = true;

          $age = $params['refresh'];
          if (isset($this->meta['date']['valid']['age'])) {
              $age = min($this->meta['date']['valid']['age'], $age);
          }
          $this->meta['date']['valid']['age'] = $age;
      }

      #region Utils

      /**
       * Removes any Namespace from the given name but keeps
       * casing and special chars
       *
       * @param string $name
       *
       * @return mixed|string
       * @author Andreas Gohr <andi@splitbrain.org>
       *
       */
      public function _simpleTitle($name)
      {
          global $conf;

          if (is_array($name)) {
              return '';
          }

          // the slash only counts as namespace separator when 'useslash' is enabled
          $nssep = $conf['useslash'] ? '[:;/]' : '[:;]';
          $name = preg_replace('!.*' . $nssep . '!', '', $name);

          // if there is a hash we use the anchor name only
          return preg_replace('!.*#!', '', $name);
      }

      /**
       * Construct a title and handle images in titles
       *
       * @param string|array|null $title either string title or media array
       * @param string $default default title if nothing else is found
       * @param null|string $id linked page id (used to extract title from first heading)
       * @return string title text
       * @author Harry Fuecks <hfuecks@gmail.com>
       */
      public function _getLinkTitle($title, $default, $id = null)
      {
          if (is_array($title)) {
              // media as link title: use its caption when it has one
              // (empty() also guards against media arrays without a 'title' key)
              if (empty($title['title'])) {
                  return $default;
              }
              return '[' . $title['title'] . ']';
          }

          if (is_null($title) || trim($title) === '') {
              if (useHeading('content') && $id) {
                  $heading = p_get_first_heading($id, METADATA_DONT_RENDER);
                  if ($heading) {
                      return $heading;
                  }
              }
              return $default;
          }

          return $title;
      }

      /**
       * Remember first image
       *
       * Only the first source with a recognised image extension is kept.
       *
       * @param string $src image URL or ID
       */
      protected function _firstimage($src)
      {
          global $ID;

          if ($this->firstimage) {
              return;
          }

          [$src] = explode('#', $src, 2);
          if (!media_isexternal($src)) {
              $src = (new MediaResolver($ID))->resolveId($src);
          }

          // the dot has to be literal, otherwise names merely ending in e.g. "png" would match
          if (preg_match('/\.(jpe?g|gif|png|webp|svg)$/i', $src)) {
              $this->firstimage = $src;
          }
      }

      /**
       * Store list of used media files in metadata
       *
       * External media is ignored.
       *
       * @param string $src media ID
       */
      protected function _recordMediaUsage($src)
      {
          global $ID;

          [$src] = explode('#', $src, 2);
          if (media_isexternal($src)) {
              return;
          }

          $src = (new MediaResolver($ID))->resolveId($src);
          $file = mediaFN($src);
          $this->meta['relation']['media'][$src] = file_exists($file);
      }

      /**
       * Track a media file that is used as a link title
       *
       * Does nothing unless $name is a media array. The media source is offered as
       * first image and, for internal media, recorded in the list of used media.
       *
       * @param string|array|null $name link name as passed to the link methods
       */
      private function trackLinkTitleMedia($name)
      {
          if (!is_array($name)) {
              return;
          }

          $this->_firstimage($name['src']);
          if ($name['type'] == 'internalmedia') {
              $this->_recordMediaUsage($name['src']);
          }
      }

      /**
       * Build the abstract text for links that have no page title lookup
       *
       * Media arrays are turned into their caption via _getLinkTitle() so that a raw
       * array is never concatenated into the abstract; any other non-empty name is
       * used as given.
       *
       * @param string|array|null $name link name as passed to the link methods
       * @param string $fallback text to use when no usable name is available
       * @return string
       */
      private function plainLinkLabel($name, $fallback)
      {
          if (is_array($name)) {
              return $this->_getLinkTitle($name, $fallback);
          }

          return $name ?: $fallback;
      }

      #endregion
  }
  684. //Setup VIM: ex: et ts=4 :