You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

176 lines
5.0 KiB

  1. <?php
  2. /**
  3. * Sitemap handling functions
  4. *
  5. * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
  6. * @author Michael Hamann <michael@content-space.de>
  7. */
  8. namespace dokuwiki\Sitemap;
  9. use dokuwiki\Extension\Event;
  10. use dokuwiki\HTTP\DokuHTTPClient;
  11. use dokuwiki\Logger;
  12. /**
  13. * A class for building sitemaps and pinging search engines with the sitemap URL.
  14. *
  15. * @author Michael Hamann
  16. */
  17. class Mapper
  18. {
  19. /**
  20. * Builds a Google Sitemap of all public pages known to the indexer
  21. *
  22. * The map is placed in the cache directory named sitemap.xml.gz - This
  23. * file needs to be writable!
  24. *
  25. * @author Michael Hamann
  26. * @author Andreas Gohr
  27. * @link https://www.google.com/webmasters/sitemaps/docs/en/about.html
  28. * @link http://www.sitemaps.org/
  29. *
  30. * @return bool
  31. */
  32. public static function generate()
  33. {
  34. global $conf;
  35. if ($conf['sitemap'] < 1 || !is_numeric($conf['sitemap'])) return false;
  36. $sitemap = Mapper::getFilePath();
  37. if (file_exists($sitemap)) {
  38. if (!is_writable($sitemap)) return false;
  39. } elseif (!is_writable(dirname($sitemap))) {
  40. return false;
  41. }
  42. if (
  43. @filesize($sitemap) &&
  44. @filemtime($sitemap) > (time() - ($conf['sitemap'] * 86400))
  45. ) { // 60*60*24=86400
  46. Logger::debug('Sitemapper::generate(): Sitemap up to date');
  47. return false;
  48. }
  49. Logger::debug("Sitemapper::generate(): using $sitemap");
  50. $pages = idx_get_indexer()->getPages();
  51. Logger::debug('Sitemapper::generate(): creating sitemap using ' . count($pages) . ' pages');
  52. $items = [];
  53. // build the sitemap items
  54. foreach ($pages as $id) {
  55. //skip hidden, non existing and restricted files
  56. if (isHiddenPage($id)) continue;
  57. if (auth_aclcheck($id, '', []) < AUTH_READ) continue;
  58. $item = Item::createFromID($id);
  59. if ($item instanceof Item)
  60. $items[] = $item;
  61. }
  62. $eventData = ['items' => &$items, 'sitemap' => &$sitemap];
  63. $event = new Event('SITEMAP_GENERATE', $eventData);
  64. if ($event->advise_before(true)) {
  65. //save the new sitemap
  66. $event->result = io_saveFile($sitemap, (new Mapper())->getXML($items));
  67. }
  68. $event->advise_after();
  69. return $event->result;
  70. }
  71. /**
  72. * Builds the sitemap XML string from the given array auf SitemapItems.
  73. *
  74. * @param $items array The SitemapItems that shall be included in the sitemap.
  75. * @return string The sitemap XML.
  76. *
  77. * @author Michael Hamann
  78. */
  79. private function getXML($items)
  80. {
  81. ob_start();
  82. echo '<?xml version="1.0" encoding="UTF-8"?>' . NL;
  83. echo '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . NL;
  84. foreach ($items as $item) {
  85. /** @var Item $item */
  86. echo $item->toXML();
  87. }
  88. echo '</urlset>' . NL;
  89. $result = ob_get_contents();
  90. ob_end_clean();
  91. return $result;
  92. }
  93. /**
  94. * Helper function for getting the path to the sitemap file.
  95. *
  96. * @return string The path to the sitemap file.
  97. *
  98. * @author Michael Hamann
  99. */
  100. public static function getFilePath()
  101. {
  102. global $conf;
  103. $sitemap = $conf['cachedir'] . '/sitemap.xml';
  104. if (self::sitemapIsCompressed()) {
  105. $sitemap .= '.gz';
  106. }
  107. return $sitemap;
  108. }
  109. /**
  110. * Helper function for checking if the sitemap is compressed
  111. *
  112. * @return bool If the sitemap file is compressed
  113. */
  114. public static function sitemapIsCompressed()
  115. {
  116. global $conf;
  117. return $conf['compression'] === 'bz2' || $conf['compression'] === 'gz';
  118. }
  119. /**
  120. * Pings search engines with the sitemap url. Plugins can add or remove
  121. * urls to ping using the SITEMAP_PING event.
  122. *
  123. * @author Michael Hamann
  124. *
  125. * @return bool
  126. */
  127. public static function pingSearchEngines()
  128. {
  129. //ping search engines...
  130. $http = new DokuHTTPClient();
  131. $http->timeout = 8;
  132. $encoded_sitemap_url = urlencode(wl('', ['do' => 'sitemap'], true, '&'));
  133. $ping_urls = [
  134. 'google' => 'https://www.google.com/ping?sitemap=' . $encoded_sitemap_url,
  135. 'yandex' => 'https://webmaster.yandex.com/ping?sitemap=' . $encoded_sitemap_url
  136. ];
  137. $data = [
  138. 'ping_urls' => $ping_urls,
  139. 'encoded_sitemap_url' => $encoded_sitemap_url
  140. ];
  141. $event = new Event('SITEMAP_PING', $data);
  142. if ($event->advise_before(true)) {
  143. foreach ($data['ping_urls'] as $name => $url) {
  144. Logger::debug("Sitemapper::PingSearchEngines(): pinging $name");
  145. $resp = $http->get($url);
  146. if ($http->error) {
  147. Logger::debug("Sitemapper:pingSearchengines(): $http->error", $resp);
  148. }
  149. }
  150. }
  151. $event->advise_after();
  152. return true;
  153. }
  154. }