You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

189 lines
5.1 KiB

  1. #!/usr/bin/env php
  2. <?php
  3. use dokuwiki\Utf8\Sort;
  4. use dokuwiki\File\PageResolver;
  5. use splitbrain\phpcli\CLI;
  6. use splitbrain\phpcli\Options;
  7. if (!defined('DOKU_INC')) define('DOKU_INC', realpath(__DIR__ . '/../') . '/');
  8. define('NOSESSION', 1);
  9. require_once(DOKU_INC . 'inc/init.php');
  10. /**
  11. * Find wanted pages
  12. */
  13. class WantedPagesCLI extends CLI
  14. {
  15. protected const DIR_CONTINUE = 1;
  16. protected const DIR_NS = 2;
  17. protected const DIR_PAGE = 3;
  18. private $skip = false;
  19. private $sort = 'wanted';
  20. private $result = [];
  21. /**
  22. * Register options and arguments on the given $options object
  23. *
  24. * @param Options $options
  25. * @return void
  26. */
  27. protected function setup(Options $options)
  28. {
  29. $options->setHelp(
  30. 'Outputs a list of wanted pages (pages that do not exist yet) and their origin pages ' .
  31. ' (the pages that are linkin to these missing pages).'
  32. );
  33. $options->registerArgument(
  34. 'namespace',
  35. 'The namespace to lookup. Defaults to root namespace',
  36. false
  37. );
  38. $options->registerOption(
  39. 'sort',
  40. 'Sort by wanted or origin page',
  41. 's',
  42. '(wanted|origin)'
  43. );
  44. $options->registerOption(
  45. 'skip',
  46. 'Do not show the second dimension',
  47. 'k'
  48. );
  49. }
  50. /**
  51. * Your main program
  52. *
  53. * Arguments and options have been parsed when this is run
  54. *
  55. * @param Options $options
  56. * @return void
  57. */
  58. protected function main(Options $options)
  59. {
  60. $args = $options->getArgs();
  61. if ($args) {
  62. $startdir = dirname(wikiFN($args[0] . ':xxx'));
  63. } else {
  64. $startdir = dirname(wikiFN('xxx'));
  65. }
  66. $this->skip = $options->getOpt('skip');
  67. $this->sort = $options->getOpt('sort');
  68. $this->info("searching $startdir");
  69. foreach ($this->getPages($startdir) as $page) {
  70. $this->internalLinks($page);
  71. }
  72. Sort::ksort($this->result);
  73. foreach ($this->result as $main => $subs) {
  74. if ($this->skip) {
  75. echo "$main\n";
  76. } else {
  77. $subs = array_unique($subs);
  78. Sort::sort($subs);
  79. foreach ($subs as $sub) {
  80. printf("%-40s %s\n", $main, $sub);
  81. }
  82. }
  83. }
  84. }
  85. /**
  86. * Determine directions of the search loop
  87. *
  88. * @param string $entry
  89. * @param string $basepath
  90. * @return int
  91. */
  92. protected function dirFilter($entry, $basepath)
  93. {
  94. if ($entry == '.' || $entry == '..') {
  95. return WantedPagesCLI::DIR_CONTINUE;
  96. }
  97. if (is_dir($basepath . '/' . $entry)) {
  98. if (strpos($entry, '_') === 0) {
  99. return WantedPagesCLI::DIR_CONTINUE;
  100. }
  101. return WantedPagesCLI::DIR_NS;
  102. }
  103. if (preg_match('/\.txt$/', $entry)) {
  104. return WantedPagesCLI::DIR_PAGE;
  105. }
  106. return WantedPagesCLI::DIR_CONTINUE;
  107. }
  108. /**
  109. * Collects recursively the pages in a namespace
  110. *
  111. * @param string $dir
  112. * @return array
  113. * @throws DokuCLI_Exception
  114. */
  115. protected function getPages($dir)
  116. {
  117. static $trunclen = null;
  118. if (!$trunclen) {
  119. global $conf;
  120. $trunclen = strlen($conf['datadir'] . ':');
  121. }
  122. if (!is_dir($dir)) {
  123. throw new DokuCLI_Exception("Unable to read directory $dir");
  124. }
  125. $pages = [];
  126. $dh = opendir($dir);
  127. while (false !== ($entry = readdir($dh))) {
  128. $status = $this->dirFilter($entry, $dir);
  129. if ($status == WantedPagesCLI::DIR_CONTINUE) {
  130. continue;
  131. } elseif ($status == WantedPagesCLI::DIR_NS) {
  132. $pages = array_merge($pages, $this->getPages($dir . '/' . $entry));
  133. } else {
  134. $page = ['id' => pathID(substr($dir . '/' . $entry, $trunclen)), 'file' => $dir . '/' . $entry];
  135. $pages[] = $page;
  136. }
  137. }
  138. closedir($dh);
  139. return $pages;
  140. }
  141. /**
  142. * Parse instructions and add the non-existing links to the result array
  143. *
  144. * @param array $page array with page id and file path
  145. */
  146. protected function internalLinks($page)
  147. {
  148. global $conf;
  149. $instructions = p_get_instructions(file_get_contents($page['file']));
  150. $resolver = new PageResolver($page['id']);
  151. $pid = $page['id'];
  152. foreach ($instructions as $ins) {
  153. if ($ins[0] == 'internallink' || ($conf['camelcase'] && $ins[0] == 'camelcaselink')) {
  154. $mid = $resolver->resolveId($ins[1][0]);
  155. if (!page_exists($mid)) {
  156. [$mid] = explode('#', $mid); //record pages without hashes
  157. if ($this->sort == 'origin') {
  158. $this->result[$pid][] = $mid;
  159. } else {
  160. $this->result[$mid][] = $pid;
  161. }
  162. }
  163. }
  164. }
  165. }
  166. }
  167. // Main
  168. $cli = new WantedPagesCLI();
  169. $cli->run();