| 
							- <?php
 - 
 - /**
 -  * Functions to create the fulltext search index
 -  *
 -  * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 -  * @author     Andreas Gohr <andi@splitbrain.org>
 -  * @author     Tom N Harris <tnharris@whoopdedo.org>
 -  */
 - 
 - use dokuwiki\Utf8\Clean;
 - use dokuwiki\Extension\Event;
 - use dokuwiki\Search\Indexer;
 - 
 // Bump this tag to force a rebuild of the index after an upgrade
 define('INDEXER_VERSION', 8);

 // Minimum token length kept in the index (this limit does not apply to numeric tokens);
 // only set here when nothing else defined it earlier
 if (!defined('IDX_MINWORDLENGTH')) {
     define('IDX_MINWORDLENGTH', 2);
 }
 - 
 /**
  * Build the version identifier of the index, including the external tokenizer
  * and any plugin contributions.
  *
  * Index data is only compatible with an indexer reporting the same identifier.
  *
  * @triggers INDEXER_VERSION_GET
  * Plugins that modify what gets indexed should hook this event and
  * register their own version in the event data like so:
  *     $data[$plugin_name] = $plugin_version;
  *
  * @author Tom N Harris <tnharris@whoopdedo.org>
  * @author Michael Hamann <michael@content-space.de>
  *
  * @return int|string the bare INDEXER_VERSION when no handler added anything, otherwise
  *                    the version followed by "+plugin=version" parts sorted by plugin name
  */
 function idx_get_version()
 {
     // computed on the first call, afterwards served from this static cache
     static $cached = null;
     if ($cached != null) {
         return $cached;
     }

     $base = INDEXER_VERSION;

     // the core version is handed to the handlers for their convenience only
     $data = ['dokuwiki' => $base];
     Event::createAndTrigger('INDEXER_VERSION_GET', $data, null, false);

     // the core version always leads the identifier, so keep it out of the sorted part
     unset($data['dokuwiki']);
     ksort($data);

     $suffix = '';
     foreach ($data as $plugin => $vers) {
         $suffix .= '+' . $plugin . '=' . $vers;
     }

     // without plugin entries the version stays the plain constant (no string conversion)
     $cached = ($suffix === '') ? $base : $base . $suffix;
     return $cached;
 }
 - 
 /**
  * Compute the "length" of a word as used by the index.
  *
  * Starts from the byte length and adds a bonus for every byte in the range
  * 0xE2-0xEF, so that words written in Asian scripts do not all end up with
  * the same length (and thus in the same w3.idx file).
  *
  * @author Tom N Harris <tnharris@whoopdedo.org>
  *
  * @param string $w
  * @return int
  */
 function wordlen($w)
 {
     $length = strlen($w);

     // every matching lead byte fakes extra length: 0xE2 adds 1, 0xE3 adds 2, ... 0xEF adds 14
     if (preg_match_all('/[\xE2-\xEF]/', $w, $hits) > 0) {
         $bonus = array_map(static fn($byte) => ord($byte) - 0xE1, $hits[0]);
         $length += array_sum($bonus);
     }

     return $length;
 }
 - 
 /**
  * Get the shared indexer instance, creating it on first use.
  *
  * @return Indexer    an Indexer
  *
  * @author Tom N Harris <tnharris@whoopdedo.org>
  */
 function idx_get_indexer()
 {
     // one instance is kept for all later calls
     static $instance;
     $instance ??= new Indexer();
     return $instance;
 }
 - 
 /**
  * Returns words that will be ignored.
  *
  * The list is read from inc/lang/<lang>/stopwords.txt for the configured
  * language on the first call and cached afterwards. A missing or unreadable
  * file yields an empty list.
  *
  * @return array                list of stop words (returned by reference)
  *
  * @author Tom N Harris <tnharris@whoopdedo.org>
  */
 function & idx_get_stopwords()
 {
     static $stopwords = null;
     if ($stopwords === null) {
         global $conf;
         $swfile = DOKU_INC . 'inc/lang/' . $conf['lang'] . '/stopwords.txt';
         $lines = file_exists($swfile) ? file($swfile, FILE_IGNORE_NEW_LINES) : [];
         // file() returns false when the file cannot be read; never cache that,
         // callers rely on always getting an array
         $stopwords = ($lines === false) ? [] : $lines;
     }
     return $stopwords;
 }
 - 
 /**
  * Adds/updates the search index for the given page
  *
  * Pages that no longer exist, or whose metadata disables indexing, are removed
  * from the index instead. Locking is handled internally; when the indexer
  * reports "locked" the function gives up and returns false.
  *
  * @triggers INDEXER_PAGE_ADD
  * The event data holds 'page', 'body', 'metadata' and 'pid'. The raw wiki text is
  * appended to the body only when advise_before() allows it. After the event the
  * 'page', 'body' and 'metadata' entries are read back and used for indexing.
  *
  * @param string        $page   name of the page to index
  * @param boolean       $verbose    print status messages
  * @param boolean       $force  force reindexing even when the index is up to date
  * @return string|boolean  false when nothing was done, the index was locked or a step
  *                         failed; otherwise the result reported by the indexer. Note that
  *                         in verbose mode a run that reaches the end always returns true.
  *
  * @author Tom N Harris <tnharris@whoopdedo.org>
  */
 function idx_addPage($page, $verbose = false, $force = false)
 {
     $idxtag = metaFN($page, '.indexed');

     // check if page was deleted but is still in the index
     if (!page_exists($page)) {
         if (!file_exists($idxtag)) {
             if ($verbose) echo "Indexer: $page does not exist, ignoring" . DOKU_LF;
             return false;
         }
         $Indexer = idx_get_indexer();
         $result = $Indexer->deletePage($page);
         if ($result === "locked") {
             if ($verbose) echo "Indexer: locked" . DOKU_LF;
             return false;
         }
         @unlink($idxtag);
         return $result;
     }

     // check if indexing needed: the tag file must carry the current version
     // and be newer than the page itself
     if (!$force && file_exists($idxtag)) {
         // loose comparison on purpose: the tag file holds a string while
         // idx_get_version() may return an int
         if (trim(io_readFile($idxtag)) == idx_get_version()) {
             $last = @filemtime($idxtag);
             if ($last > @filemtime(wikiFN($page))) {
                 if ($verbose) echo "Indexer: index for $page up to date" . DOKU_LF;
                 return false;
             }
         }
     }

     // indexing explicitly disabled in the page metadata: remove a stale entry, if any
     $indexenabled = p_get_metadata($page, 'internal index', METADATA_RENDER_UNLIMITED);
     if ($indexenabled === false) {
         $result = false;
         if (file_exists($idxtag)) {
             $Indexer = idx_get_indexer();
             $result = $Indexer->deletePage($page);
             if ($result === "locked") {
                 if ($verbose) echo "Indexer: locked" . DOKU_LF;
                 return false;
             }
             @unlink($idxtag);
         }
         if ($verbose) echo "Indexer: index disabled for $page" . DOKU_LF;
         return $result;
     }

     $Indexer = idx_get_indexer();
     $pid = $Indexer->getPID($page);
     if ($pid === false) {
         if ($verbose) echo "Indexer: getting the PID failed for $page" . DOKU_LF;
         return false;
     }

     // collect the metadata that goes into the index; anything that is not an
     // array (e.g. missing metadata) is treated as "no entries"
     $body = '';
     $metadata = [];
     $metadata['title'] = p_get_metadata($page, 'title', METADATA_RENDER_UNLIMITED);

     $references = p_get_metadata($page, 'relation references', METADATA_RENDER_UNLIMITED);
     $metadata['relation_references'] = is_array($references) ? array_keys($references) : [];

     $media = p_get_metadata($page, 'relation media', METADATA_RENDER_UNLIMITED);
     $metadata['relation_media'] = is_array($media) ? array_keys($media) : [];

     $data = ['page' => $page, 'body' => $body, 'metadata' => $metadata, 'pid' => $pid];
     $evt = new Event('INDEXER_PAGE_ADD', $data);
     if ($evt->advise_before()) $data['body'] = $data['body'] . " " . rawWiki($page);
     $evt->advise_after();
     unset($evt);

     // Read back only the entries that are used below. A blanket extract() would let
     // any extra key in the event data overwrite unrelated locals such as $Indexer.
     $page = $data['page'] ?? $page;
     $body = $data['body'] ?? $body;
     $metadata = $data['metadata'] ?? $metadata;

     $result = $Indexer->addPageWords($page, $body);
     if ($result === "locked") {
         if ($verbose) echo "Indexer: locked" . DOKU_LF;
         return false;
     }

     if ($result) {
         $result = $Indexer->addMetaKeys($page, $metadata);
         if ($result === "locked") {
             if ($verbose) echo "Indexer: locked" . DOKU_LF;
             return false;
         }
     }

     // remember which index version this page was indexed with
     if ($result) {
         io_saveFile(metaFN($page, '.indexed'), idx_get_version());
     }
     if ($verbose) {
         echo "Indexer: finished" . DOKU_LF;
         return true;
     }
     return $result;
 }
 - 
 /**
  * Look up tokens in the fulltext index
  *
  * Hands the list of words to the shared indexer and returns the pages
  * matching each of them.
  *
  * Important: No ACL checking is done here! All results are
  *            returned, regardless of permissions
  *
  * @param array      $words  list of words to search for (passed on by reference)
  * @return array             list of pages found, associated with the search terms
  */
 function idx_lookup(&$words)
 {
     return idx_get_indexer()->lookup($words);
 }
 - 
 /**
  * Split a string into tokens using the shared indexer
  *
  * @param string $string text to tokenize
  * @param bool $wc       passed through to the indexer's tokenizer unchanged
  *
  * @return array
  */
 function idx_tokenizer($string, $wc = false)
 {
     return idx_get_indexer()->tokenizer($string, $wc);
 }
 - 
 - /* For compatibility */
 - 
 /**
  * Read the list of words in an index (if it exists).
  *
  * The file read is <indexdir>/<idx><suffix>.idx. Lines are returned as read,
  * including their trailing newline. A missing or unreadable file yields an
  * empty array.
  *
  * @author Tom N Harris <tnharris@whoopdedo.org>
  *
  * @param string $idx
  * @param string $suffix
  * @return array
  */
 function idx_getIndex($idx, $suffix)
 {
     global $conf;
     $fn = $conf['indexdir'] . '/' . $idx . $suffix . '.idx';
     if (!file_exists($fn)) return [];

     // file() returns false when reading fails; keep the documented array return
     $lines = file($fn);
     return ($lines === false) ? [] : $lines;
 }
 - 
 /**
  * Get the list of lengths indexed in the wiki.
  *
  * Reads the index directory or a cache file and returns
  * a sorted array of lengths of the words used in the wiki.
  *
  * When $conf['readdircache'] is non-zero, the result is cached in
  * <indexdir>/lengths.idx and reused while that file is younger than
  * $conf['readdircache'] seconds. Failing to write the cache is not an error.
  *
  * @author YoBoY <yoboy.leguesh@gmail.com>
  *
  * @return array list of int lengths; empty when the index directory cannot be opened
  */
 function idx_listIndexLengths()
 {
     global $conf;
     $cachefile = $conf['indexdir'] . '/lengths.idx';

     // a cache time of 0 disables the cache file entirely
     $docache = ($conf['readdircache'] != 0);

     if ($docache) {
         clearstatcache();
         if (
             file_exists($cachefile)
             && (time() < @filemtime($cachefile) + $conf['readdircache'])
         ) {
             $lengths = @file($cachefile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
             if ($lengths !== false) {
                 $idx = [];
                 foreach ($lengths as $length) {
                     $idx[] = (int)$length;
                 }
                 return $idx;
             }
         }
         // no usable cache: fall through and rebuild it from the directory
     }

     $dir = @opendir($conf['indexdir']);
     if ($dir === false) {
         return [];
     }

     // the word index files are named i<length>.idx
     $idx = [];
     while (($f = readdir($dir)) !== false) {
         if (str_starts_with($f, 'i') && str_ends_with($f, '.idx')) {
             $i = substr($f, 1, -4);
             if (is_numeric($i)) {
                 $idx[] = (int)$i;
             }
         }
     }
     closedir($dir);
     sort($idx);

     // save this in a file
     if ($docache) {
         $handle = @fopen($cachefile, 'w');
         // only write when the file could be opened: passing false on to
         // fwrite()/fclose() raises a TypeError on PHP 8, which @ does not silence
         if ($handle !== false) {
             @fwrite($handle, implode("\n", $idx));
             @fclose($handle);
         }
     }

     return $idx;
 }
 - 
 /**
  * Get the word lengths that have been indexed.
  *
  * With an array filter, the array keys are taken as candidate lengths and only
  * those with an existing i<length>.idx file in the index directory are returned.
  * With any other filter, all indexed lengths greater than or equal to the
  * (integer) filter value are returned.
  *
  * @author YoBoY <yoboy.leguesh@gmail.com>
  *
  * @param array|int $filter
  * @return array
  */
 function idx_indexLengths($filter)
 {
     global $conf;

     if (!is_array($filter)) {
         // minimum length given: keep every known length that is equal or superior
         $minimum = (int)$filter;
         $matching = array_filter(
             idx_listIndexLengths(),
             static fn($length) => (int)$length >= $minimum
         );
         return array_values($matching);
     }

     // explicit candidates given: only test whether their index files exist
     $prefix = $conf['indexdir'] . "/i";
     $present = array_filter(
         array_keys($filter),
         static fn($key) => file_exists($prefix . $key . '.idx')
     );
     return array_values($present);
 }
 - 
 /**
  * Clean a name of a key for use as a file name.
  *
  * The trimmed name is romanized, runs of separator characters (space, dot,
  * slash, colon, hyphen) are collapsed into a single underscore, every remaining
  * character that is not an ASCII letter, digit or underscore is dropped, and
  * the result is lowercased.
  *
  * @author Tom N Harris <tnharris@whoopdedo.org>
  *
  * @param string $name
  * @return string
  */
 function idx_cleanName($name)
 {
     $romanized = Clean::romanize(trim((string)$name));

     // note: a backslash is not in the separator set (the "\\:" below reaches the
     // regex engine as an escaped colon), so backslashes are removed by the second pass
     $underscored = preg_replace('#[ \./\\:-]+#', '_', $romanized);
     $stripped = preg_replace('/[^A-Za-z0-9_]/', '', $underscored);

     return strtolower($stripped);
 }
 - 
 - //Setup VIM: ex: et ts=4 :
 
 
  |