// Contributed to the Sandcastle Help File Builder project by Thomas Levesque
class Ranking
{
public $filename;
public $pageTitle;
public $rank;
function __construct($file, $title, $rank)
{
$this->filename = $file;
$this->pageTitle = $title;
$this->rank = $rank;
}
}
///
/// Split the search text up into keywords
///
/// The keywords to parse
/// A list containing the words for which to search
function ParseKeywords($keywords)
{
$keywordList = array();
$words = preg_split("/[^\w]+/", $keywords);
foreach($words as $word)
{
$checkWord = strtolower($word);
$first = substr($checkWord, 0, 1);
if(strlen($checkWord) > 2 && !ctype_digit($first) && !in_array($checkWord, $keywordList))
{
array_push($keywordList, $checkWord);
}
}
return $keywordList;
}
///
/// Search for the specified keywords and return the results as a block of
/// HTML.
///
/// The keywords for which to search
/// The file list
/// The dictionary used to find the words
/// True to sort by title, false to sort by
/// ranking
/// A block of HTML representing the search results.
function Search($keywords, $fileInfo, $wordDictionary, $sortByTitle)
{
$sb = "
";
$matches = array();
$matchingFileIndices = array();
$rankings = array();
$isFirst = true;
foreach($keywords as $word)
{
if (!array_key_exists($word, $wordDictionary))
{
return "Nothing found";
}
$occurrences = $wordDictionary[$word];
$matches[$word] = $occurrences;
$occurrenceIndices = array();
// Get a list of the file indices for this match
foreach($occurrences as $entry)
array_push($occurrenceIndices, ($entry >> 16));
if($isFirst)
{
$isFirst = false;
foreach($occurrenceIndices as $i)
{
array_push($matchingFileIndices, $i);
}
}
else
{
// After the first match, remove files that do not appear for
// all found keywords.
for($idx = 0; $idx < count($matchingFileIndices); $idx++)
{
if (!in_array($matchingFileIndices[$idx], $occurrenceIndices))
{
array_splice($matchingFileIndices, $idx, 1);
$idx--;
}
}
}
}
if(count($matchingFileIndices) == 0)
{
return "Nothing found";
}
// Rank the files based on the number of times the words occurs
foreach($matchingFileIndices as $index)
{
// Split out the title, filename, and word count
$fileIndex = explode("\x00", $fileInfo[$index]);
$title = $fileIndex[0];
$filename = $fileIndex[1];
$wordCount = intval($fileIndex[2]);
$matchCount = 0;
foreach($keywords as $words)
{
$occurrences = $matches[$word];
foreach($occurrences as $entry)
{
if(($entry >> 16) == $index)
$matchCount += $entry & 0xFFFF;
}
}
$r = new Ranking($filename, $title, $matchCount * 1000 / $wordCount);
array_push($rankings, $r);
if(count($rankings) > 99)
break;
}
// Sort by rank in descending order or by page title in ascending order
if($sortByTitle)
{
usort($rankings, "cmprankbytitle");
}
else
{
usort($rankings, "cmprank");
}
// Format the file list and return the results
foreach($rankings as $r)
{
$f = $r->filename;
$t = $r->pageTitle;
$sb .= "- $t
";
}
$sb .= "
Omitted $c more results";
}
return $sb;
}
function cmprank($x, $y)
{
return $y->rank - $x->rank;
}
function cmprankbytitle($x, $y)
{
return strcmp($x->pageTitle, $y->pageTitle);
}
?>