Platon Technologies
not logged in Login Registration
EnglishSlovak
open source software development celebrating 10 years of open source development! Thursday, March 28, 2024

File: [Platon] / mylist / mylist_indexer.php (download)

Revision 1.1, Sun Apr 10 19:59:01 2005 UTC (18 years, 11 months ago) by majo

The file for indexing keywords has been added.

<?php

require_once "mylist.config.php";
require_once "backend/shared/emailMessage.php";

/**
 * Builds the full-text search index for one e-mail message.
 *
 * The caller fills in the public properties with the parts of the message and
 * then calls indexMessage(); every word found in every part is written to the
 * mylist_search_index table, weighted by the part it was found in (the weights
 * come from mylist_search_types).
 *
 * Database access goes through the global PEAR DB handle $dbh.
 */
class CIndexer
{

    var $subject;         // string: subject line of the message
    var $header;          // array: header values, possibly nested
    var $body;            // string: message body text
    var $attachements;    // array: textual content of the attachments

    /**
     * Indexes all parts (header, subject, body, attachments) of one message.
     *
     * @param mixed $_archive_id  id of the archived message; stored as message_id
     */
    function indexMessage($_archive_id)
    {
        $this->indexPartOfMessage($_archive_id, "header");
        $this->indexPartOfMessage($_archive_id, "subject");
        $this->indexPartOfMessage($_archive_id, "body");
        // The existing tables are not sufficient for indexing attachments: if a
        // message has more than one attachment we cannot tell the user which of
        // them contains the searched term.
        $this->indexPartOfMessage($_archive_id, "attachement");
    }

    /**
     * Indexes the words of a single message part.
     *
     * The first occurrence of a word inserts a row carrying the default weight
     * of the part type; every further occurrence raises the stored weight by
     * 0.3 * default weight.
     *
     * All values are passed to the database as bound parameters instead of
     * being concatenated into the SQL text.
     *
     * @param mixed  $id    message id (mylist_search_index.message_id)
     * @param string $type  part name as stored in mylist_search_types.name
     *                      ("header", "subject", "body" or "attachement")
     */
    function indexPartOfMessage($id, $type)
    {
        global $dbh;

        $words = $this->parseTextIntoWords($this->getTextToIndex($type));
        if (Count($words) == 0)
            return;    // nothing to index, no need to touch the database

        $sql = "SELECT id, default_weight FROM mylist_search_types WHERE name = ?";
        $res = $dbh->query($sql, array($type));
        if (PEAR::isError($res))
            die($res->getMessage());

        $row = null;
        $res->fetchInto($row);
        // Without this guard a missing type would only surface later as a
        // malformed INSERT/UPDATE statement.
        if (!is_array($row) || !isset($row["id"]))
            die("mylist indexer: unknown search type '" . $type . "'");

        $typeId = $row["id"];
        $defaultWeight = $row["default_weight"];

        foreach ($words as $word)
        {
            $sql  = "SELECT weight FROM mylist_search_index ";
            $sql .= "WHERE message_id = ? AND word = ? AND search_type_id = ?";
            $weight = $dbh->getOne($sql, array($id, $word, $typeId));
            if (PEAR::isError($weight))
                die($weight->getMessage());

            // Strict checks: a stored weight of 0 is an existing row, not a
            // missing one.
            if ($weight === null || $weight === "")
            {
                $sql  = "INSERT INTO mylist_search_index ";
                $sql .= "(message_id, word, search_type_id, weight) ";
                $sql .= "VALUES (?, ?, ?, ?)";
                $params = array($id, $word, $typeId, $defaultWeight);
            }
            else
            {
                // "word = ?" (not LIKE): "_" inside a word must not act as a
                // wildcard and touch the rows of other words.
                $sql  = "UPDATE mylist_search_index SET weight = ? ";
                $sql .= "WHERE message_id = ? AND word = ? AND search_type_id = ?";
                $newWeight = (0.3 * (float) $defaultWeight) + (float) $weight;
                $params = array($newWeight, $id, $word, $typeId);
            }

            $res = $dbh->query($sql, $params);
            if (PEAR::isError($res))
                die($res->getMessage());
        }
    }

    /**
     * Returns the text of the requested message part as one string.
     *
     * Array-valued parts (header, attachments) are flattened to one value per
     * line so that the result can always be fed to parseTextIntoWords().
     *
     * @param string $type  "header", "subject", "body" or "attachement"
     * @return string       text of the part; "" for an unknown type
     */
    function getTextToIndex($type)
    {
        switch ($type)
        {
            case "header":
                return $this->getAllHeaderValues($this->header);
            case "subject":
                return (string) $this->subject;
            case "body":
                return (string) $this->body;
            case "attachement":
                return $this->getAllHeaderValues($this->attachements);
        }
        return "";
    }

    /**
     * Recursively flattens a (possibly nested) array into text, one value per
     * line. Keys are ignored.
     *
     * @param mixed $array  array of values / nested arrays; a scalar is treated
     *                      as a single value, null as "nothing"
     * @return string       every leaf value followed by "\n"
     */
    function getAllHeaderValues($array)
    {
        if ($array === null)
            return "";
        if (!is_array($array))
            return $array . "\n";

        $text = "";
        foreach ($array as $value)
        {
            if (is_array($value))
                $text .= $this->getAllHeaderValues($value);
            else
                $text .= $value . "\n";
        }
        return $text;
    }

    /**
     * Splits text into normalized index words.
     *
     * Punctuation is turned into a separator, the text is split on any run of
     * whitespace (spaces, tabs, CR, LF) and every token is passed through
     * validateIndexWord(); rejected tokens are dropped.
     *
     * @param string $_text  text to split
     * @return array         list of accepted words, in order of appearance
     *                       (duplicates are kept)
     */
    function parseTextIntoWords($_text)
    {
        $separators = array(
            '.', ',', ';', ':',
            '-', '+', '*', '/', '|', '=',
            '`', '\'', '"', '~',
            '(', ')', '<', '>', '[', ']', '{', '}',
            '!', '?', '#', '$', '%', '^', '&',
        );
        $_text = str_replace($separators, " ", (string) $_text);

        // preg_split replaces split(), which no longer exists in PHP 7+;
        // \s+ also covers tabs and bare carriage returns.
        $_words = preg_split('/\s+/', $_text, -1, PREG_SPLIT_NO_EMPTY);
        if (!is_array($_words))
            return array();

        $output = array();
        foreach ($_words as $_word)
        {
            $w = $this->validateIndexWord($_word);
            if ($w !== null && $w !== "")
                $output[] = $w;
        }
        return $output;
    }

    /**
     * Normalizes a candidate word and decides whether it is indexable.
     *
     * @param string $w  raw token
     * @return string|null  trimmed, lower-cased word, or null when the word is
     *                      rejected because of its length
     */
    function validateIndexWord($w)
    {
        // all modifications, replacements
        $w = strtolower(trim($w));
        $length = strlen($w);
        // Strings of length 2 must be accepted because of domains such as
        // .sk, .cz etc.; 48 is the limit of the database column.
        if ($length > 1 && $length <= 48)
            return $w;

        return null;
    }
}
?>

Platon Group <platon@platon.org> http://platon.org/
Copyright © 2002-2006 Platon Group
Site powered by Metafox CMS
Go to Top