# 100Mb
# NOTE(review): "100Mb" above is the tail of a comment that starts before the
# visible part of this file (presumably the legend for $site_size) -- confirm.
#
# Site size selector. The code further down maps 1, 3 and 4 to dedicated hash
# sizes; every other value (including 2) selects the default of 50001.
$site_size = 2;

# Path to index database files
$HASH      = "./db/0_hash";
$HASHWORDS = "./db/0_hashwords";
$FINFO     = "./db/0_finfo";
$SITEWORDS = "./db/0_sitewords";
$WORD_IND  = "./db/0_word_ind";

#===================================================================
#
#   These variables are used by the spider
#
#===================================================================

# Starting URL (used by spider)
$start_url = array(
    "http://trs-sut.ru",
);

# Spider will index only files from these servers
$allow_url = array(
    "http://trs-sut.ru",
);

#===================================================================
#
#   All other variables are optional. The script should work fine
#   with default settings.
#   These variables control the indexing process.
#
#===================================================================

# File extensions to index (whitespace-separated list)
# Add "NONE" if you want to index files without extensions
$file_ext = 'php';

# List of directories which should not be indexed (whitespace-separated)
$no_index_dir = 'admin download img cgi-bin js search private map prg manual mail forum egw otap tmp';

# List of files which should not be indexed (whitespace-separated)
$no_index_files = 'robots.txt footer.php';

# Minimum word length to index
$min_length = 2;

# Index or not numbers (set $numbers = "" if you don't want to index numbers)
# You may add here other non-letter characters which you want to index
$numbers = '0-9';

# Parts of documents which should not be indexed
# Edit the markers below and enable the switch if you want to use this feature
$use_selective_indexing = "NO";
# NOTE(review): both entries use the empty string as key, so the second one
# overwrites the first and the array holds a single "" => "" pair. It looks
# like the real start/end markers were lost -- confirm and fill them in.
$no_index_strings = array(
    "" => "",
    "" => "",
);

# Cut default filenames from URL ("YES" or "NO")
$cut_default_filenames = 'NO';
$default_filenames = 'index.php';

# Convert URL to lower case ("YES" or "NO")
$url_to_lower_case = 'NO';

# Indexing scheme
# Whole word            - 1
# Beginning of the word - 2
# Every substring       - 3
$INDEXING_SCHEME = 3;

# Translate escape chars (HTML entities such as &Egrave; or &#255;) ("YES" or "NO")
$use_esc = "YES";

# Index META tags ("YES" or "NO")
$use_META = "NO";

# Use stopwords ("YES" or "NO"); $stop_words is the whitespace-separated list
$use_stop_words = "NO";
$stop_words = "";

#===================================================================
#
#   These variables control the script output.
#
#===================================================================

# Number of results per page
$res_num = 10;

# Define length of page description in output
# and use META description ("YES") or first "n" characters of page ("NO")
$descr_size = 256;
$use_META_descr = "NO";

#===================================================================
#
#   --- end of configuration ---
#
#   Please do not edit below this line unless you know what you are doing
#
#===================================================================

# Pick the hash table size from the site size selector.
# Loose comparison is deliberate so that "1" and 1 behave the same.
if ($site_size == 1) {
    $HASHSIZE = 20001;
} elseif ($site_size == 3) {
    $HASHSIZE = 100001;
} elseif ($site_size == 4) {
    $HASHSIZE = 300001;
} else {
    $HASHSIZE = 50001;
}

#===================================================================

# Turns a whitespace-separated list into a regular-expression group.
#
# $str  - list of words separated by whitespace, e.g. 'robots.txt footer.php'
# Returns the words joined with "|" inside parentheses, with every dot
# escaped, e.g. '(robots\.txt|footer\.php)'. Only dots are escaped; any other
# regex metacharacter in a word is passed through unchanged. An empty list
# yields "()".
function prepare_string($str) {
    # Strip leading/trailing whitespace, then turn each inner run into "|".
    $str = preg_replace("/^\s+|\s+$/", "", $str);
    $str = preg_replace("/\s+/", "|", $str);
    # Literal replacement, so no regex is needed to escape the dots.
    $str = str_replace(".", "\\.", $str);

    return "(" . $str . ")";
}

# Build the extension pattern. With "NONE" in the list the pattern also accepts
# names without any dot and paths ending in a dot-less segment.
if (strpos($file_ext, "NONE") !== false) {
    $file_ext = str_replace("NONE", "", $file_ext);
    $file_ext = prepare_string($file_ext);
    $file_ext = '(\.' . $file_ext . '|^[^.]+|/[^.]*)$';
} else {
    $file_ext = prepare_string($file_ext);
    $file_ext = '\.' . $file_ext . '$';
}

$no_index_dir   = prepare_string($no_index_dir);
$no_index_files = prepare_string($no_index_files);

# Default filenames are matched as the last path segment.
$default_filenames = prepare_string($default_filenames);
$default_filenames = '/' . $default_filenames . '$';

#===================================================================

# Build the stop-word lookup table: word => 1.
# Whitespace runs are collapsed to one space before splitting, so an empty
# list produces a single "" key and leading/trailing whitespace produces a ""
# key as well -- the same table the previous hand-written loop built.
$stop_words = preg_replace("/\s+/s", " ", $stop_words);
$stop_words_array = array_fill_keys(explode(" ", $stop_words), 1);
#===================================================================

# Replacement table for named HTML entities, keyed by the full entity text
# (ampersand through semicolon) because esc2char() looks entries up that way.
# Entities for codes 192-255 map to the single byte with that code;
# &nbsp;, &amp; and &quot; are replaced by a plain space.
$html_esc = array(
    "&Agrave;" => chr(192), "&Aacute;" => chr(193), "&Acirc;"  => chr(194), "&Atilde;" => chr(195),
    "&Auml;"   => chr(196), "&Aring;"  => chr(197), "&AElig;"  => chr(198), "&Ccedil;" => chr(199),
    "&Egrave;" => chr(200), "&Eacute;" => chr(201), "&Ecirc;"  => chr(202), "&Euml;"   => chr(203),
    "&Igrave;" => chr(204), "&Iacute;" => chr(205), "&Icirc;"  => chr(206), "&Iuml;"   => chr(207),
    "&ETH;"    => chr(208), "&Ntilde;" => chr(209), "&Ograve;" => chr(210), "&Oacute;" => chr(211),
    "&Ocirc;"  => chr(212), "&Otilde;" => chr(213), "&Ouml;"   => chr(214), "&times;"  => chr(215),
    "&Oslash;" => chr(216), "&Ugrave;" => chr(217), "&Uacute;" => chr(218), "&Ucirc;"  => chr(219),
    "&Uuml;"   => chr(220), "&Yacute;" => chr(221), "&THORN;"  => chr(222), "&szlig;"  => chr(223),
    "&agrave;" => chr(224), "&aacute;" => chr(225), "&acirc;"  => chr(226), "&atilde;" => chr(227),
    "&auml;"   => chr(228), "&aring;"  => chr(229), "&aelig;"  => chr(230), "&ccedil;" => chr(231),
    "&egrave;" => chr(232), "&eacute;" => chr(233), "&ecirc;"  => chr(234), "&euml;"   => chr(235),
    "&igrave;" => chr(236), "&iacute;" => chr(237), "&icirc;"  => chr(238), "&iuml;"   => chr(239),
    "&eth;"    => chr(240), "&ntilde;" => chr(241), "&ograve;" => chr(242), "&oacute;" => chr(243),
    "&ocirc;"  => chr(244), "&otilde;" => chr(245), "&ouml;"   => chr(246), "&divide;" => chr(247),
    "&oslash;" => chr(248), "&ugrave;" => chr(249), "&uacute;" => chr(250), "&ucirc;"  => chr(251),
    "&uuml;"   => chr(252), "&yacute;" => chr(253), "&thorn;"  => chr(254), "&yuml;"   => chr(255),
    "&nbsp;"   => " ",
    "&amp;"    => " ",
    "&quot;"   => " ",
);

#===================================================================

# Converts one HTML entity into its replacement string.
#
# $str  - array whose element 0 holds the entity text, e.g. "&eacute;",
#         "&#233;" or "&#xE9;".
#         NOTE(review): this looks like a preg_replace_callback() match
#         array -- confirm against the caller.
# Returns the replacement from $html_esc for a named entity, the byte with the
# given code for a decimal or hexadecimal entity, or "" when the entity is
# unknown or carries no digits.
function esc2char($str) {
    global $html_esc;

    $esc = $str[0];

    if (preg_match("/&[a-zA-Z]*;/", $esc)) {
        # Unknown names yield "" instead of reading an undefined index.
        return isset($html_esc[$esc]) ? $html_esc[$esc] : "";
    }

    # At least one digit is required so that "&#;" never reaches chr().
    # chr() keeps only the low 8 bits of codes above 255, as before.
    if (preg_match("/&#([0-9]+);/", $esc, $matches)) {
        return chr((int) $matches[1]);
    }

    if (preg_match("/&#x([0-9a-fA-F]+);/", $esc, $matches)) {
        return chr((int) hexdec($matches[1]));
    }

    return "";
}

#=====================================================================
# NOTE(review): the text following the closing tag below is sent to the output
# verbatim every time this file is loaded -- confirm that this is intended.
?> Error!!! File does not exists. Check configuration!