Changeset 293
- Timestamp:
- 02/22/08 04:06:12 (9 months ago)
- Location:
- trunk/thrudex/src
- Files:
-
- 2 added
- 4 modified
-
CLuceneIndex.cpp (modified) (19 diffs)
-
CLuceneIndex.h (modified) (2 diffs)
-
Makefile.am (modified) (1 diff)
-
UpdateFilter.cpp (added)
-
UpdateFilter.h (added)
-
thrudex.conf (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
-
trunk/thrudex/src/CLuceneIndex.cpp
r292 r293 11 11 12 12 #include "bloom_filter.hpp" 13 #include "UpdateFilter.h" 13 14 14 15 using namespace thrudex; … … 73 74 74 75 //build up bloom filter 75 76 76 disk_bloom = shared_ptr<bloom_filter>(new bloom_filter(filter_space,1.0/(1.0 * filter_space), random_seed)); 77 disk_reader = shared_ptr<IndexReader>(IndexReader::open(idx_path.c_str())); 78 disk_filter = shared_ptr<UpdateFilter>(new UpdateFilter(disk_reader)); 77 79 78 80 if(!new_index){ 79 81 80 boost::shared_ptr<IndexReader> reader(IndexReader::open(idx_path.c_str())); 81 82 int max = reader->maxDoc(); 82 int max = disk_reader->maxDoc(); 83 83 char buf[1024]; 84 84 85 85 for(int i=0; i<max; i++){ 86 const wchar_t *id = reader->document(i)->get( DOC_KEY );86 const wchar_t *id = disk_reader->document(i)->get( DOC_KEY ); 87 87 88 88 STRCPY_TtoA(buf,id,1024); … … 93 93 } 94 94 95 reader->close(); 95 96 96 } 97 97 … … 105 105 ram_searcher = shared_ptr<IndexSearcher>(new IndexSearcher(ram_directory.get())); 106 106 107 disk_searcher = shared_ptr<IndexSearcher>(new IndexSearcher( idx_path.c_str()));107 disk_searcher = shared_ptr<IndexSearcher>(new IndexSearcher(disk_reader.get())); 108 108 last_refresh = -1; 109 109 … … 144 144 CLuceneIndex::~CLuceneIndex() 145 145 { 146 //monitor_thread->stop();147 146 sync(); 148 147 } … … 187 186 { 188 187 189 assert(!key.empty()); 190 188 if(key.empty()){ 189 ThrudexException ex; 190 ex.what = "Empty key"; 191 throw ex; 192 } 191 193 192 194 Guard g( mutex ); 193 194 195 195 196 //always put into memory (we will merge to disk later) … … 198 199 shared_ptr<IndexModifier> l_modifier = modifier; 199 200 shared_ptr<set<string> > l_disk_deletes = disk_deletes; 201 shared_ptr<UpdateFilter> l_disk_filter = disk_filter; 202 203 wstring wkey = build_wstring(key); 200 204 201 205 //if update to a doc in memory old copy first 202 206 if( ram_bloom->contains( key ) ){ 203 207 204 wstring wkey = build_wstring(key);205 208 Term *t = new Term(DOC_KEY, wkey.c_str() ); 206 209 … … 212 
215 213 216 214 //If this exists already on disk 215 //remove it 217 //If this exists already on disk remove it 216 218 if( l_disk_bloom->contains( key ) ){ 217 219 l_disk_deletes->insert( key ); 220 221 if(!syncing) 222 l_disk_filter->skip(wkey); 218 223 } 219 224 220 225 l_modifier->addDocument(doc); 221 222 226 l_ram_bloom->insert( key ); 223 227 … … 233 237 shared_ptr<IndexModifier> l_modifier = modifier; 234 238 shared_ptr<set<string> > l_disk_deletes = disk_deletes; 235 239 shared_ptr<UpdateFilter> l_disk_filter = disk_filter; 240 241 wstring wkey = build_wstring(key); 236 242 237 243 //Since we don't want to write to disk … … 240 246 LOG4CXX_DEBUG(logger, "Removed "+key); 241 247 l_disk_deletes->insert( key ); 248 249 if(!syncing) 250 l_disk_filter->skip(wkey); 251 242 252 last_modified = Util::currentTime(); 243 253 } … … 246 256 if(l_ram_bloom->contains( key )){ 247 257 248 wstring wkey = build_wstring(key);249 258 Term *t = new Term(DOC_KEY, wkey.c_str() ); 250 259 … … 266 275 shared_ptr<CLuceneRAMDirectory> l_ram_prev_directory = ram_prev_directory; 267 276 268 shared_ptr<MultiSearcher> l_searcher = this->getSearcher(); 277 shared_ptr<MultiSearcher> l_searcher = this->getSearcher(); 278 shared_ptr<UpdateFilter> l_disk_filter = disk_filter; 269 279 270 280 Query *query; … … 301 311 302 312 if( q.sortby.empty() ){ 303 h = l_searcher->search(query );313 h = l_searcher->search(query, l_disk_filter.get()); 304 314 } else { 305 315 … … 313 323 314 324 try { 315 h = l_searcher->search(query,l sort);325 h = l_searcher->search(query,l_disk_filter.get(),lsort); 316 326 } catch(CLuceneError &e) { 317 327 318 328 LOG4CXX_WARN(logger, "Sort failed, falling back on regular search"); 319 h = l_searcher->search(query );329 h = l_searcher->search(query,l_disk_filter.get()); 320 330 } 321 331 } … … 419 429 420 430 LOG4CXX_DEBUG(logger,"Syncing Started"); 421 431 string idx_path = index_root + "/" + index_name; 422 432 423 433 shared_ptr<IndexModifier> l_ram_modifier; … … 
426 436 shared_ptr<set<string> > l_disk_deletes; 427 437 shared_ptr<bloom_filter> l_disk_bloom; 438 shared_ptr<UpdateFilter> l_update_filter; 428 439 429 440 … … 459 470 460 471 //Now we start by deleting any updated docs from disk 461 string idx_path = index_root + "/" + index_name;462 472 463 473 shared_ptr<IndexReader> disk_reader(IndexReader::open(idx_path.c_str())); … … 498 508 LOG4CXX_DEBUG(logger,"Merged"); 499 509 } 510 500 511 //Search new index (big perf hit so get it over now) 501 502 shared_ptr<IndexSearcher> l_disk_searcher(new IndexSearcher(idx_path.c_str())); 512 shared_ptr<IndexReader> l_disk_reader(IndexReader::open(idx_path.c_str())); 513 shared_ptr<IndexSearcher> l_disk_searcher(new IndexSearcher(l_disk_reader.get())); 514 shared_ptr<UpdateFilter> l_disk_filter(new UpdateFilter(l_disk_reader)); 515 516 503 517 wstring q = wstring(DOC_KEY)+wstring(L":1234"); 504 518 … … 514 528 Guard g(mutex); 515 529 530 //Add any new deletes to the filter 531 set<string>::iterator it; 532 for( it=disk_deletes->begin(); it!=disk_deletes->end(); ++it){ 533 wstring wkey = build_wstring(*it); 534 l_disk_filter->skip(wkey); 535 } 536 537 disk_reader = l_disk_reader; 538 disk_filter = l_disk_filter; 539 disk_searcher = l_disk_searcher; 540 541 542 last_synched = Util::currentTime(); 543 516 544 syncing = false; //this flag alters the search code to include prev searcher 517 518 disk_searcher = l_disk_searcher;519 520 last_synched = Util::currentTime();521 545 } 522 546 -
trunk/thrudex/src/CLuceneIndex.h
r288 r293
@@ -25,4 +25,5 @@
 
 class bloom_filter;
+class UpdateFilter;
 
 #define DOC_KEY L"_doc_key_"
… …
@@ -80,4 +81,6 @@
 bool syncing;
 
+boost::shared_ptr<lucene::index::IndexReader> disk_reader;
+boost::shared_ptr<UpdateFilter> disk_filter;
 boost::shared_ptr<lucene::search::IndexSearcher> disk_searcher;
 boost::shared_ptr<bloom_filter> disk_bloom;
-
trunk/thrudex/src/Makefile.am
r259 r293
@@ -10,4 +10,5 @@
 CLuceneRAMDirectory.cpp \
 CLuceneIndex.cpp \
+UpdateFilter.cpp \
 main.cpp
 
-
trunk/thrudex/src/thrudex.conf
r263 r293
@@ -16,6 +16,6 @@
 
 # Set root logger level to DEBUG and its only appender to A1.
-#log4j.rootLogger=DEBUG, A1
-log4j.rootLogger=INFO, A1
+log4j.rootLogger=DEBUG, A1
+#log4j.rootLogger=INFO, A1
 
 # A1 is set to be a ConsoleAppender.
