Lucene实战之基于StandardAnalyzer读写索引
前言
使用Lucene创建索引时如果指定了分析器(Analyzer),那么写入索引和查询索引都必须使用同一个分析器。目前我发现主要是在处理中文时比较麻烦:例如在使用Solr时,如果后来才配置了IK分词器,就需要把已有的index清空并重新创建,才能继续正常搜索。
本篇使用的是lucene-6.4.0,其中几个关键类的用法与4.x版本有所不同。
创建索引
1 public void index(){ 2 3 Directory dir=null; 4 Analyzer analyzer=null; 5 IndexWriterConfig config=null; 6 IndexWriter indexWriter=null; 7 try{ 8 /** 9 * SimpleFSDirectory 不能很好支持多线程操作 10 * **/ 11 dir =new SimpleFSDirectory(Paths.get(INDEX_URL)); 12 13 analyzer=new StandardAnalyzer(); 14 config =new IndexWriterConfig(analyzer); 15 /** 16 * IndexWriter(Directory d,IndexWriterConfig config) 17 * **/ 18 indexWriter =new IndexWriter(dir,config); 19 20 indexWriter.deleteAll(); 21 Listbooks =bookDao.listAllBooks(); 22 Document document=null; 23 24 SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); 25 26 for(UploadBook book:books){ 27 document=new Document(); 28 document.add(new Field("id",book.getId().toString(), TextField.TYPE_STORED)); 29 document.add(new Field("ip",book.getIp(), TextField.TYPE_STORED)); 30 document.add(new Field("title",book.getOriginFileName(), TextField.TYPE_STORED)); 31 32 document.add(new Field("content", PdfReader.read(INDEX_PDF+book.getNewFileName()),TextField.TYPE_STORED)); 33 document.add(new Field("createtime",formatter.format(book.getCreateTime()), TextField.TYPE_STORED)); 34 35 indexWriter.addDocument(document); 36 } 37 38 indexWriter.commit(); 39 40 System.out.println("======索引创建完成,公创建"+books.size()+"条索引========"); 41 }catch (IOException ex){ 42 ex.printStackTrace(); 43 } 44 catch(Exception ex){ 45 ex.printStackTrace(); 46 }finally { 47 if(indexWriter !=null){ 48 try{ 49 indexWriter.close(); 50 }catch (IOException ex){ 51 System.out.println("======indexWriter close exception========"); 52 } 53 } 54 } 55 56 }
读取索引
1 public static Listsearch2(String kw){ 2 Directory dir=null; 3 Analyzer analyzer=null; 4 List list = new ArrayList (); 5 try{ 6 dir= FSDirectory.open(Paths.get("e:\\soso\\index")); 7 analyzer=new StandardAnalyzer(); 8 9 DirectoryReader reader =DirectoryReader.open(dir); 10 IndexSearcher searcher=new IndexSearcher(reader); 11 12 QueryParser parser=new QueryParser("content",analyzer); 13 Query query =parser.parse(kw); 14 15 ScoreDoc[] docs=searcher.search(query,100).scoreDocs; 16 17 for (int i = 0; i < docs.length; i++) { 18 Document firstHit = searcher.doc(docs[i].doc); 19 20 Book book=new Book(); 21 book.setId(Integer.parseInt(firstHit.getField("id").stringValue())); 22 book.setIp(firstHit.getField("ip").stringValue()); 23 24 String title=firstHit.getField("title").stringValue(); 25 title=title.substring(0,title.lastIndexOf(".")); 26 book.setTitle(title); 27 28 String content=firstHit.getField("content").stringValue(); 29 if(content.length()>=500){ 30 content=content.substring(0,500)+"......"; 31 } 32 book.setContent(content); 33 34 SimpleDateFormat format=new SimpleDateFormat("yyyy-MM-mm"); 35 Date date =format.parse(firstHit.getField("createtime").stringValue()); 36 book.setCreateTime(format.format(date)); 37 38 list.add(book); 39 40 } 41 42 }catch(Exception ex){ 43 44 }finally { 45 try{ 46 dir.close(); 47 48 }catch(IOException ex){ 49 ex.printStackTrace(); 50 } 51 } 52 53 return list; 54 }