Lucene 学习(一):简单demo
lucene java 全文检索    2017-07-19 20:51:52    793   
lightingfire   lucene java 全文检索
Lucene是Apache软件基金会Jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包,但它不是一个完整的全文检索引擎,而是一个全文检索引擎的架构,提供了完整的查询引擎和索引引擎,部分文本分析引擎(英文与德文两种西方语言)。Lucene的目的是为软件开发人员提供一个简单易用的工具包,以方便地在目标系统中实现全文检索的功能,或者是以此为基础建立起完整的全文检索引擎。Lucene是一套用于全文检索和搜寻的开源程式库,由Apache软件基金会支持和提供。Lucene提供了一个简单却强大的应用程式接口,能够做全文索引和搜寻。在Java开发环境里Lucene是一个成熟的免费开源工具。就其本身而言,Lucene是当前以及最近几年最受欢迎的免费Java信息检索程序库。人们经常提到信息检索程序库,虽然与搜索引擎有关,但不应该将信息检索程序库与搜索引擎相混淆。(摘自:http://baike.baidu.com/link?url=YfcwwNXbNFaYkMNZqNhk9LIyHdrSuIMsMLlO_NNm3ioxHADGUid2JnF1R9znysICj6w83zJmlpZPBJnv1mHYFK )


下面是全文检索引擎的初步应用。需要说明的是,Lucene自带的StandardAnalyzer对中文只能按单字切分,分词效果并不理想,所以中文分词需要借助插件(例如IK Analyzer),在后面会继续讲到。


代码摘自:http://iluoxuan.iteye.com/blog/1708695


pom.xml文件:

  1. <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  2.     xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  3.   <modelVersion>4.0.0</modelVersion>
  4.   <groupId>cn.firewarm</groupId>
  5.   <artifactId>testLucene</artifactId>
  6.   <packaging>war</packaging>
  7.   <version>0.0.1-SNAPSHOT</version>
  8.   <name>testLucene Maven Webapp</name>
  9.   <url>http://maven.apache.org</url>
  10.   <repositories>
  11.     <repository>
  12.       <id>mine</id>
  13.       <name>public Releases</name>
  14.       <layout>default</layout>
  15.       <url>http://nexus.liuyingguang.cn:8081/nexus/content/groups/public/</url>
  16.     </repository>
  17.   </repositories>
  18.   <dependencies>
  19.     <!-- Lucene modules: keep all three on the same version. -->
  20.     <dependency>
  21.       <groupId>org.apache.lucene</groupId>
  22.       <artifactId>lucene-core</artifactId>
  23.       <version>4.10.1</version>
  24.     </dependency>
  25.     <dependency>
  26.       <groupId>org.apache.lucene</groupId>
  27.       <artifactId>lucene-analyzers-common</artifactId>
  28.       <version>4.10.1</version>
  29.     </dependency>
  30.     <dependency>
  31.       <groupId>org.apache.lucene</groupId>
  32.       <artifactId>lucene-queryparser</artifactId>
  33.       <version>4.10.1</version>
  34.     </dependency>
  35.     <dependency>
  36.       <groupId>org.apache.commons</groupId>
  37.       <artifactId>commons-vfs2</artifactId>
  38.       <version>2.1</version>
  39.     </dependency>
  40.     <dependency>
  41.       <groupId>commons-io</groupId>
  42.       <artifactId>commons-io</artifactId>
  43.       <version>2.4</version>
  44.     </dependency>
  45.     <!-- The demo classes use org.junit.Test, so JUnit 4 has to be on the classpath. -->
  46.     <!-- NOTE(review): add <scope>test</scope> if the demo classes live under src/test/java. -->
  47.     <dependency>
  48.       <groupId>junit</groupId>
  49.       <artifactId>junit</artifactId>
  50.       <version>4.12</version>
  51.     </dependency>
  52.   </dependencies>
  53.   <build>
  54.     <finalName>testLucene</finalName>
  55.   </build>
  56. </project>

创建索引的代码如下:

  1. package com.search.lucene;
  2.  
  3. import java.io.File;
  4.  
  5. import org.apache.lucene.analysis.Analyzer;
  6. import org.apache.lucene.analysis.standard.StandardAnalyzer;
  7. import org.apache.lucene.document.Document;
  8. import org.apache.lucene.document.Field.Store;
  9. import org.apache.lucene.document.StringField;
  10. import org.apache.lucene.document.TextField;
  11. import org.apache.lucene.index.IndexWriter;
  12. import org.apache.lucene.index.IndexWriterConfig;
  13. import org.apache.lucene.store.Directory;
  14. import org.apache.lucene.store.FSDirectory;
  15. import org.apache.lucene.util.Version;
  16. import org.junit.Before;
  17. import org.junit.Test;
  18.  
  19. public class IndexFile {
  20.  
  21.     /** Values written to the "id" field, one per document. */
  22.     protected String[] ids={"1", "2"};
  23.  
  24.     /** Values written to the "content" field; content[i] belongs to ids[i]. */
  25.     protected String[] content={"Amsterdam has lost of add cancals", "i love add this girl"};
  26.  
  27.     /** Values written to the "city" field; city[i] belongs to ids[i]. */
  28.     protected String[] city={"Amsterdam", "Venice"};
  29.  
  30.     /** Index directory; non-null only while {@link #init()} is running. */
  31.     private Directory dir;
  32.  
  33.     /**
  34.      * Builds the demo index: adds one document per entry of {@link #ids}
  35.      * to the file-system index at {@code D://lucene/index}.
  36.      *
  37.      * <p>The directory and the writer are opened with try-with-resources, so
  38.      * the writer (and the index write lock it holds) is released even when
  39.      * adding a document throws.
  40.      *
  41.      * @throws Exception if the index cannot be opened or written
  42.      */
  43.     @Test
  44.     public void init() throws Exception {
  45.         String pathFile="D://lucene/index";
  46.         try (Directory directory=FSDirectory.open(new File(pathFile))) {
  47.             dir=directory;
  48.             try (IndexWriter writer=getWriter()) {
  49.                 for(int i=0; i < ids.length; i++) {
  50.                     Document doc=new Document();
  51.                     // StringField is indexed as a single exact token; TextField goes through the analyzer.
  52.                     doc.add(new StringField("id", ids[i], Store.YES));
  53.                     doc.add(new TextField("content", content[i], Store.YES));
  54.                     doc.add(new StringField("city", city[i], Store.YES));
  55.                     writer.addDocument(doc);
  56.                 }
  57.                 System.out.println("init ok?");
  58.             }
  59.         } finally {
  60.             // The directory is closed at this point; do not leave a stale reference behind.
  61.             dir=null;
  62.         }
  63.     }
  64.  
  65.     /**
  66.      * Creates an IndexWriter on the currently open index directory, using a
  67.      * StandardAnalyzer.
  68.      *
  69.      * @return a new writer; the caller is responsible for closing it
  70.      * @throws IllegalStateException if no directory is open (i.e. outside {@link #init()})
  71.      * @throws Exception if the writer cannot be created
  72.      */
  73.     public IndexWriter getWriter() throws Exception {
  74.         if (dir == null) {
  75.             throw new IllegalStateException("index directory is not open; call getWriter() from within init()");
  76.         }
  77.         // NOTE(review): Version.LUCENE_40 is kept from the original; confirm whether the
  78.         // constant matching the lucene 4.10.1 dependency should be used instead.
  79.         Analyzer analyzer=new StandardAnalyzer(Version.LUCENE_40);
  80.         IndexWriterConfig iwc=new IndexWriterConfig(Version.LUCENE_40, analyzer);
  81.         return new IndexWriter(dir, iwc);
  82.     }
  83.  
  84. }


查询代码如下:

  1. package com.search.lucene;
  2.  
  3. import java.io.File;
  4.  
  5. import org.apache.lucene.document.Document;
  6. import org.apache.lucene.index.DirectoryReader;
  7. import org.apache.lucene.index.IndexReader;
  8. import org.apache.lucene.index.Term;
  9. import org.apache.lucene.search.IndexSearcher;
  10. import org.apache.lucene.search.ScoreDoc;
  11. import org.apache.lucene.search.TermQuery;
  12. import org.apache.lucene.search.TopDocs;
  13. import org.apache.lucene.store.Directory;
  14. import org.apache.lucene.store.FSDirectory;
  15. import org.junit.Test;
  16.  
  17. public class IndexSearch {
  18.  
  19. /**
  20. * 查询
  21. * @throws Exception
  22. */
  23. @Test
  24. public void search() throws Exception {
  25. String filePath="D://lucene/index";
  26. Directory dir=FSDirectory.open(new File(filePath));
  27. IndexReader reader=DirectoryReader.open(dir);
  28. IndexSearcher searcher=new IndexSearcher(reader);
  29. Term term=new Term("content", "add");
  30. TermQuery query=new TermQuery(term);
  31. TopDocs topdocs=searcher.search(query, 5);
  32. ScoreDoc[] scoreDocs=topdocs.scoreDocs;
  33. System.out.println("查询结果总数---" + topdocs.totalHits+"最大的评分--"+topdocs.getMaxScore());
  34. for(int i=0; i < scoreDocs.length; i++) {
  35. int doc = scoreDocs[i].doc;
  36. Document document = searcher.doc(doc);
  37. System.out.println("content===="+document.get("content"));
  38. System.out.println("id--" + scoreDocs[i].doc + "---scors--" + scoreDocs[i].score+"---index--"+scoreDocs[i].shardIndex);
  39. }
  40. reader.close();
  41. }
  42. }

 

by 刘迎光@萤火虫工作室 
OpenBI交流群:495266201 
MicroService 微服务交流群:217722918 
mail: liuyg#liuyingguang.cn 
博主首页(==防止爬虫==):http://blog.liuyingguang.cn

Pre: 单机使用docker的host网络安装consul、registrator、consul-template构建高可用demo

Next: Lucene 学习(二):使用IK Analyzer中文分词


Table of content