Lucene对日期(date)和整型(int)的处理

项目结构:

运行效果:

==========================================

代码部分:

==========================================

/lucene_0400_dateAndInt/src/com/b510/lucene/util/LuceneUtil.java

* com.b510.lucene.util; java.io.File; 7 import java.io.IOException; 8 import java.text.ParseException; 9 import java.text.SimpleDateFormat; 10 import java.util.Date; 11 import java.util.HashMap; 12 import java.util.Map; org.apache.lucene.analysis.standard.StandardAnalyzer; 15 import org.apache.lucene.document.Document; 16 import org.apache.lucene.document.Field; 17 import org.apache.lucene.document.NumericField; 18 import org.apache.lucene.index.CorruptIndexException; 19 import org.apache.lucene.index.IndexReader; 20 import org.apache.lucene.index.IndexWriter; 21 import org.apache.lucene.index.IndexWriterConfig; 22 import org.apache.lucene.index.Term; 23 import org.apache.lucene.search.IndexSearcher; 24 import org.apache.lucene.search.ScoreDoc; 25 import org.apache.lucene.search.TermQuery; 26 import org.apache.lucene.search.TopDocs; 27 import org.apache.lucene.store.Directory; 28 import org.apache.lucene.store.FSDirectory; 29 import org.apache.lucene.store.LockObtainFailedException; 30 import org.apache.lucene.util.Version; Hongten <br /> 34 * @date 2013-1-31 LuceneUtil {* 邮件idString[] ids = { “1”, “2”, “3”, “4”, “5”, “6” };* 邮箱String[] emails = { “aa@sina.com”, “bb@foxmail.com”, “cc@qq.com”, 46″dd@163.com”, “ee@gmail.com”, “ff@sina.com” };* 邮件内容String[] contents = { “hello,aa,hi,hell world!!”, 51″hello,bb,i’m a boy”, 52″hello,cc”, 53″hello,dd,welcome to my zone,this is a test hello”, 54″hello,ee,haha,xixi,hello world!!”, 55″hello,ff” };* 附件数[] attachs = {1,5,3,2,1,6};* 日期Date[] dates = null;* 收件人的名称String[] names = { “hongten”, “hanyuan”, “Devide”, “Tom”, “Steven”, 68″Shala” };Directory directory = null;* 评分Map<String, Float> scores = new HashMap<String, Float>(); LuceneUtil() { 77try { 78 setDates(); 79scores.put(“sina.com”, 1.0f); 80scores.put(“foxmail.com”, 1.1f); 81directory = FSDirectory.open(new File( 82″D:/WordPlace/lucene/lucene_0400_dateAndInt/lucene/index”)); 83} catch (IOException e) { 84 e.printStackTrace(); 85 } 86 }* 创建日期 setDates(){ 
92SimpleDateFormat sdf = new SimpleDateFormat(“yyyy-MM-dd”); 93try { 94dates = new Date[ids.length]; 95dates[0] = sdf.parse(“2012-11-18”); 96dates[1] = sdf.parse(“2010-01-28”); 97dates[2] = sdf.parse(“2011-11-21”); 98dates[3] = sdf.parse(“2012-12-12”); 99dates[4] = sdf.parse(“2011-06-23”);100dates[5] = sdf.parse(“2012-03-15”);101} catch (ParseException e) {102 e.printStackTrace();103 }104 }* 创建索引 index() {110IndexWriter writer = null;111try {112writer = new IndexWriter(directory, new IndexWriterConfig(113Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));writer.deleteAll();Document document = null;118for (int i = 0; i < ids.length; i++) {119// Field.Store.YES:将会存储域值,原始字符串的值会保存在索引,以此可以进行相应的回复操作,对于主键,香港虚拟主机,标题可以是这种方式存储120// Field.Store.NO:不会存储域值,通常与Index.ANAYLIZED和起来使用,索引一些如文章正文等不需要恢复的文档121// ==============================122// Field.Index.ANALYZED:进行分词和索引,适用于标题,内容等123// Field.Index.NOT_ANALYZED:进行索引,但是不进行分词,如身份证号码,姓名,香港服务器租用,ID等,适用于精确搜索124// Field.Index.ANALYZED_NOT_NORMS:进行分词,但是不进行存储norms信息,网站空间,这个norms中包括了创建索引的时间和权值等信息125// Field.Index.NOT_ANALYZED_NOT_NORMS:不进行分词也不进行存储norms信息(不推荐)document = new Document();128document.add(new Field(“id”, ids[i], Field.Store.YES,129 Field.Index.NOT_ANALYZED_NO_NORMS));130document.add(new Field(“email”, emails[i], Field.Store.YES,131 Field.Index.NOT_ANALYZED));132document.add(new Field(“content”, contents[i], Field.Store.YES,133 Field.Index.ANALYZED));134document.add(new Field(“name”, names[i], Field.Store.YES,135 Field.Index.NOT_ANALYZED_NO_NORMS));136document.add(new NumericField(“attach”, Field.Store.YES,true).setIntValue(attachs[i]));137document.add(new NumericField(“date”,Field.Store.YES,true).setLongValue(dates[i].getTime()));String et = emails[i].substring(emails[i].lastIndexOf(“@”)+1);141 System.out.println(et);142if(scores.containsKey(et)){143 document.setBoost(scores.get(et));144}else{145document.setBoost(0.6f);146 }147 writer.addDocument(document);148 }149} catch (CorruptIndexException e) {150 
e.printStackTrace();151} catch (LockObtainFailedException e) {152 e.printStackTrace();153} catch (IOException e) {154 e.printStackTrace();155} finally {156if (writer != null) {157try {158 writer.close();159} catch (CorruptIndexException e) {160 e.printStackTrace();161} catch (IOException e) {162 e.printStackTrace();163 }164 }165 }166 }* 搜索 search(){172try {173IndexReader reader = IndexReader.open(directory);174IndexSearcher searcher = new IndexSearcher(reader);175TermQuery query = new TermQuery(new Term(“content”,”hello”));176TopDocs tds =searcher.search(query, 10);177for(ScoreDoc sd : tds.scoreDocs){178Document doc = searcher.doc(sd.doc);179SimpleDateFormat sdf = new SimpleDateFormat(“yyyy-MM-dd”);180Date date = null;181/*try {182 date = sdf.parse(doc.get(“date”));183 } catch (ParseException e) {184 e.printStackTrace();System.out.println(“文档序号:[“+sd.doc+”] 得分:[“+sd.score+”] 邮件名称:[“+doc.get(“email”)+”] 邮件人:[“+doc.get(“name”)+”] 附件数:[“+doc.get(“attach”)+”] 日期:[“+doc.get(“date”)+”] 内容 : [“+doc.get(“content”)+”]”);187 }188} catch (CorruptIndexException e) {189 e.printStackTrace();190} catch (IOException e) {191 e.printStackTrace();192 }193 }194 }享受每一刻的感觉,欣赏每一处的风景,这就是人生。

Lucene对日期(date)和整型(int)的处理

相关文章:

你感兴趣的文章:

标签云: