1
22
23 package com.liferay.portal.search.lucene;
24
25 import com.liferay.portal.kernel.dao.jdbc.DataAccess;
26 import com.liferay.portal.kernel.log.Log;
27 import com.liferay.portal.kernel.log.LogFactoryUtil;
28 import com.liferay.portal.kernel.search.Field;
29 import com.liferay.portal.kernel.search.SearchEngineUtil;
30 import com.liferay.portal.kernel.util.FileUtil;
31 import com.liferay.portal.kernel.util.InfrastructureUtil;
32 import com.liferay.portal.kernel.util.PortalClassLoaderUtil;
33 import com.liferay.portal.kernel.util.PropsKeys;
34 import com.liferay.portal.kernel.util.StringPool;
35 import com.liferay.portal.kernel.util.StringUtil;
36 import com.liferay.portal.kernel.util.Validator;
37 import com.liferay.portal.util.PropsUtil;
38 import com.liferay.portal.util.PropsValues;
39 import com.liferay.util.lucene.KeywordsUtil;
40
41 import java.io.IOException;
42 import java.io.StringReader;
43
44 import java.sql.Connection;
45 import java.sql.DatabaseMetaData;
46 import java.sql.ResultSet;
47 import java.sql.Statement;
48
49 import java.util.Date;
50 import java.util.HashSet;
51 import java.util.Map;
52 import java.util.Set;
53 import java.util.concurrent.ConcurrentHashMap;
54
55 import javax.sql.DataSource;
56
57 import org.apache.lucene.analysis.Analyzer;
58 import org.apache.lucene.analysis.TokenStream;
59 import org.apache.lucene.analysis.WhitespaceAnalyzer;
60 import org.apache.lucene.document.Document;
61 import org.apache.lucene.index.IndexReader;
62 import org.apache.lucene.index.IndexWriter;
63 import org.apache.lucene.index.Term;
64 import org.apache.lucene.queryParser.ParseException;
65 import org.apache.lucene.queryParser.QueryParser;
66 import org.apache.lucene.search.BooleanClause;
67 import org.apache.lucene.search.BooleanQuery;
68 import org.apache.lucene.search.IndexSearcher;
69 import org.apache.lucene.search.Query;
70 import org.apache.lucene.search.TermQuery;
71 import org.apache.lucene.search.WildcardQuery;
72 import org.apache.lucene.search.highlight.Highlighter;
73 import org.apache.lucene.search.highlight.QueryScorer;
74 import org.apache.lucene.search.highlight.QueryTermExtractor;
75 import org.apache.lucene.search.highlight.SimpleFragmenter;
76 import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
77 import org.apache.lucene.search.highlight.WeightedTerm;
78 import org.apache.lucene.store.Directory;
79 import org.apache.lucene.store.FSDirectory;
80 import org.apache.lucene.store.RAMDirectory;
81 import org.apache.lucene.store.jdbc.JdbcDirectory;
82 import org.apache.lucene.store.jdbc.JdbcStoreException;
83 import org.apache.lucene.store.jdbc.dialect.Dialect;
84 import org.apache.lucene.store.jdbc.lock.JdbcLock;
85 import org.apache.lucene.store.jdbc.support.JdbcTemplate;
86
87
93 public class LuceneUtil {
94
95 public static void acquireLock(long companyId) {
96 try {
97 _instance._sharedWriter.acquireLock(companyId, true);
98 }
99 catch (InterruptedException ie) {
100 _log.error(ie);
101 }
102 }
103
104 public static void addDate(Document doc, String field, Date value) {
105 doc.add(LuceneFields.getDate(field, value));
106 }
107
108 public static void addExactTerm(
109 BooleanQuery booleanQuery, String field, boolean value) {
110
111 addExactTerm(booleanQuery, field, String.valueOf(value));
112 }
113
114 public static void addExactTerm(
115 BooleanQuery booleanQuery, String field, Boolean value) {
116
117 addExactTerm(booleanQuery, field, String.valueOf(value));
118 }
119
120 public static void addExactTerm(
121 BooleanQuery booleanQuery, String field, double value) {
122
123 addExactTerm(booleanQuery, field, String.valueOf(value));
124 }
125
126 public static void addExactTerm(
127 BooleanQuery booleanQuery, String field, Double value) {
128
129 addExactTerm(booleanQuery, field, String.valueOf(value));
130 }
131
132 public static void addExactTerm(
133 BooleanQuery booleanQuery, String field, int value) {
134
135 addExactTerm(booleanQuery, field, String.valueOf(value));
136 }
137
138 public static void addExactTerm(
139 BooleanQuery booleanQuery, String field, Integer value) {
140
141 addExactTerm(booleanQuery, field, String.valueOf(value));
142 }
143
144 public static void addExactTerm(
145 BooleanQuery booleanQuery, String field, long value) {
146
147 addExactTerm(booleanQuery, field, String.valueOf(value));
148 }
149
150 public static void addExactTerm(
151 BooleanQuery booleanQuery, String field, Long value) {
152
153 addExactTerm(booleanQuery, field, String.valueOf(value));
154 }
155
156 public static void addExactTerm(
157 BooleanQuery booleanQuery, String field, short value) {
158
159 addExactTerm(booleanQuery, field, String.valueOf(value));
160 }
161
162 public static void addExactTerm(
163 BooleanQuery booleanQuery, String field, Short value) {
164
165 addExactTerm(booleanQuery, field, String.valueOf(value));
166 }
167
168 public static void addExactTerm(
169 BooleanQuery booleanQuery, String field, String value) {
170
171
173 Query query = new TermQuery(new Term(field, value));
174
175 booleanQuery.add(query, BooleanClause.Occur.SHOULD);
176 }
177
178 public static void addRequiredTerm(
179 BooleanQuery booleanQuery, String field, boolean value) {
180
181 addRequiredTerm(booleanQuery, field, String.valueOf(value));
182 }
183
184 public static void addRequiredTerm(
185 BooleanQuery booleanQuery, String field, Boolean value) {
186
187 addRequiredTerm(booleanQuery, field, String.valueOf(value));
188 }
189
190 public static void addRequiredTerm(
191 BooleanQuery booleanQuery, String field, double value) {
192
193 addRequiredTerm(booleanQuery, field, String.valueOf(value));
194 }
195
196 public static void addRequiredTerm(
197 BooleanQuery booleanQuery, String field, Double value) {
198
199 addRequiredTerm(booleanQuery, field, String.valueOf(value));
200 }
201
202 public static void addRequiredTerm(
203 BooleanQuery booleanQuery, String field, int value) {
204
205 addRequiredTerm(booleanQuery, field, String.valueOf(value));
206 }
207
208 public static void addRequiredTerm(
209 BooleanQuery booleanQuery, String field, Integer value) {
210
211 addRequiredTerm(booleanQuery, field, String.valueOf(value));
212 }
213
214 public static void addRequiredTerm(
215 BooleanQuery booleanQuery, String field, long value) {
216
217 addRequiredTerm(booleanQuery, field, String.valueOf(value));
218 }
219
220 public static void addRequiredTerm(
221 BooleanQuery booleanQuery, String field, Long value) {
222
223 addRequiredTerm(booleanQuery, field, String.valueOf(value));
224 }
225
226 public static void addRequiredTerm(
227 BooleanQuery booleanQuery, String field, short value) {
228
229 addRequiredTerm(booleanQuery, field, String.valueOf(value));
230 }
231
232 public static void addRequiredTerm(
233 BooleanQuery booleanQuery, String field, Short value) {
234
235 addRequiredTerm(booleanQuery, field, String.valueOf(value));
236 }
237
238 public static void addRequiredTerm(
239 BooleanQuery booleanQuery, String field, String value) {
240
241 addRequiredTerm(booleanQuery, field, value, false);
242 }
243
244 public static void addRequiredTerm(
245 BooleanQuery booleanQuery, String field, String value, boolean like) {
246
247 if (like) {
248 value = StringUtil.replace(
249 value, StringPool.PERCENT, StringPool.STAR);
250
251 value = value.toLowerCase();
252
253 WildcardQuery wildcardQuery = new WildcardQuery(
254 new Term(field, value));
255
256 booleanQuery.add(wildcardQuery, BooleanClause.Occur.MUST);
257 }
258 else {
259
261 Term term = new Term(field, value);
262 TermQuery termQuery = new TermQuery(term);
263
264 booleanQuery.add(termQuery, BooleanClause.Occur.MUST);
265 }
266 }
267
268 public static void addTerm(
269 BooleanQuery booleanQuery, String field, long value)
270 throws ParseException {
271
272 addTerm(booleanQuery, field, String.valueOf(value));
273 }
274
275 public static void addTerm(
276 BooleanQuery booleanQuery, String field, String value)
277 throws ParseException {
278
279 addTerm(booleanQuery, field, value, false);
280 }
281
282 public static void addTerm(
283 BooleanQuery booleanQuery, String field, String value,
284 boolean like)
285 throws ParseException {
286
287 if (Validator.isNull(value)) {
288 return;
289 }
290
291 if (like) {
292 value = value.toLowerCase();
293
294 StringBuilder sb = new StringBuilder();
295
296 sb.append(StringPool.STAR);
297 sb.append(value);
298 sb.append(StringPool.STAR);
299
300 WildcardQuery wildcardQuery = new WildcardQuery(
301 new Term(field, sb.toString()));
302
303 booleanQuery.add(wildcardQuery, BooleanClause.Occur.SHOULD);
304 }
305 else {
306 QueryParser queryParser = new QueryParser(
307 field, LuceneUtil.getAnalyzer());
308
309 try {
310 Query query = queryParser.parse(value);
311
312 booleanQuery.add(query, BooleanClause.Occur.SHOULD);
313 }
314 catch (ParseException pe) {
315 if (_log.isDebugEnabled()) {
316 _log.debug(
317 "ParseException thrown, reverting to literal search",
318 pe);
319 }
320
321 value = KeywordsUtil.escape(value);
322
323 Query query = queryParser.parse(value);
324
325 booleanQuery.add(query, BooleanClause.Occur.SHOULD);
326 }
327 }
328 }
329
330 public static void checkLuceneDir(long companyId) {
331 if (SearchEngineUtil.isIndexReadOnly()) {
332 return;
333 }
334
335 Directory luceneDir = LuceneUtil.getLuceneDir(companyId);
336
337 try {
338
339
341 if (luceneDir.fileExists("write.lock")) {
342 luceneDir.deleteFile("write.lock");
343 }
344 }
345 catch (IOException ioe) {
346 _log.error("Unable to clear write lock", ioe);
347 }
348
349 IndexWriter writer = null;
350
351
354 try {
355 if (luceneDir.fileExists("segments.gen")) {
356 writer = new IndexWriter(
357 luceneDir, LuceneUtil.getAnalyzer(), false);
358 }
359 else {
360 writer = new IndexWriter(
361 luceneDir, LuceneUtil.getAnalyzer(), true);
362 }
363 }
364 catch (IOException ioe) {
365 _log.error("Check Lucene directory failed for " + companyId, ioe);
366 }
367 finally {
368 if (writer != null) {
369 try {
370 writer.close();
371 }
372 catch (IOException ioe) {
373 _log.error(ioe);
374 }
375 }
376 }
377 }
378
379 public static void delete(long companyId) {
380 _instance._delete(companyId);
381 }
382
383 public static void deleteDocuments(long companyId, Term term)
384 throws IOException {
385
386 try {
387 _instance._sharedWriter.deleteDocuments(companyId, term);
388 }
389 catch (InterruptedException ie) {
390 _log.error(ie);
391 }
392 }
393
394 public static Analyzer getAnalyzer() {
395 return _instance._getAnalyzer();
396 }
397
398 public static FSDirectory getDirectory(String path, boolean create)
399 throws IOException {
400
401 return FSDirectory.getDirectory(path, false);
402 }
403
404 public static Directory getLuceneDir(long companyId) {
405 return _instance._getLuceneDir(companyId);
406 }
407
408 public static IndexReader getReader(long companyId) throws IOException {
409 return IndexReader.open(getLuceneDir(companyId));
410 }
411
412 public static IndexSearcher getSearcher(long companyId)
413 throws IOException {
414
415 return new IndexSearcher(getLuceneDir(companyId));
416 }
417
418 public static String[] getQueryTerms(Query query) {
419 String[] fieldNames = new String[] {
420 Field.CONTENT, Field.DESCRIPTION, Field.PROPERTIES, Field.TITLE,
421 Field.USER_NAME
422 };
423
424 WeightedTerm[] weightedTerms = null;
425
426 for (String fieldName : fieldNames) {
427 weightedTerms = QueryTermExtractor.getTerms(
428 query, false, fieldName);
429
430 if (weightedTerms.length > 0) {
431 break;
432 }
433 }
434
435 Set<String> queryTerms = new HashSet<String>();
436
437 for (WeightedTerm weightedTerm : weightedTerms) {
438 queryTerms.add(weightedTerm.getTerm());
439 }
440
441 return queryTerms.toArray(new String[queryTerms.size()]);
442 }
443
444 public static String getSnippet(Query query, String field, String s)
445 throws IOException {
446
447 return getSnippet(
448 query, field, s, 3, 80, "...", StringPool.BLANK, StringPool.BLANK);
449 }
450
451 public static String getSnippet(
452 Query query, String field, String s, int maxNumFragments,
453 int fragmentLength, String fragmentSuffix, String preTag,
454 String postTag)
455 throws IOException {
456
457 SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(
458 preTag, postTag);
459
460 QueryScorer queryScorer = new QueryScorer(query, field);
461
462 Highlighter highlighter = new Highlighter(
463 simpleHTMLFormatter, queryScorer);
464
465 highlighter.setTextFragmenter(new SimpleFragmenter(fragmentLength));
466
467 TokenStream tokenStream = LuceneUtil.getAnalyzer().tokenStream(
468 field, new StringReader(s));
469
470 String snippet = highlighter.getBestFragments(
471 tokenStream, s, maxNumFragments, fragmentSuffix);
472
473 if (Validator.isNotNull(snippet) &&
474 !StringUtil.endsWith(snippet, fragmentSuffix)) {
475
476 snippet = snippet + fragmentSuffix;
477 }
478
479 return snippet;
480 }
481
482 public static IndexWriter getWriter(long companyId) throws IOException {
483 return getWriter(companyId, false);
484 }
485
486 public static IndexWriter getWriter(long companyId, boolean create)
487 throws IOException {
488
489 return _instance._sharedWriter.getWriter(companyId, create);
490 }
491
492 public static void releaseLock(long companyId) {
493 _instance._sharedWriter.releaseLock(companyId);
494 }
495
496 public static void write(long companyId) {
497 _instance._sharedWriter.write(companyId);
498 }
499
500 public static void write(IndexWriter writer) throws IOException {
501 _instance._sharedWriter.write(writer);
502 }
503
504 private LuceneUtil() {
505 String analyzerName = PropsUtil.get(PropsKeys.LUCENE_ANALYZER);
506
507 if (Validator.isNotNull(analyzerName)) {
508 try {
509 _analyzerClass = Class.forName(analyzerName);
510 }
511 catch (Exception e) {
512 _log.error(e);
513 }
514 }
515
516
518 if (PropsValues.LUCENE_STORE_TYPE.equals(_LUCENE_STORE_TYPE_JDBC)) {
519 Connection con = null;
520
521 try {
522 con = DataAccess.getConnection();
523
524 String url = con.getMetaData().getURL();
525
526 int x = url.indexOf(":");
527 int y = url.indexOf(":", x + 1);
528
529 String urlPrefix = url.substring(x + 1, y);
530
531 String dialectClass = PropsUtil.get(
532 PropsKeys.LUCENE_STORE_JDBC_DIALECT + urlPrefix);
533
534 if (dialectClass != null) {
535 if (_log.isDebugEnabled()) {
536 _log.debug("JDBC class implementation " + dialectClass);
537 }
538 }
539 else {
540 if (_log.isDebugEnabled()) {
541 _log.debug("JDBC class implementation is null");
542 }
543 }
544
545 if (dialectClass != null) {
546 _dialect =
547 (Dialect)Class.forName(dialectClass).newInstance();
548 }
549 }
550 catch (Exception e) {
551 _log.error(e);
552 }
553 finally{
554 DataAccess.cleanUp(con);
555 }
556
557 if (_dialect == null) {
558 _log.error("No JDBC dialect found");
559 }
560 }
561 }
562
563 private void _delete(long companyId) {
564 if (SearchEngineUtil.isIndexReadOnly()) {
565 return;
566 }
567
568 if (_log.isDebugEnabled()) {
569 _log.debug("Lucene store type " + PropsValues.LUCENE_STORE_TYPE);
570 }
571
572 if (PropsValues.LUCENE_STORE_TYPE.equals(_LUCENE_STORE_TYPE_FILE)) {
573 _deleteFile(companyId);
574 }
575 else if (PropsValues.LUCENE_STORE_TYPE.equals(
576 _LUCENE_STORE_TYPE_JDBC)) {
577
578 _deleteJdbc(companyId);
579 }
580 else if (PropsValues.LUCENE_STORE_TYPE.equals(_LUCENE_STORE_TYPE_RAM)) {
581 _deleteRam(companyId);
582 }
583 else {
584 throw new RuntimeException(
585 "Invalid store type " + PropsValues.LUCENE_STORE_TYPE);
586 }
587 }
588
589 private void _deleteFile(long companyId) {
590 String path = _getPath(companyId);
591
592 try {
593 Directory directory = getDirectory(path, false);
594
595 directory.close();
596 }
597 catch (Exception e) {
598 if (_log.isWarnEnabled()) {
599 _log.warn("Could not close directory " + path);
600 }
601 }
602
603 FileUtil.deltree(path);
604 }
605
606 private void _deleteJdbc(long companyId) {
607 String tableName = _getTableName(companyId);
608
609 try {
610 Directory directory = _jdbcDirectories.remove(tableName);
611
612 if (directory != null) {
613 directory.close();
614 }
615 }
616 catch (Exception e) {
617 if (_log.isWarnEnabled()) {
618 _log.warn("Could not close directory " + tableName);
619 }
620 }
621
622 Connection con = null;
623 Statement s = null;
624
625 try {
626 con = DataAccess.getConnection();
627
628 s = con.createStatement();
629
630 s.executeUpdate("DELETE FROM " + tableName);
631 }
632 catch (Exception e) {
633 if (_log.isWarnEnabled()) {
634 _log.warn("Could not truncate " + tableName);
635 }
636 }
637 finally {
638 DataAccess.cleanUp(con, s);
639 }
640 }
641
642 private void _deleteRam(long companyId) {
643 }
644
645 private Analyzer _getAnalyzer() {
646 try {
647 return (Analyzer)_analyzerClass.newInstance();
648 }
649 catch (Exception e) {
650 throw new RuntimeException(e);
651 }
652 }
653
654 private Directory _getLuceneDir(long companyId) {
655 if (_log.isDebugEnabled()) {
656 _log.debug("Lucene store type " + PropsValues.LUCENE_STORE_TYPE);
657 }
658
659 if (PropsValues.LUCENE_STORE_TYPE.equals(_LUCENE_STORE_TYPE_FILE)) {
660 return _getLuceneDirFile(companyId);
661 }
662 else if (PropsValues.LUCENE_STORE_TYPE.equals(
663 _LUCENE_STORE_TYPE_JDBC)) {
664
665 return _getLuceneDirJdbc(companyId);
666 }
667 else if (PropsValues.LUCENE_STORE_TYPE.equals(_LUCENE_STORE_TYPE_RAM)) {
668 return _getLuceneDirRam(companyId);
669 }
670 else {
671 throw new RuntimeException(
672 "Invalid store type " + PropsValues.LUCENE_STORE_TYPE);
673 }
674 }
675
676 private Directory _getLuceneDirFile(long companyId) {
677 Directory directory = null;
678
679 String path = _getPath(companyId);
680
681 try {
682 directory = getDirectory(path, false);
683 }
684 catch (IOException ioe1) {
685 try {
686 if (directory != null) {
687 directory.close();
688 }
689
690 directory = getDirectory(path, true);
691 }
692 catch (IOException ioe2) {
693 throw new RuntimeException(ioe2);
694 }
695 }
696
697 return directory;
698 }
699
700 private Directory _getLuceneDirJdbc(long companyId) {
701 JdbcDirectory directory = null;
702
703 Thread currentThread = Thread.currentThread();
704
705 ClassLoader contextClassLoader = currentThread.getContextClassLoader();
706
707 try {
708 currentThread.setContextClassLoader(
709 PortalClassLoaderUtil.getClassLoader());
710
711 String tableName = _getTableName(companyId);
712
713 directory = (JdbcDirectory)_jdbcDirectories.get(tableName);
714
715 if (directory != null) {
716 return directory;
717 }
718
719 try {
720 DataSource ds = InfrastructureUtil.getDataSource();
721
722 directory = new JdbcDirectory(ds, _dialect, tableName);
723
724 _jdbcDirectories.put(tableName, directory);
725
726 if (!directory.tableExists()) {
727 directory.create();
728 }
729 }
730 catch (IOException ioe) {
731 throw new RuntimeException(ioe);
732 }
733 catch (UnsupportedOperationException uoe) {
734 if (_log.isWarnEnabled()) {
735 _log.warn(
736 "Database doesn't support the ability to check " +
737 "whether a table exists");
738 }
739
740 _manuallyCreateJdbcDirectory(directory, tableName);
741 }
742 }
743 finally {
744 currentThread.setContextClassLoader(contextClassLoader);
745 }
746
747 return directory;
748 }
749
750 private Directory _getLuceneDirRam(long companyId) {
751 String path = _getPath(companyId);
752
753 Directory directory = _ramDirectories.get(path);
754
755 if (directory == null) {
756 directory = new RAMDirectory();
757
758 _ramDirectories.put(path, directory);
759 }
760
761 return directory;
762 }
763
764 private String _getPath(long companyId) {
765 StringBuilder sb = new StringBuilder();
766
767 sb.append(PropsValues.LUCENE_DIR);
768 sb.append(companyId);
769 sb.append(StringPool.SLASH);
770
771 return sb.toString();
772 }
773
774 private String _getTableName(long companyId) {
775 return _LUCENE_TABLE_PREFIX + companyId;
776 }
777
778 private void _manuallyCreateJdbcDirectory(
779 JdbcDirectory directory, String tableName) {
780
781
783 Connection con = null;
784 ResultSet rs = null;
785
786 try {
787 con = DataAccess.getConnection();
788
789
791 DatabaseMetaData metaData = con.getMetaData();
792
793 rs = metaData.getTables(null, null, tableName, null);
794
795 if (!rs.next()) {
796 JdbcTemplate jdbcTemplate = directory.getJdbcTemplate();
797
798 jdbcTemplate.executeUpdate(directory.getTable().sqlCreate());
799
800 Class<?> lockClass = directory.getSettings().getLockClass();
801
802 JdbcLock jdbcLock = null;
803
804 try {
805 jdbcLock = (JdbcLock)lockClass.newInstance();
806 }
807 catch (Exception e) {
808 throw new JdbcStoreException(
809 "Failed to create lock class " + lockClass);
810 }
811
812 jdbcLock.initializeDatabase(directory);
813 }
814 }
815 catch (Exception e) {
816 if (_log.isWarnEnabled()) {
817 _log.warn("Could not create " + tableName);
818 }
819 }
820 finally {
821 DataAccess.cleanUp(con, null, rs);
822 }
823 }
824
825 private static final String _LUCENE_STORE_TYPE_FILE = "file";
826
827 private static final String _LUCENE_STORE_TYPE_JDBC = "jdbc";
828
829 private static final String _LUCENE_STORE_TYPE_RAM = "ram";
830
831 private static final String _LUCENE_TABLE_PREFIX = "LUCENE_";
832
833 private static Log _log = LogFactoryUtil.getLog(LuceneUtil.class);
834
835 private static LuceneUtil _instance = new LuceneUtil();
836
837 private IndexWriterFactory _sharedWriter = new IndexWriterFactory();
838 private Class<?> _analyzerClass = WhitespaceAnalyzer.class;
839 private Dialect _dialect;
840 private Map<String, Directory> _jdbcDirectories =
841 new ConcurrentHashMap<String, Directory>();
842 private Map<String, Directory> _ramDirectories =
843 new ConcurrentHashMap<String, Directory>();
844
845 }