1
14
15 package com.liferay.portal.util;
16
17 import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
18 import com.liferay.portal.kernel.io.unsync.UnsyncByteArrayOutputStream;
19 import com.liferay.portal.kernel.log.Log;
20 import com.liferay.portal.kernel.log.LogFactoryUtil;
21 import com.liferay.portal.kernel.util.ContentTypes;
22 import com.liferay.portal.kernel.util.FileComparator;
23 import com.liferay.portal.kernel.util.MimeTypesUtil;
24 import com.liferay.portal.kernel.util.StreamUtil;
25 import com.liferay.portal.kernel.util.StringBundler;
26 import com.liferay.portal.kernel.util.StringPool;
27 import com.liferay.portal.kernel.util.StringUtil;
28 import com.liferay.portal.kernel.util.Time;
29 import com.liferay.portal.kernel.util.Validator;
30 import com.liferay.util.PwdGenerator;
31 import com.liferay.util.SystemProperties;
32 import com.liferay.util.lucene.JerichoHTMLTextExtractor;
33
34 import java.io.BufferedInputStream;
35 import java.io.File;
36 import java.io.FileInputStream;
37 import java.io.FileOutputStream;
38 import java.io.FileReader;
39 import java.io.IOException;
40 import java.io.InputStream;
41 import java.io.OutputStreamWriter;
42 import java.io.Reader;
43 import java.io.Writer;
44
45 import java.util.ArrayList;
46 import java.util.Arrays;
47 import java.util.HashMap;
48 import java.util.List;
49 import java.util.Map;
50 import java.util.Properties;
51
52 import org.apache.jackrabbit.extractor.MsExcelTextExtractor;
53 import org.apache.jackrabbit.extractor.MsPowerPointTextExtractor;
54 import org.apache.jackrabbit.extractor.MsWordTextExtractor;
55 import org.apache.jackrabbit.extractor.OpenOfficeTextExtractor;
56 import org.apache.jackrabbit.extractor.PdfTextExtractor;
57 import org.apache.jackrabbit.extractor.PlainTextExtractor;
58 import org.apache.jackrabbit.extractor.RTFTextExtractor;
59 import org.apache.jackrabbit.extractor.TextExtractor;
60 import org.apache.jackrabbit.extractor.XMLTextExtractor;
61 import org.apache.poi.POITextExtractor;
62 import org.apache.poi.extractor.ExtractorFactory;
63
64 import org.mozilla.intl.chardet.nsDetector;
65 import org.mozilla.intl.chardet.nsPSMDetector;
66
67
73 public class FileImpl implements com.liferay.portal.kernel.util.File {
74
75 public static FileImpl getInstance() {
76 return _instance;
77 }
78
79 public FileImpl() {
80 Class<?>[] textExtractorClasses = new Class[] {
81 JerichoHTMLTextExtractor.class, MsExcelTextExtractor.class,
82 MsPowerPointTextExtractor.class, MsWordTextExtractor.class,
83 OpenOfficeTextExtractor.class, PdfTextExtractor.class,
84 PlainTextExtractor.class, RTFTextExtractor.class,
85 XMLTextExtractor.class
86 };
87
88 for (Class<?> textExtractorClass : textExtractorClasses) {
89 try {
90 TextExtractor textExtractor =
91 (TextExtractor)textExtractorClass.newInstance();
92
93 String[] contentTypes = textExtractor.getContentTypes();
94
95 for (String contentType : contentTypes) {
96 _textExtractors.put(contentType, textExtractor);
97 }
98 }
99 catch (Exception e) {
100 _log.error(e, e);
101 }
102 }
103 }
104
105 public void copyDirectory(String sourceDirName, String destinationDirName) {
106 copyDirectory(new File(sourceDirName), new File(destinationDirName));
107 }
108
109 public void copyDirectory(File source, File destination) {
110 if (source.exists() && source.isDirectory()) {
111 if (!destination.exists()) {
112 destination.mkdirs();
113 }
114
115 File[] fileArray = source.listFiles();
116
117 for (int i = 0; i < fileArray.length; i++) {
118 if (fileArray[i].isDirectory()) {
119 copyDirectory(
120 fileArray[i],
121 new File(destination.getPath() + File.separator
122 + fileArray[i].getName()));
123 }
124 else {
125 copyFile(
126 fileArray[i],
127 new File(destination.getPath() + File.separator
128 + fileArray[i].getName()));
129 }
130 }
131 }
132 }
133
134 public void copyFile(String source, String destination) {
135 copyFile(source, destination, false);
136 }
137
138 public void copyFile(String source, String destination, boolean lazy) {
139 copyFile(new File(source), new File(destination), lazy);
140 }
141
142 public void copyFile(File source, File destination) {
143 copyFile(source, destination, false);
144 }
145
146 public void copyFile(File source, File destination, boolean lazy) {
147 if (!source.exists()) {
148 return;
149 }
150
151 if (lazy) {
152 String oldContent = null;
153
154 try {
155 oldContent = read(source);
156 }
157 catch (Exception e) {
158 return;
159 }
160
161 String newContent = null;
162
163 try {
164 newContent = read(destination);
165 }
166 catch (Exception e) {
167 }
168
169 if ((oldContent == null) || !oldContent.equals(newContent)) {
170 copyFile(source, destination, false);
171 }
172 }
173 else {
174 if ((destination.getParentFile() != null) &&
175 (!destination.getParentFile().exists())) {
176
177 destination.getParentFile().mkdirs();
178 }
179
180 try {
181 StreamUtil.transfer(
182 new FileInputStream(source),
183 new FileOutputStream(destination));
184 }
185 catch (IOException ioe) {
186 _log.error(ioe.getMessage());
187 }
188 }
189 }
190
191 public File createTempFile() {
192 return createTempFile(null);
193 }
194
195 public File createTempFile(String extension) {
196 return new File(createTempFileName(extension));
197 }
198
199 public String createTempFileName() {
200 return createTempFileName(null);
201 }
202
203 public String createTempFileName(String extension) {
204 StringBundler sb = new StringBundler();
205
206 sb.append(SystemProperties.get(SystemProperties.TMP_DIR));
207 sb.append(StringPool.SLASH);
208 sb.append(Time.getTimestamp());
209 sb.append(PwdGenerator.getPassword(PwdGenerator.KEY2, 8));
210
211 if (Validator.isNotNull(extension)) {
212 sb.append(StringPool.PERIOD);
213 sb.append(extension);
214 }
215
216 return sb.toString();
217 }
218
219 public String decodeSafeFileName(String fileName) {
220 return StringUtil.replace(
221 fileName, _SAFE_FILE_NAME_2, _SAFE_FILE_NAME_1);
222 }
223
224 public boolean delete(String file) {
225 return delete(new File(file));
226 }
227
228 public boolean delete(File file) {
229 if ((file != null) && file.exists()) {
230 return file.delete();
231 }
232 else {
233 return false;
234 }
235 }
236
237 public void deltree(String directory) {
238 deltree(new File(directory));
239 }
240
241 public void deltree(File directory) {
242 if (directory.exists() && directory.isDirectory()) {
243 File[] fileArray = directory.listFiles();
244
245 for (int i = 0; i < fileArray.length; i++) {
246 if (fileArray[i].isDirectory()) {
247 deltree(fileArray[i]);
248 }
249 else {
250 fileArray[i].delete();
251 }
252 }
253
254 directory.delete();
255 }
256 }
257
258 public String encodeSafeFileName(String fileName) {
259 if (fileName == null) {
260 return StringPool.BLANK;
261 }
262
263 return StringUtil.replace(
264 fileName, _SAFE_FILE_NAME_1, _SAFE_FILE_NAME_2);
265 }
266
267 public boolean exists(String fileName) {
268 return exists(new File(fileName));
269 }
270
271 public boolean exists(File file) {
272 return file.exists();
273 }
274
275 public String extractText(InputStream is, String fileName) {
276 String text = null;
277
278 try {
279 if (!is.markSupported()) {
280 is = new BufferedInputStream(is);
281 }
282
283 String contentType = MimeTypesUtil.getContentType(is, fileName);
284
285 if (_log.isInfoEnabled()) {
286 _log.info(
287 "Attempting to extract text from " + fileName +
288 " of type " + contentType);
289 }
290
291 TextExtractor textExtractor = _textExtractors.get(contentType);
292
293 if (textExtractor != null) {
294 if (_log.isInfoEnabled()) {
295 _log.info(
296 "Using text extractor " +
297 textExtractor.getClass().getName());
298 }
299
300 StringBuilder sb = new StringBuilder();
301
302 Reader reader = textExtractor.extractText(
303 is, contentType, null);
304
305 try{
306 char[] buffer = new char[1024];
307
308 int result = -1;
309
310 while ((result = reader.read(buffer)) != -1) {
311 sb.append(buffer, 0, result);
312 }
313 }
314 finally {
315 try {
316 reader.close();
317 }
318 catch (IOException ioe) {
319 }
320 }
321
322 text = sb.toString();
323 }
324 else if (contentType.equals(ContentTypes.APPLICATION_ZIP) ||
325 contentType.startsWith(
326 "application/vnd.openxmlformats-officedocument.")) {
327
328 try {
329 POITextExtractor poiTextExtractor =
330 ExtractorFactory.createExtractor(is);
331
332 text = poiTextExtractor.getText();
333 }
334 catch (Exception e) {
335 if (_log.isInfoEnabled()) {
336 _log.info(e.getMessage());
337 }
338 }
339 }
340 }
341 catch (Exception e) {
342 _log.error(e);
343 }
344
345 if (_log.isInfoEnabled()) {
346 if (text == null) {
347 _log.info("No text extractor found for " + fileName);
348 }
349 else {
350 _log.info("Text was extracted for " + fileName);
351 }
352 }
353
354 if (_log.isDebugEnabled()) {
355 _log.debug("Extractor returned text:\n\n" + text);
356 }
357
358 if (text == null) {
359 text = StringPool.BLANK;
360 }
361
362 return text;
363 }
364
365 public String getAbsolutePath(File file) {
366 return StringUtil.replace(
367 file.getAbsolutePath(), StringPool.BACK_SLASH, StringPool.SLASH);
368 }
369
370 public byte[] getBytes(File file) throws IOException {
371 if ((file == null) || !file.exists()) {
372 return null;
373 }
374
375 FileInputStream is = new FileInputStream(file);
376
377 byte[] bytes = getBytes(is, (int)file.length());
378
379 is.close();
380
381 return bytes;
382 }
383
384 public byte[] getBytes(InputStream is) throws IOException {
385 return getBytes(is, -1);
386 }
387
388 public byte[] getBytes(InputStream inputStream, int bufferSize)
389 throws IOException {
390
391 UnsyncByteArrayOutputStream unsyncByteArrayOutputStream =
392 new UnsyncByteArrayOutputStream();
393
394 StreamUtil.transfer(
395 inputStream, unsyncByteArrayOutputStream, bufferSize);
396
397 return unsyncByteArrayOutputStream.toByteArray();
398 }
399
400 public String getExtension(String fileName) {
401 if (fileName == null) {
402 return null;
403 }
404
405 int pos = fileName.lastIndexOf(StringPool.PERIOD);
406
407 if (pos != -1) {
408 return fileName.substring(pos + 1, fileName.length()).toLowerCase();
409 }
410 else {
411 return null;
412 }
413 }
414
415 public String getPath(String fullFileName) {
416 int pos = fullFileName.lastIndexOf(StringPool.SLASH);
417
418 if (pos == -1) {
419 pos = fullFileName.lastIndexOf(StringPool.BACK_SLASH);
420 }
421
422 String shortFileName = fullFileName.substring(0, pos);
423
424 if (Validator.isNull(shortFileName)) {
425 return StringPool.SLASH;
426 }
427
428 return shortFileName;
429 }
430
431 public String getShortFileName(String fullFileName) {
432 int pos = fullFileName.lastIndexOf(StringPool.SLASH);
433
434 if (pos == -1) {
435 pos = fullFileName.lastIndexOf(StringPool.BACK_SLASH);
436 }
437
438 String shortFileName =
439 fullFileName.substring(pos + 1, fullFileName.length());
440
441 return shortFileName;
442 }
443
444 public boolean isAscii(File file) throws IOException {
445 boolean ascii = true;
446
447 nsDetector detector = new nsDetector(nsPSMDetector.ALL);
448
449 InputStream inputStream = new FileInputStream(file);
450
451 byte[] buffer = new byte[1024];
452
453 int len = 0;
454
455 while ((len = inputStream.read(buffer, 0, buffer.length)) != -1) {
456
457 if (ascii) {
458 ascii = detector.isAscii(buffer, len);
459
460 if (!ascii) {
461 break;
462 }
463 }
464 }
465
466 detector.DataEnd();
467
468 inputStream.close();
469
470 return ascii;
471 }
472
473 public String[] listDirs(String fileName) {
474 return listDirs(new File(fileName));
475 }
476
477 public String[] listDirs(File file) {
478 List<String> dirs = new ArrayList<String>();
479
480 File[] fileArray = file.listFiles();
481
482 for (int i = 0; (fileArray != null) && (i < fileArray.length); i++) {
483 if (fileArray[i].isDirectory()) {
484 dirs.add(fileArray[i].getName());
485 }
486 }
487
488 return dirs.toArray(new String[dirs.size()]);
489 }
490
491 public String[] listFiles(String fileName) {
492 if (Validator.isNull(fileName)) {
493 return new String[0];
494 }
495
496 return listFiles(new File(fileName));
497 }
498
499 public String[] listFiles(File file) {
500 List<String> files = new ArrayList<String>();
501
502 File[] fileArray = file.listFiles();
503
504 for (int i = 0; (fileArray != null) && (i < fileArray.length); i++) {
505 if (fileArray[i].isFile()) {
506 files.add(fileArray[i].getName());
507 }
508 }
509
510 return files.toArray(new String[files.size()]);
511 }
512
513 public void mkdirs(String pathName) {
514 File file = new File(pathName);
515
516 file.mkdirs();
517 }
518
519 public boolean move(String sourceFileName, String destinationFileName) {
520 return move(new File(sourceFileName), new File(destinationFileName));
521 }
522
523 public boolean move(File source, File destination) {
524 if (!source.exists()) {
525 return false;
526 }
527
528 destination.delete();
529
530 return source.renameTo(destination);
531 }
532
533 public String read(String fileName) throws IOException {
534 return read(new File(fileName));
535 }
536
537 public String read(File file) throws IOException {
538 return read(file, false);
539 }
540
541 public String read(File file, boolean raw) throws IOException {
542 FileInputStream fis = new FileInputStream(file);
543
544 byte[] bytes = new byte[fis.available()];
545
546 fis.read(bytes);
547
548 fis.close();
549
550 String s = new String(bytes, StringPool.UTF8);
551
552 if (raw) {
553 return s;
554 }
555 else {
556 return StringUtil.replace(
557 s, StringPool.RETURN_NEW_LINE, StringPool.NEW_LINE);
558 }
559 }
560
561 public String replaceSeparator(String fileName) {
562 return StringUtil.replace(
563 fileName, StringPool.BACK_SLASH, StringPool.SLASH);
564 }
565
566 public File[] sortFiles(File[] files) {
567 if (files == null) {
568 return null;
569 }
570
571 Arrays.sort(files, new FileComparator());
572
573 List<File> directoryList = new ArrayList<File>();
574 List<File> fileList = new ArrayList<File>();
575
576 for (int i = 0; i < files.length; i++) {
577 if (files[i].isDirectory()) {
578 directoryList.add(files[i]);
579 }
580 else {
581 fileList.add(files[i]);
582 }
583 }
584
585 directoryList.addAll(fileList);
586
587 return directoryList.toArray(new File[directoryList.size()]);
588 }
589
590 public String stripExtension(String fileName) {
591 if (fileName == null) {
592 return null;
593 }
594
595 int pos = fileName.lastIndexOf(StringPool.PERIOD);
596
597 if (pos != -1) {
598 return fileName.substring(0, pos);
599 }
600 else {
601 return fileName;
602 }
603 }
604
605 public List<String> toList(Reader reader) {
606 List<String> list = new ArrayList<String>();
607
608 try {
609 UnsyncBufferedReader unsyncBufferedReader =
610 new UnsyncBufferedReader(reader);
611
612 String line = null;
613
614 while ((line = unsyncBufferedReader.readLine()) != null) {
615 list.add(line);
616 }
617
618 unsyncBufferedReader.close();
619 }
620 catch (IOException ioe) {
621 }
622
623 return list;
624 }
625
626 public List<String> toList(String fileName) {
627 try {
628 return toList(new FileReader(fileName));
629 }
630 catch (IOException ioe) {
631 return new ArrayList<String>();
632 }
633 }
634
635 public Properties toProperties(FileInputStream fis) {
636 Properties props = new Properties();
637
638 try {
639 props.load(fis);
640 }
641 catch (IOException ioe) {
642 }
643
644 return props;
645 }
646
647 public Properties toProperties(String fileName) {
648 try {
649 return toProperties(new FileInputStream(fileName));
650 }
651 catch (IOException ioe) {
652 return new Properties();
653 }
654 }
655
656 public void write(String fileName, String s) throws IOException {
657 write(new File(fileName), s);
658 }
659
660 public void write(String fileName, String s, boolean lazy)
661 throws IOException {
662
663 write(new File(fileName), s, lazy);
664 }
665
666 public void write(String fileName, String s, boolean lazy, boolean append)
667 throws IOException {
668
669 write(new File(fileName), s, lazy, append);
670 }
671
672 public void write(String pathName, String fileName, String s)
673 throws IOException {
674
675 write(new File(pathName, fileName), s);
676 }
677
678 public void write(String pathName, String fileName, String s, boolean lazy)
679 throws IOException {
680
681 write(new File(pathName, fileName), s, lazy);
682 }
683
684 public void write(
685 String pathName, String fileName, String s, boolean lazy,
686 boolean append)
687 throws IOException {
688
689 write(new File(pathName, fileName), s, lazy, append);
690 }
691
692 public void write(File file, String s) throws IOException {
693 write(file, s, false);
694 }
695
696 public void write(File file, String s, boolean lazy)
697 throws IOException {
698
699 write(file, s, lazy, false);
700 }
701
702 public void write(File file, String s, boolean lazy, boolean append)
703 throws IOException {
704
705 if (file.getParent() != null) {
706 mkdirs(file.getParent());
707 }
708
709 if (lazy && file.exists()) {
710 String content = read(file);
711
712 if (content.equals(s)) {
713 return;
714 }
715 }
716
717 Writer writer = new OutputStreamWriter(
718 new FileOutputStream(file, append), StringPool.UTF8);
719
720 writer.write(s);
721
722 writer.close();
723 }
724
725 public void write(String fileName, byte[] bytes) throws IOException {
726 write(new File(fileName), bytes);
727 }
728
729 public void write(File file, byte[] bytes) throws IOException {
730 write(file, bytes, 0, bytes.length);
731 }
732
733 public void write(File file, byte[] bytes, int offset, int length)
734 throws IOException {
735
736 if (file.getParent() != null) {
737 mkdirs(file.getParent());
738 }
739
740 FileOutputStream fos = new FileOutputStream(file);
741
742 fos.write(bytes, offset, length);
743
744 fos.close();
745 }
746
747 public void write(String fileName, InputStream is) throws IOException {
748 write(new File(fileName), is);
749 }
750
751 public void write(File file, InputStream is) throws IOException {
752 if (file.getParent() != null) {
753 mkdirs(file.getParent());
754 }
755
756 StreamUtil.transfer(is, new FileOutputStream(file));
757 }
758
759 private static final String[] _SAFE_FILE_NAME_1 = {
760 StringPool.AMPERSAND, StringPool.CLOSE_PARENTHESIS,
761 StringPool.OPEN_PARENTHESIS, StringPool.SEMICOLON
762 };
763
764 private static final String[] _SAFE_FILE_NAME_2 = {
765 "_AMP_", "_CP_", "_OP_", "_SEM_"
766 };
767
768 private static Log _log = LogFactoryUtil.getLog(FileImpl.class);
769
770 private static FileImpl _instance = new FileImpl();
771
772 private Map<String, TextExtractor> _textExtractors =
773 new HashMap<String, TextExtractor>();
774
775 }