1   /**
2    * Copyright (c) 2000-2010 Liferay, Inc. All rights reserved.
3    *
4    * The contents of this file are subject to the terms of the Liferay Enterprise
5    * Subscription License ("License"). You may not use this file except in
6    * compliance with the License. You can obtain a copy of the License by
7    * contacting Liferay, Inc. See the License for the specific language governing
8    * permissions and limitations under the License, including but not limited to
9    * distribution rights of the Software.
10   *
11   *
12   * 
13   */
14  
15  package com.liferay.portal.util;
16  
17  import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
18  import com.liferay.portal.kernel.io.unsync.UnsyncByteArrayOutputStream;
19  import com.liferay.portal.kernel.log.Log;
20  import com.liferay.portal.kernel.log.LogFactoryUtil;
21  import com.liferay.portal.kernel.util.ContentTypes;
22  import com.liferay.portal.kernel.util.FileComparator;
23  import com.liferay.portal.kernel.util.MimeTypesUtil;
24  import com.liferay.portal.kernel.util.StreamUtil;
25  import com.liferay.portal.kernel.util.StringBundler;
26  import com.liferay.portal.kernel.util.StringPool;
27  import com.liferay.portal.kernel.util.StringUtil;
28  import com.liferay.portal.kernel.util.Time;
29  import com.liferay.portal.kernel.util.Validator;
30  import com.liferay.util.PwdGenerator;
31  import com.liferay.util.SystemProperties;
32  import com.liferay.util.lucene.JerichoHTMLTextExtractor;
33  
34  import java.io.BufferedInputStream;
35  import java.io.File;
36  import java.io.FileInputStream;
37  import java.io.FileOutputStream;
38  import java.io.FileReader;
39  import java.io.IOException;
40  import java.io.InputStream;
41  import java.io.OutputStreamWriter;
42  import java.io.Reader;
43  import java.io.Writer;
44  
45  import java.util.ArrayList;
46  import java.util.Arrays;
47  import java.util.HashMap;
48  import java.util.List;
49  import java.util.Map;
50  import java.util.Properties;
51  
52  import org.apache.jackrabbit.extractor.MsExcelTextExtractor;
53  import org.apache.jackrabbit.extractor.MsPowerPointTextExtractor;
54  import org.apache.jackrabbit.extractor.MsWordTextExtractor;
55  import org.apache.jackrabbit.extractor.OpenOfficeTextExtractor;
56  import org.apache.jackrabbit.extractor.PdfTextExtractor;
57  import org.apache.jackrabbit.extractor.PlainTextExtractor;
58  import org.apache.jackrabbit.extractor.RTFTextExtractor;
59  import org.apache.jackrabbit.extractor.TextExtractor;
60  import org.apache.jackrabbit.extractor.XMLTextExtractor;
61  import org.apache.poi.POITextExtractor;
62  import org.apache.poi.extractor.ExtractorFactory;
63  
64  import org.mozilla.intl.chardet.nsDetector;
65  import org.mozilla.intl.chardet.nsPSMDetector;
66  
67  /**
68   * <a href="FileImpl.java.html"><b><i>View Source</i></b></a>
69   *
70   * @author Brian Wing Shun Chan
71   * @author Alexander Chow
72   */
73  public class FileImpl implements com.liferay.portal.kernel.util.File {
74  
75      public static FileImpl getInstance() {
76          return _instance;
77      }
78  
79      public FileImpl() {
80          Class<?>[] textExtractorClasses = new Class[] {
81              JerichoHTMLTextExtractor.class, MsExcelTextExtractor.class,
82              MsPowerPointTextExtractor.class, MsWordTextExtractor.class,
83              OpenOfficeTextExtractor.class, PdfTextExtractor.class,
84              PlainTextExtractor.class, RTFTextExtractor.class,
85              XMLTextExtractor.class
86          };
87  
88          for (Class<?> textExtractorClass : textExtractorClasses) {
89              try {
90                  TextExtractor textExtractor =
91                      (TextExtractor)textExtractorClass.newInstance();
92  
93                  String[] contentTypes = textExtractor.getContentTypes();
94  
95                  for (String contentType : contentTypes) {
96                      _textExtractors.put(contentType, textExtractor);
97                  }
98              }
99              catch (Exception e) {
100                 _log.error(e, e);
101             }
102         }
103     }
104 
105     public void copyDirectory(String sourceDirName, String destinationDirName) {
106         copyDirectory(new File(sourceDirName), new File(destinationDirName));
107     }
108 
109     public void copyDirectory(File source, File destination) {
110         if (source.exists() && source.isDirectory()) {
111             if (!destination.exists()) {
112                 destination.mkdirs();
113             }
114 
115             File[] fileArray = source.listFiles();
116 
117             for (int i = 0; i < fileArray.length; i++) {
118                 if (fileArray[i].isDirectory()) {
119                     copyDirectory(
120                         fileArray[i],
121                         new File(destination.getPath() + File.separator
122                             + fileArray[i].getName()));
123                 }
124                 else {
125                     copyFile(
126                         fileArray[i],
127                         new File(destination.getPath() + File.separator
128                             + fileArray[i].getName()));
129                 }
130             }
131         }
132     }
133 
134     public void copyFile(String source, String destination) {
135         copyFile(source, destination, false);
136     }
137 
138     public void copyFile(String source, String destination, boolean lazy) {
139         copyFile(new File(source), new File(destination), lazy);
140     }
141 
142     public void copyFile(File source, File destination) {
143         copyFile(source, destination, false);
144     }
145 
146     public void copyFile(File source, File destination, boolean lazy) {
147         if (!source.exists()) {
148             return;
149         }
150 
151         if (lazy) {
152             String oldContent = null;
153 
154             try {
155                 oldContent = read(source);
156             }
157             catch (Exception e) {
158                 return;
159             }
160 
161             String newContent = null;
162 
163             try {
164                 newContent = read(destination);
165             }
166             catch (Exception e) {
167             }
168 
169             if ((oldContent == null) || !oldContent.equals(newContent)) {
170                 copyFile(source, destination, false);
171             }
172         }
173         else {
174             if ((destination.getParentFile() != null) &&
175                 (!destination.getParentFile().exists())) {
176 
177                 destination.getParentFile().mkdirs();
178             }
179 
180             try {
181                 StreamUtil.transfer(
182                     new FileInputStream(source),
183                     new FileOutputStream(destination));
184             }
185             catch (IOException ioe) {
186                 _log.error(ioe.getMessage());
187             }
188         }
189     }
190 
191     public File createTempFile() {
192         return createTempFile(null);
193     }
194 
195     public File createTempFile(String extension) {
196         return new File(createTempFileName(extension));
197     }
198 
199     public String createTempFileName() {
200         return createTempFileName(null);
201     }
202 
203     public String createTempFileName(String extension) {
204         StringBundler sb = new StringBundler();
205 
206         sb.append(SystemProperties.get(SystemProperties.TMP_DIR));
207         sb.append(StringPool.SLASH);
208         sb.append(Time.getTimestamp());
209         sb.append(PwdGenerator.getPassword(PwdGenerator.KEY2, 8));
210 
211         if (Validator.isNotNull(extension)) {
212             sb.append(StringPool.PERIOD);
213             sb.append(extension);
214         }
215 
216         return sb.toString();
217     }
218 
219     public String decodeSafeFileName(String fileName) {
220         return StringUtil.replace(
221             fileName, _SAFE_FILE_NAME_2, _SAFE_FILE_NAME_1);
222     }
223 
224     public boolean delete(String file) {
225         return delete(new File(file));
226     }
227 
228     public boolean delete(File file) {
229         if ((file != null) && file.exists()) {
230             return file.delete();
231         }
232         else {
233             return false;
234         }
235     }
236 
237     public void deltree(String directory) {
238         deltree(new File(directory));
239     }
240 
241     public void deltree(File directory) {
242         if (directory.exists() && directory.isDirectory()) {
243             File[] fileArray = directory.listFiles();
244 
245             for (int i = 0; i < fileArray.length; i++) {
246                 if (fileArray[i].isDirectory()) {
247                     deltree(fileArray[i]);
248                 }
249                 else {
250                     fileArray[i].delete();
251                 }
252             }
253 
254             directory.delete();
255         }
256     }
257 
258     public String encodeSafeFileName(String fileName) {
259         if (fileName == null) {
260             return StringPool.BLANK;
261         }
262 
263         return StringUtil.replace(
264             fileName, _SAFE_FILE_NAME_1, _SAFE_FILE_NAME_2);
265     }
266 
267     public boolean exists(String fileName) {
268         return exists(new File(fileName));
269     }
270 
271     public boolean exists(File file) {
272         return file.exists();
273     }
274 
275     public String extractText(InputStream is, String fileName) {
276         String text = null;
277 
278         try {
279             if (!is.markSupported()) {
280                 is = new BufferedInputStream(is);
281             }
282 
283             String contentType = MimeTypesUtil.getContentType(is, fileName);
284 
285             if (_log.isInfoEnabled()) {
286                 _log.info(
287                     "Attempting to extract text from " + fileName +
288                         " of type " + contentType);
289             }
290 
291             TextExtractor textExtractor = _textExtractors.get(contentType);
292 
293             if (textExtractor != null) {
294                 if (_log.isInfoEnabled()) {
295                     _log.info(
296                         "Using text extractor " +
297                             textExtractor.getClass().getName());
298                 }
299 
300                 StringBuilder sb = new StringBuilder();
301 
302                 Reader reader = textExtractor.extractText(
303                     is, contentType, null);
304 
305                 try{
306                     char[] buffer = new char[1024];
307 
308                     int result = -1;
309 
310                     while ((result = reader.read(buffer)) != -1) {
311                         sb.append(buffer, 0, result);
312                     }
313                 }
314                 finally {
315                     try {
316                         reader.close();
317                     }
318                     catch (IOException ioe) {
319                     }
320                 }
321 
322                 text = sb.toString();
323             }
324             else if (contentType.equals(ContentTypes.APPLICATION_ZIP) ||
325                 contentType.startsWith(
326                     "application/vnd.openxmlformats-officedocument.")) {
327 
328                 try {
329                     POITextExtractor poiTextExtractor =
330                         ExtractorFactory.createExtractor(is);
331 
332                     text = poiTextExtractor.getText();
333                 }
334                 catch (Exception e) {
335                     if (_log.isInfoEnabled()) {
336                         _log.info(e.getMessage());
337                     }
338                 }
339             }
340         }
341         catch (Exception e) {
342             _log.error(e);
343         }
344 
345         if (_log.isInfoEnabled()) {
346             if (text == null) {
347                 _log.info("No text extractor found for " + fileName);
348             }
349             else {
350                 _log.info("Text was extracted for " + fileName);
351             }
352         }
353 
354         if (_log.isDebugEnabled()) {
355             _log.debug("Extractor returned text:\n\n" + text);
356         }
357 
358         if (text == null) {
359             text = StringPool.BLANK;
360         }
361 
362         return text;
363     }
364 
365     public String getAbsolutePath(File file) {
366         return StringUtil.replace(
367             file.getAbsolutePath(), StringPool.BACK_SLASH, StringPool.SLASH);
368     }
369 
370     public byte[] getBytes(File file) throws IOException {
371         if ((file == null) || !file.exists()) {
372             return null;
373         }
374 
375         FileInputStream is = new FileInputStream(file);
376 
377         byte[] bytes = getBytes(is, (int)file.length());
378 
379         is.close();
380 
381         return bytes;
382     }
383 
384     public byte[] getBytes(InputStream is) throws IOException {
385         return getBytes(is, -1);
386     }
387 
388     public byte[] getBytes(InputStream inputStream, int bufferSize)
389         throws IOException {
390 
391         UnsyncByteArrayOutputStream unsyncByteArrayOutputStream =
392             new UnsyncByteArrayOutputStream();
393 
394         StreamUtil.transfer(
395             inputStream, unsyncByteArrayOutputStream, bufferSize);
396 
397         return unsyncByteArrayOutputStream.toByteArray();
398     }
399 
400     public String getExtension(String fileName) {
401         if (fileName == null) {
402             return null;
403         }
404 
405         int pos = fileName.lastIndexOf(StringPool.PERIOD);
406 
407         if (pos != -1) {
408             return fileName.substring(pos + 1, fileName.length()).toLowerCase();
409         }
410         else {
411             return null;
412         }
413     }
414 
415     public String getPath(String fullFileName) {
416         int pos = fullFileName.lastIndexOf(StringPool.SLASH);
417 
418         if (pos == -1) {
419             pos = fullFileName.lastIndexOf(StringPool.BACK_SLASH);
420         }
421 
422         String shortFileName = fullFileName.substring(0, pos);
423 
424         if (Validator.isNull(shortFileName)) {
425             return StringPool.SLASH;
426         }
427 
428         return shortFileName;
429     }
430 
431     public String getShortFileName(String fullFileName) {
432         int pos = fullFileName.lastIndexOf(StringPool.SLASH);
433 
434         if (pos == -1) {
435             pos = fullFileName.lastIndexOf(StringPool.BACK_SLASH);
436         }
437 
438         String shortFileName =
439             fullFileName.substring(pos + 1, fullFileName.length());
440 
441         return shortFileName;
442     }
443 
444     public boolean isAscii(File file) throws IOException {
445         boolean ascii = true;
446 
447         nsDetector detector = new nsDetector(nsPSMDetector.ALL);
448 
449         InputStream inputStream = new FileInputStream(file);
450 
451         byte[] buffer = new byte[1024];
452 
453         int len = 0;
454 
455         while ((len = inputStream.read(buffer, 0, buffer.length)) != -1) {
456 
457             if (ascii) {
458                 ascii = detector.isAscii(buffer, len);
459 
460                 if (!ascii) {
461                     break;
462                 }
463             }
464         }
465 
466         detector.DataEnd();
467 
468         inputStream.close();
469 
470         return ascii;
471     }
472 
473     public String[] listDirs(String fileName) {
474         return listDirs(new File(fileName));
475     }
476 
477     public String[] listDirs(File file) {
478         List<String> dirs = new ArrayList<String>();
479 
480         File[] fileArray = file.listFiles();
481 
482         for (int i = 0; (fileArray != null) && (i < fileArray.length); i++) {
483             if (fileArray[i].isDirectory()) {
484                 dirs.add(fileArray[i].getName());
485             }
486         }
487 
488         return dirs.toArray(new String[dirs.size()]);
489     }
490 
491     public String[] listFiles(String fileName) {
492         if (Validator.isNull(fileName)) {
493             return new String[0];
494         }
495 
496         return listFiles(new File(fileName));
497     }
498 
499     public String[] listFiles(File file) {
500         List<String> files = new ArrayList<String>();
501 
502         File[] fileArray = file.listFiles();
503 
504         for (int i = 0; (fileArray != null) && (i < fileArray.length); i++) {
505             if (fileArray[i].isFile()) {
506                 files.add(fileArray[i].getName());
507             }
508         }
509 
510         return files.toArray(new String[files.size()]);
511     }
512 
513     public void mkdirs(String pathName) {
514         File file = new File(pathName);
515 
516         file.mkdirs();
517     }
518 
519     public boolean move(String sourceFileName, String destinationFileName) {
520         return move(new File(sourceFileName), new File(destinationFileName));
521     }
522 
523     public boolean move(File source, File destination) {
524         if (!source.exists()) {
525             return false;
526         }
527 
528         destination.delete();
529 
530         return source.renameTo(destination);
531     }
532 
533     public String read(String fileName) throws IOException {
534         return read(new File(fileName));
535     }
536 
537     public String read(File file) throws IOException {
538         return read(file, false);
539     }
540 
541     public String read(File file, boolean raw) throws IOException {
542         FileInputStream fis = new FileInputStream(file);
543 
544         byte[] bytes = new byte[fis.available()];
545 
546         fis.read(bytes);
547 
548         fis.close();
549 
550         String s = new String(bytes, StringPool.UTF8);
551 
552         if (raw) {
553             return s;
554         }
555         else {
556             return StringUtil.replace(
557                 s, StringPool.RETURN_NEW_LINE, StringPool.NEW_LINE);
558         }
559     }
560 
561     public String replaceSeparator(String fileName) {
562         return StringUtil.replace(
563             fileName, StringPool.BACK_SLASH, StringPool.SLASH);
564     }
565 
566     public File[] sortFiles(File[] files) {
567         if (files == null) {
568             return null;
569         }
570 
571         Arrays.sort(files, new FileComparator());
572 
573         List<File> directoryList = new ArrayList<File>();
574         List<File> fileList = new ArrayList<File>();
575 
576         for (int i = 0; i < files.length; i++) {
577             if (files[i].isDirectory()) {
578                 directoryList.add(files[i]);
579             }
580             else {
581                 fileList.add(files[i]);
582             }
583         }
584 
585         directoryList.addAll(fileList);
586 
587         return directoryList.toArray(new File[directoryList.size()]);
588     }
589 
590     public String stripExtension(String fileName) {
591         if (fileName == null) {
592             return null;
593         }
594 
595         int pos = fileName.lastIndexOf(StringPool.PERIOD);
596 
597         if (pos != -1) {
598             return fileName.substring(0, pos);
599         }
600         else {
601             return fileName;
602         }
603     }
604 
605     public List<String> toList(Reader reader) {
606         List<String> list = new ArrayList<String>();
607 
608         try {
609             UnsyncBufferedReader unsyncBufferedReader =
610                 new UnsyncBufferedReader(reader);
611 
612             String line = null;
613 
614             while ((line = unsyncBufferedReader.readLine()) != null) {
615                 list.add(line);
616             }
617 
618             unsyncBufferedReader.close();
619         }
620         catch (IOException ioe) {
621         }
622 
623         return list;
624     }
625 
626     public List<String> toList(String fileName) {
627         try {
628             return toList(new FileReader(fileName));
629         }
630         catch (IOException ioe) {
631             return new ArrayList<String>();
632         }
633     }
634 
635     public Properties toProperties(FileInputStream fis) {
636         Properties props = new Properties();
637 
638         try {
639             props.load(fis);
640         }
641         catch (IOException ioe) {
642         }
643 
644         return props;
645     }
646 
647     public Properties toProperties(String fileName) {
648         try {
649             return toProperties(new FileInputStream(fileName));
650         }
651         catch (IOException ioe) {
652             return new Properties();
653         }
654     }
655 
656     public void write(String fileName, String s) throws IOException {
657         write(new File(fileName), s);
658     }
659 
660     public void write(String fileName, String s, boolean lazy)
661         throws IOException {
662 
663         write(new File(fileName), s, lazy);
664     }
665 
666     public void write(String fileName, String s, boolean lazy, boolean append)
667         throws IOException {
668 
669         write(new File(fileName), s, lazy, append);
670     }
671 
672     public void write(String pathName, String fileName, String s)
673         throws IOException {
674 
675         write(new File(pathName, fileName), s);
676     }
677 
678     public void write(String pathName, String fileName, String s, boolean lazy)
679         throws IOException {
680 
681         write(new File(pathName, fileName), s, lazy);
682     }
683 
684     public void write(
685             String pathName, String fileName, String s, boolean lazy,
686             boolean append)
687         throws IOException {
688 
689         write(new File(pathName, fileName), s, lazy, append);
690     }
691 
692     public void write(File file, String s) throws IOException {
693         write(file, s, false);
694     }
695 
696     public void write(File file, String s, boolean lazy)
697         throws IOException {
698 
699         write(file, s, lazy, false);
700     }
701 
702     public void write(File file, String s, boolean lazy, boolean append)
703         throws IOException {
704 
705         if (file.getParent() != null) {
706             mkdirs(file.getParent());
707         }
708 
709         if (lazy && file.exists()) {
710             String content = read(file);
711 
712             if (content.equals(s)) {
713                 return;
714             }
715         }
716 
717         Writer writer = new OutputStreamWriter(
718             new FileOutputStream(file, append), StringPool.UTF8);
719 
720         writer.write(s);
721 
722         writer.close();
723     }
724 
725     public void write(String fileName, byte[] bytes) throws IOException {
726         write(new File(fileName), bytes);
727     }
728 
729     public void write(File file, byte[] bytes) throws IOException {
730         write(file, bytes, 0, bytes.length);
731     }
732 
733     public void write(File file, byte[] bytes, int offset, int length)
734         throws IOException {
735 
736         if (file.getParent() != null) {
737             mkdirs(file.getParent());
738         }
739 
740         FileOutputStream fos = new FileOutputStream(file);
741 
742         fos.write(bytes, offset, length);
743 
744         fos.close();
745     }
746 
747     public void write(String fileName, InputStream is) throws IOException {
748         write(new File(fileName), is);
749     }
750 
751     public void write(File file, InputStream is) throws IOException {
752         if (file.getParent() != null) {
753             mkdirs(file.getParent());
754         }
755 
756         StreamUtil.transfer(is, new FileOutputStream(file));
757     }
758 
759     private static final String[] _SAFE_FILE_NAME_1 = {
760         StringPool.AMPERSAND, StringPool.CLOSE_PARENTHESIS,
761         StringPool.OPEN_PARENTHESIS, StringPool.SEMICOLON
762     };
763 
764     private static final String[] _SAFE_FILE_NAME_2 = {
765         "_AMP_", "_CP_", "_OP_", "_SEM_"
766     };
767 
768     private static Log _log = LogFactoryUtil.getLog(FileImpl.class);
769 
770     private static FileImpl _instance = new FileImpl();
771 
772     private Map<String, TextExtractor> _textExtractors =
773         new HashMap<String, TextExtractor>();
774 
775 }