1   /**
2    * Copyright (c) 2000-2009 Liferay, Inc. All rights reserved.
3    *
4    *
5    *
6    *
7    * The contents of this file are subject to the terms of the Liferay Enterprise
8    * Subscription License ("License"). You may not use this file except in
9    * compliance with the License. You can obtain a copy of the License by
10   * contacting Liferay, Inc. See the License for the specific language governing
11   * permissions and limitations under the License, including but not limited to
12   * distribution rights of the Software.
13   *
14   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20   * SOFTWARE.
21   */
22  
23  package com.liferay.portlet.wiki.importers.mediawiki;
24  
25  import com.liferay.documentlibrary.service.DLLocalServiceUtil;
26  import com.liferay.portal.NoSuchUserException;
27  import com.liferay.portal.PortalException;
28  import com.liferay.portal.SystemException;
29  import com.liferay.portal.kernel.log.Log;
30  import com.liferay.portal.kernel.log.LogFactoryUtil;
31  import com.liferay.portal.kernel.util.ArrayUtil;
32  import com.liferay.portal.kernel.util.MapUtil;
33  import com.liferay.portal.kernel.util.ObjectValuePair;
34  import com.liferay.portal.kernel.util.ProgressTracker;
35  import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
36  import com.liferay.portal.kernel.util.StringPool;
37  import com.liferay.portal.kernel.util.StringUtil;
38  import com.liferay.portal.kernel.util.Validator;
39  import com.liferay.portal.kernel.xml.Document;
40  import com.liferay.portal.kernel.xml.DocumentException;
41  import com.liferay.portal.kernel.xml.Element;
42  import com.liferay.portal.kernel.xml.SAXReaderUtil;
43  import com.liferay.portal.kernel.zip.ZipReader;
44  import com.liferay.portal.model.User;
45  import com.liferay.portal.service.ServiceContext;
46  import com.liferay.portal.service.UserLocalServiceUtil;
47  import com.liferay.portal.util.PropsValues;
48  import com.liferay.portlet.tags.NoSuchEntryException;
49  import com.liferay.portlet.tags.model.TagsEntry;
50  import com.liferay.portlet.tags.service.TagsEntryLocalServiceUtil;
51  import com.liferay.portlet.tags.service.TagsPropertyLocalServiceUtil;
52  import com.liferay.portlet.tags.util.TagsUtil;
53  import com.liferay.portlet.wiki.ImportFilesException;
54  import com.liferay.portlet.wiki.NoSuchPageException;
55  import com.liferay.portlet.wiki.importers.WikiImporter;
56  import com.liferay.portlet.wiki.importers.WikiImporterKeys;
57  import com.liferay.portlet.wiki.model.WikiNode;
58  import com.liferay.portlet.wiki.model.WikiPage;
59  import com.liferay.portlet.wiki.model.impl.WikiPageImpl;
60  import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
61  import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
62  
63  import java.io.BufferedReader;
64  import java.io.File;
65  import java.io.FileReader;
66  import java.io.IOException;
67  
68  import java.util.ArrayList;
69  import java.util.Collections;
70  import java.util.HashMap;
71  import java.util.Iterator;
72  import java.util.List;
73  import java.util.Map;
74  import java.util.regex.Matcher;
75  import java.util.regex.Pattern;
76  
77  /**
78   * <a href="MediaWikiImporter.java.html"><b><i>View Source</i></b></a>
79   *
80   * @author Alvaro del Castillo
81   * @author Jorge Ferrer
82   */
83  public class MediaWikiImporter implements WikiImporter {
84  
85      public static final String SHARED_IMAGES_CONTENT = "See attachments";
86  
87      public static final String SHARED_IMAGES_TITLE = "SharedImages";
88  
89      public void importPages(
90              long userId, WikiNode node, File[] files,
91              Map<String, String[]> options)
92          throws PortalException {
93  
94          if ((files.length < 1) || (files[0] == null) || (!files[0].exists())) {
95              throw new PortalException("The pages file is mandatory");
96          }
97  
98          File pagesFile = files[0];
99          File usersFile = files[1];
100         File imagesFile = files[2];
101 
102         try {
103             Document doc = SAXReaderUtil.read(pagesFile);
104 
105             Map<String, String> usersMap = readUsersFile(usersFile);
106 
107             Element root = doc.getRootElement();
108 
109             List<String> specialNamespaces = readSpecialNamespaces(root);
110 
111             processSpecialPages(userId, node, root, specialNamespaces);
112             processRegularPages(
113                 userId, node, root, specialNamespaces, usersMap, imagesFile,
114                 options);
115             processImages(userId, node, imagesFile);
116 
117             moveFrontPage(userId, node, options);
118         }
119         catch (DocumentException de) {
120             throw new ImportFilesException("Invalid XML file provided");
121         }
122         catch (IOException de) {
123             throw new ImportFilesException("Error reading the files provided");
124         }
125         catch (PortalException e) {
126             throw e;
127         }
128         catch (Exception e) {
129             throw new PortalException(e);
130         }
131     }
132 
133     protected long getUserId(
134             long userId, WikiNode node, String author,
135             Map<String, String> usersMap)
136         throws PortalException, SystemException {
137 
138         User user = null;
139 
140         String emailAddress = usersMap.get(author);
141 
142         try {
143             if (Validator.isNull(emailAddress)) {
144                 user = UserLocalServiceUtil.getUserByScreenName(
145                     node.getCompanyId(), author.toLowerCase());
146             }
147             else {
148                 user = UserLocalServiceUtil.getUserByEmailAddress(
149                     node.getCompanyId(), emailAddress);
150             }
151         }
152         catch (NoSuchUserException nsue) {
153             user = UserLocalServiceUtil.getUserById(userId);
154         }
155 
156         return user.getUserId();
157     }
158 
159     protected void importPage(
160             long userId, String author, WikiNode node, String title,
161             String content, String summary, Map<String, String> usersMap)
162         throws PortalException {
163 
164         try {
165             long authorUserId = getUserId(userId, node, author, usersMap);
166             String parentTitle = readParentTitle(content);
167             String redirectTitle = readRedirectTitle(content);
168 
169             ServiceContext serviceContext = new ServiceContext();
170 
171             serviceContext.setAddCommunityPermissions(true);
172             serviceContext.setAddGuestPermissions(true);
173             serviceContext.setTagsEntries(
174                 readTagsEntries(userId, node, content));
175 
176             if (Validator.isNull(redirectTitle)) {
177                 content = _translator.translate(content);
178             }
179             else {
180                 content =
181                     StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
182                         StringPool.DOUBLE_CLOSE_BRACKET;
183             }
184 
185             WikiPage page = null;
186 
187             try {
188                 page = WikiPageLocalServiceUtil.getPage(
189                     node.getNodeId(), title);
190             }
191             catch (NoSuchPageException nspe) {
192                 page = WikiPageLocalServiceUtil.addPage(
193                     authorUserId, node.getNodeId(), title, WikiPageImpl.NEW,
194                     null, true, serviceContext);
195             }
196 
197             WikiPageLocalServiceUtil.updatePage(
198                 authorUserId, node.getNodeId(), title, page.getVersion(),
199                 content, summary, true, "creole", parentTitle, redirectTitle,
200                 serviceContext);
201         }
202         catch (Exception e) {
203             throw new PortalException("Error importing page " + title, e);
204         }
205     }
206 
207     protected boolean isSpecialMediaWikiPage(
208         String title, List<String> specialNamespaces) {
209 
210         for (String namespace: specialNamespaces) {
211             if (title.startsWith(namespace + StringPool.COLON)) {
212                 return true;
213             }
214         }
215 
216         return false;
217     }
218 
219     protected boolean isValidImage(String[] paths, byte[] bytes) {
220         if (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[0])) {
221             return false;
222         }
223 
224         if ((paths.length > 1) &&
225             (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[1]))) {
226 
227             return false;
228         }
229 
230         String fileName = paths[paths.length - 1];
231 
232         try {
233             DLLocalServiceUtil.validate(fileName, bytes);
234         }
235         catch (PortalException pe) {
236             return false;
237         }
238         catch (SystemException se) {
239             return false;
240         }
241 
242         return true;
243     }
244 
245     protected void moveFrontPage(
246         long userId, WikiNode node, Map<String, String[]> options) {
247 
248         String frontPageTitle = MapUtil.getString(
249             options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
250 
251         if (Validator.isNotNull(frontPageTitle)) {
252             frontPageTitle = normalizeTitle(frontPageTitle);
253 
254             try {
255                 if (WikiPageLocalServiceUtil.getPagesCount(
256                         node.getNodeId(), frontPageTitle, true) > 0) {
257 
258                     ServiceContext serviceContext = new ServiceContext();
259 
260                     serviceContext.setAddCommunityPermissions(true);
261                     serviceContext.setAddGuestPermissions(true);
262 
263                     WikiPageLocalServiceUtil.movePage(
264                         userId, node.getNodeId(), frontPageTitle,
265                         WikiPageImpl.FRONT_PAGE, false, serviceContext);
266 
267                 }
268             }
269             catch (Exception e) {
270                 if (_log.isWarnEnabled()) {
271                     StringBuilder sb = new StringBuilder();
272 
273                     sb.append("Could not move ");
274                     sb.append(WikiPageImpl.FRONT_PAGE);
275                     sb.append(" to the title provided: ");
276                     sb.append(frontPageTitle);
277 
278                     _log.warn(sb.toString(), e);
279                 }
280             }
281 
282         }
283 
284     }
285 
286     protected String normalize(String categoryName, int length) {
287         categoryName = TagsUtil.toWord(categoryName.trim());
288 
289         return StringUtil.shorten(categoryName, length);
290     }
291 
292     protected String normalizeDescription(String description) {
293         description = description.replaceAll(
294             _categoriesPattern.pattern(), StringPool.BLANK);
295 
296         return normalize(description, 300);
297     }
298 
299     protected String normalizeTitle(String title) {
300         title = title.replaceAll(
301             PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
302 
303         return StringUtil.shorten(title, 75);
304     }
305 
306     private void processImages(long userId, WikiNode node, File imagesFile)
307         throws Exception {
308 
309         if ((imagesFile == null) || (!imagesFile.exists())) {
310             return;
311         }
312 
313         ProgressTracker progressTracker =
314             ProgressTrackerThreadLocal.getProgressTracker();
315 
316         int count = 0;
317 
318         ZipReader zipReader = new ZipReader(imagesFile);
319 
320         Map<String, byte[]> entries = zipReader.getEntries();
321 
322         int total = entries.size();
323 
324         if (total > 0) {
325             try {
326                 WikiPageLocalServiceUtil.getPage(
327                     node.getNodeId(), SHARED_IMAGES_TITLE);
328             }
329             catch (NoSuchPageException nspe) {
330                 ServiceContext serviceContext = new ServiceContext();
331 
332                 serviceContext.setAddCommunityPermissions(true);
333                 serviceContext.setAddGuestPermissions(true);
334 
335                 WikiPageLocalServiceUtil.addPage(
336                     userId, node.getNodeId(), SHARED_IMAGES_TITLE,
337                     SHARED_IMAGES_CONTENT, null, true, serviceContext);
338             }
339         }
340 
341         List<ObjectValuePair<String, byte[]>> attachments =
342             new ArrayList<ObjectValuePair<String, byte[]>>();
343 
344         Iterator<Map.Entry<String, byte[]>> itr = entries.entrySet().iterator();
345 
346         int percentage = 50;
347 
348         for (int i = 0; itr.hasNext(); i++) {
349             Map.Entry<String, byte[]> entry = itr.next();
350 
351             String key = entry.getKey();
352             byte[] value = entry.getValue();
353 
354             if (key.endsWith(StringPool.SLASH)) {
355                 if (_log.isInfoEnabled()) {
356                     _log.info("Ignoring " + key);
357                 }
358 
359                 continue;
360             }
361 
362             String[] paths = StringUtil.split(key, StringPool.SLASH);
363 
364             if (!isValidImage(paths, value)) {
365                 if (_log.isInfoEnabled()) {
366                     _log.info("Ignoring " + key);
367                 }
368 
369                 continue;
370             }
371 
372             String fileName = paths[paths.length - 1].toLowerCase();
373 
374             attachments.add(
375                 new ObjectValuePair<String, byte[]>(fileName, value));
376 
377             count++;
378 
379             if ((i % 5) == 0) {
380                 WikiPageLocalServiceUtil.addPageAttachments(
381                     node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
382 
383                 attachments.clear();
384 
385                 percentage = Math.min(50 + (i * 50) / total, 99);
386 
387                 progressTracker.updateProgress(percentage);
388             }
389         }
390 
391         if (!attachments.isEmpty()) {
392             WikiPageLocalServiceUtil.addPageAttachments(
393                 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
394         }
395 
396         if (_log.isInfoEnabled()) {
397             _log.info("Imported " + count + " images into " + node.getName());
398         }
399     }
400 
401     protected void processRegularPages(
402         long userId, WikiNode node, Element root,
403         List<String> specialNamespaces, Map<String, String> usersMap,
404         File imagesFile, Map<String, String[]> options) {
405 
406         boolean importLatestVersion = MapUtil.getBoolean(
407             options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
408 
409         ProgressTracker progressTracker =
410             ProgressTrackerThreadLocal.getProgressTracker();
411 
412         int count = 0;
413 
414         List<Element> pages = root.elements("page");
415 
416         int total = pages.size();
417 
418         Iterator<Element> itr = root.elements("page").iterator();
419 
420         int percentage = 10;
421         int maxPercentage = 50;
422 
423         if ((imagesFile == null) || (!imagesFile.exists())) {
424             maxPercentage = 99;
425         }
426 
427         int percentageRange = maxPercentage - percentage;
428 
429         for (int i = 0; itr.hasNext(); i++) {
430             Element pageEl = itr.next();
431 
432             String title = pageEl.elementText("title");
433 
434             title = normalizeTitle(title);
435 
436             percentage = Math.min(
437                 10 + (i * percentageRange) / total, maxPercentage);
438 
439             progressTracker.updateProgress(percentage);
440 
441             if (isSpecialMediaWikiPage(title, specialNamespaces)) {
442                 continue;
443             }
444 
445             List<Element> revisionEls = pageEl.elements("revision");
446 
447             if (importLatestVersion) {
448                 Element lastRevisionEl = revisionEls.get(
449                     revisionEls.size() - 1);
450 
451                 revisionEls = new ArrayList<Element>();
452 
453                 revisionEls.add(lastRevisionEl);
454             }
455 
456             for (Element curRevisionEl : revisionEls) {
457                 String author = curRevisionEl.element(
458                     "contributor").elementText("username");
459                 String content = curRevisionEl.elementText("text");
460                 String summary = curRevisionEl.elementText("comment");
461 
462                 try {
463                     importPage(
464                         userId, author, node, title, content, summary,
465                         usersMap);
466                 }
467                 catch (Exception e) {
468                     if (_log.isWarnEnabled()) {
469                         StringBuilder sb = new StringBuilder();
470 
471                         sb.append("Page with title ");
472                         sb.append(title);
473                         sb.append(" could not be imported");
474 
475                         _log.warn(sb.toString(), e);
476                     }
477                 }
478             }
479 
480             count++;
481         }
482 
483         if (_log.isInfoEnabled()) {
484             _log.info("Imported " + count + " pages into " + node.getName());
485         }
486     }
487 
488     protected void processSpecialPages(
489             long userId, WikiNode node, Element root,
490             List<String> specialNamespaces)
491         throws PortalException {
492 
493         ProgressTracker progressTracker =
494             ProgressTrackerThreadLocal.getProgressTracker();
495 
496         List<Element> pages = root.elements("page");
497 
498         int total = pages.size();
499 
500         Iterator<Element> itr = pages.iterator();
501 
502         for (int i = 0; itr.hasNext(); i++) {
503             Element page = itr.next();
504 
505             String title = page.elementText("title");
506 
507             if (!title.startsWith("Category:")) {
508                 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
509                     root.remove(page);
510                 }
511 
512                 continue;
513             }
514 
515             String categoryName = title.substring("Category:".length());
516 
517             categoryName = normalize(categoryName, 75);
518 
519             String description = page.element("revision").elementText("text");
520 
521             description = normalizeDescription(description);
522 
523             try {
524                 TagsEntry tagsEntry = null;
525 
526                 try {
527                     tagsEntry = TagsEntryLocalServiceUtil.getEntry(
528                         node.getCompanyId(), categoryName);
529                 }
530                 catch (NoSuchEntryException nsee) {
531                     ServiceContext serviceContext = new ServiceContext();
532 
533                     serviceContext.setAddCommunityPermissions(true);
534                     serviceContext.setAddGuestPermissions(true);
535                     serviceContext.setScopeGroupId(node.getGroupId());
536 
537                     tagsEntry = TagsEntryLocalServiceUtil.addEntry(
538                         userId, null, categoryName, null, null, serviceContext);
539                 }
540 
541                 if (Validator.isNotNull(description)) {
542                     TagsPropertyLocalServiceUtil.addProperty(
543                         userId, tagsEntry.getEntryId(), "description",
544                         description);
545                 }
546             }
547             catch (SystemException se) {
548                  _log.error(se, se);
549             }
550 
551             if ((i % 5) == 0) {
552                 progressTracker.updateProgress((i * 10) / total);
553             }
554         }
555     }
556 
557     protected String readParentTitle(String content) {
558         Matcher matcher = _parentPattern.matcher(content);
559 
560         String redirectTitle = StringPool.BLANK;
561 
562         if (matcher.find()) {
563             redirectTitle = matcher.group(1);
564 
565             redirectTitle = normalizeTitle(redirectTitle);
566 
567             redirectTitle += " (disambiguation)";
568         }
569 
570         return redirectTitle;
571     }
572 
573     protected String readRedirectTitle(String content) {
574         Matcher matcher = _redirectPattern.matcher(content);
575 
576         String redirectTitle = StringPool.BLANK;
577 
578         if (matcher.find()) {
579             redirectTitle = matcher.group(1);
580 
581             redirectTitle = normalizeTitle(redirectTitle);
582         }
583 
584         return redirectTitle;
585     }
586 
587     protected List<String> readSpecialNamespaces(Element root)
588         throws ImportFilesException {
589 
590         List<String> namespaces = new ArrayList<String>();
591 
592         Element siteinfoEl = root.element("siteinfo");
593 
594         if (siteinfoEl == null) {
595             throw new ImportFilesException("Invalid pages XML file");
596         }
597 
598         Iterator<Element> itr = siteinfoEl.element(
599             "namespaces").elements("namespace").iterator();
600 
601         while (itr.hasNext()) {
602             Element namespace = itr.next();
603 
604             if (!namespace.attribute("key").getData().equals("0")) {
605                 namespaces.add(namespace.getText());
606             }
607         }
608 
609         return namespaces;
610     }
611 
612     protected String[] readTagsEntries(
613             long userId, WikiNode node, String content)
614         throws PortalException, SystemException {
615 
616         Matcher matcher = _categoriesPattern.matcher(content);
617 
618         List<String> tagsEntries = new ArrayList<String>();
619 
620         while (matcher.find()) {
621             String categoryName = matcher.group(1);
622 
623             categoryName = normalize(categoryName, 75);
624 
625             TagsEntry tagsEntry = null;
626 
627             try {
628                 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
629                     node.getCompanyId(), categoryName);
630             }
631             catch (NoSuchEntryException nsee) {
632                 ServiceContext serviceContext = new ServiceContext();
633 
634                 serviceContext.setAddCommunityPermissions(true);
635                 serviceContext.setAddGuestPermissions(true);
636                 serviceContext.setScopeGroupId(node.getGroupId());
637 
638                 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
639                     userId, null, categoryName, null, null, serviceContext);
640             }
641 
642             tagsEntries.add(tagsEntry.getName());
643         }
644 
645         if (content.indexOf(_WORK_IN_PROGRESS) != -1) {
646             tagsEntries.add(_WORK_IN_PROGRESS_TAG);
647         }
648 
649         return tagsEntries.toArray(new String[tagsEntries.size()]);
650     }
651 
652     protected Map<String, String> readUsersFile(File usersFile)
653         throws IOException {
654 
655         if ((usersFile == null) || (!usersFile.exists())) {
656             return Collections.EMPTY_MAP;
657         }
658 
659         Map<String, String> usersMap = new HashMap<String, String>();
660 
661         BufferedReader reader = new BufferedReader(new FileReader(usersFile));
662 
663         String line = reader.readLine();
664 
665         while (line != null) {
666             String[] array = StringUtil.split(line);
667 
668             if ((array.length == 2) && (Validator.isNotNull(array[0])) &&
669                 (Validator.isNotNull(array[1]))) {
670 
671                 usersMap.put(array[0], array[1]);
672             }
673             else {
674                 if (_log.isInfoEnabled()) {
675                     _log.info(
676                         "Ignoring line " + line +
677                             " because it does not contain exactly 2 columns");
678                 }
679             }
680 
681             line = reader.readLine();
682         }
683 
684         return usersMap;
685     }
686 
687     private static final String[] _SPECIAL_MEDIA_WIKI_DIRS = {
688         "thumb", "temp", "archive"
689     };
690 
691     private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
692 
693     private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
694 
695     private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);
696 
697     private static Pattern _categoriesPattern = Pattern.compile(
698         "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
699     private static Pattern _parentPattern = Pattern.compile(
700         "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
701     private static Pattern _redirectPattern = Pattern.compile(
702         "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
703 
704     private MediaWikiToCreoleTranslator _translator =
705         new MediaWikiToCreoleTranslator();
706 
707 }