001    /**
002     * Copyright (c) 2000-2013 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portlet.wiki.importers.mediawiki;
016    
017    import com.liferay.portal.kernel.exception.PortalException;
018    import com.liferay.portal.kernel.exception.SystemException;
019    import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
020    import com.liferay.portal.kernel.log.Log;
021    import com.liferay.portal.kernel.log.LogFactoryUtil;
022    import com.liferay.portal.kernel.util.CharPool;
023    import com.liferay.portal.kernel.util.MapUtil;
024    import com.liferay.portal.kernel.util.ObjectValuePair;
025    import com.liferay.portal.kernel.util.ProgressTracker;
026    import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
027    import com.liferay.portal.kernel.util.SetUtil;
028    import com.liferay.portal.kernel.util.StreamUtil;
029    import com.liferay.portal.kernel.util.StringBundler;
030    import com.liferay.portal.kernel.util.StringPool;
031    import com.liferay.portal.kernel.util.StringUtil;
032    import com.liferay.portal.kernel.util.Validator;
033    import com.liferay.portal.kernel.xml.Attribute;
034    import com.liferay.portal.kernel.xml.Document;
035    import com.liferay.portal.kernel.xml.DocumentException;
036    import com.liferay.portal.kernel.xml.Element;
037    import com.liferay.portal.kernel.xml.SAXReaderUtil;
038    import com.liferay.portal.kernel.zip.ZipReader;
039    import com.liferay.portal.kernel.zip.ZipReaderFactoryUtil;
040    import com.liferay.portal.model.User;
041    import com.liferay.portal.service.ServiceContext;
042    import com.liferay.portal.service.UserLocalServiceUtil;
043    import com.liferay.portal.util.PropsValues;
044    import com.liferay.portlet.asset.NoSuchTagException;
045    import com.liferay.portlet.asset.model.AssetTag;
046    import com.liferay.portlet.asset.service.AssetTagLocalServiceUtil;
047    import com.liferay.portlet.asset.service.AssetTagPropertyLocalServiceUtil;
048    import com.liferay.portlet.asset.util.AssetUtil;
049    import com.liferay.portlet.documentlibrary.store.DLStoreUtil;
050    import com.liferay.portlet.wiki.ImportFilesException;
051    import com.liferay.portlet.wiki.NoSuchPageException;
052    import com.liferay.portlet.wiki.importers.WikiImporter;
053    import com.liferay.portlet.wiki.importers.WikiImporterKeys;
054    import com.liferay.portlet.wiki.model.WikiNode;
055    import com.liferay.portlet.wiki.model.WikiPage;
056    import com.liferay.portlet.wiki.model.WikiPageConstants;
057    import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
058    import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
059    
060    import java.io.IOException;
061    import java.io.InputStream;
062    import java.io.InputStreamReader;
063    
064    import java.util.ArrayList;
065    import java.util.Collections;
066    import java.util.HashMap;
067    import java.util.List;
068    import java.util.Map;
069    import java.util.Set;
070    import java.util.regex.Matcher;
071    import java.util.regex.Pattern;
072    
073    /**
074     * @author Alvaro del Castillo
075     * @author Jorge Ferrer
076     */
077    public class MediaWikiImporter implements WikiImporter {
078    
               /**
                * Content passed when the shared images page is created to hold the
                * imported image attachments.
                */
               public static final String SHARED_IMAGES_CONTENT = "See attachments";

               /**
                * Title of the wiki page that imported images are attached to.
                */
               public static final String SHARED_IMAGES_TITLE = "SharedImages";
082    
083            @Override
084            public void importPages(
085                            long userId, WikiNode node, InputStream[] inputStreams,
086                            Map<String, String[]> options)
087                    throws PortalException {
088    
089                    if ((inputStreams.length < 1) || (inputStreams[0] == null)) {
090                            throw new PortalException("The pages file is mandatory");
091                    }
092    
093                    InputStream pagesInputStream = inputStreams[0];
094                    InputStream usersInputStream = inputStreams[1];
095                    InputStream imagesInputStream = inputStreams[2];
096    
097                    try {
098                            Document document = SAXReaderUtil.read(pagesInputStream);
099    
100                            Map<String, String> usersMap = readUsersFile(usersInputStream);
101    
102                            Element rootElement = document.getRootElement();
103    
104                            List<String> specialNamespaces = readSpecialNamespaces(rootElement);
105    
106                            processSpecialPages(userId, node, rootElement, specialNamespaces);
107                            processRegularPages(
108                                    userId, node, rootElement, specialNamespaces, usersMap,
109                                    imagesInputStream, options);
110                            processImages(userId, node, imagesInputStream);
111    
112                            moveFrontPage(userId, node, options);
113                    }
114                    catch (DocumentException de) {
115                            throw new ImportFilesException("Invalid XML file provided");
116                    }
117                    catch (IOException ioe) {
118                            throw new ImportFilesException("Error reading the files provided");
119                    }
120                    catch (PortalException pe) {
121                            throw pe;
122                    }
123                    catch (Exception e) {
124                            throw new PortalException(e);
125                    }
126            }
127    
128            protected long getUserId(
129                            long userId, WikiNode node, String author,
130                            Map<String, String> usersMap)
131                    throws SystemException {
132    
133                    User user = null;
134    
135                    String emailAddress = usersMap.get(author);
136    
137                    if (Validator.isNotNull(emailAddress)) {
138                            user = UserLocalServiceUtil.fetchUserByEmailAddress(
139                                    node.getCompanyId(), emailAddress);
140                    }
141                    else {
142                            user = UserLocalServiceUtil.fetchUserByScreenName(
143                                    node.getCompanyId(), author.toLowerCase());
144                    }
145    
146                    if (user != null) {
147                            return user.getUserId();
148                    }
149    
150                    return userId;
151            }
152    
153            protected void importPage(
154                            long userId, String author, WikiNode node, String title,
155                            String content, String summary, Map<String, String> usersMap,
156                            boolean strictImportMode)
157                    throws PortalException {
158    
159                    try {
160                            long authorUserId = getUserId(userId, node, author, usersMap);
161                            String parentTitle = readParentTitle(content);
162                            String redirectTitle = readRedirectTitle(content);
163    
164                            ServiceContext serviceContext = new ServiceContext();
165    
166                            serviceContext.setAddGroupPermissions(true);
167                            serviceContext.setAddGuestPermissions(true);
168                            serviceContext.setAssetTagNames(
169                                    readAssetTagNames(userId, node, content));
170    
171                            if (Validator.isNull(redirectTitle)) {
172                                    _translator.setStrictImportMode(strictImportMode);
173    
174                                    content = _translator.translate(content);
175                            }
176                            else {
177                                    content =
178                                            StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
179                                                    StringPool.DOUBLE_CLOSE_BRACKET;
180                            }
181    
182                            WikiPage page = null;
183    
184                            try {
185                                    page = WikiPageLocalServiceUtil.getPage(
186                                            node.getNodeId(), title);
187                            }
188                            catch (NoSuchPageException nspe) {
189                                    page = WikiPageLocalServiceUtil.addPage(
190                                            authorUserId, node.getNodeId(), title,
191                                            WikiPageConstants.NEW, null, true, serviceContext);
192                            }
193    
194                            WikiPageLocalServiceUtil.updatePage(
195                                    authorUserId, node.getNodeId(), title, page.getVersion(),
196                                    content, summary, true, "creole", parentTitle, redirectTitle,
197                                    serviceContext);
198                    }
199                    catch (Exception e) {
200                            throw new PortalException("Error importing page " + title, e);
201                    }
202            }
203    
204            protected boolean isSpecialMediaWikiPage(
205                    String title, List<String> specialNamespaces) {
206    
207                    for (String namespace : specialNamespaces) {
208                            if (title.startsWith(namespace + StringPool.COLON)) {
209                                    return true;
210                            }
211                    }
212    
213                    return false;
214            }
215    
216            protected boolean isValidImage(String[] paths, InputStream inputStream) {
217                    if (_specialMediaWikiDirs.contains(paths[0])) {
218                            return false;
219                    }
220    
221                    if ((paths.length > 1) && _specialMediaWikiDirs.contains(paths[1])) {
222                            return false;
223                    }
224    
225                    String fileName = paths[paths.length - 1];
226    
227                    try {
228                            DLStoreUtil.validate(fileName, true, inputStream);
229                    }
230                    catch (PortalException pe) {
231                            return false;
232                    }
233                    catch (SystemException se) {
234                            return false;
235                    }
236    
237                    return true;
238            }
239    
240            protected void moveFrontPage(
241                    long userId, WikiNode node, Map<String, String[]> options) {
242    
243                    String frontPageTitle = MapUtil.getString(
244                            options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
245    
246                    if (Validator.isNotNull(frontPageTitle)) {
247                            frontPageTitle = normalizeTitle(frontPageTitle);
248    
249                            try {
250                                    if (WikiPageLocalServiceUtil.getPagesCount(
251                                                    node.getNodeId(), frontPageTitle, true) > 0) {
252    
253                                            ServiceContext serviceContext = new ServiceContext();
254    
255                                            serviceContext.setAddGroupPermissions(true);
256                                            serviceContext.setAddGuestPermissions(true);
257    
258                                            WikiPageLocalServiceUtil.movePage(
259                                                    userId, node.getNodeId(), frontPageTitle,
260                                                    WikiPageConstants.FRONT_PAGE, false, serviceContext);
261    
262                                    }
263                            }
264                            catch (Exception e) {
265                                    if (_log.isWarnEnabled()) {
266                                            StringBundler sb = new StringBundler(4);
267    
268                                            sb.append("Could not move ");
269                                            sb.append(WikiPageConstants.FRONT_PAGE);
270                                            sb.append(" to the title provided: ");
271                                            sb.append(frontPageTitle);
272    
273                                            _log.warn(sb.toString(), e);
274                                    }
275                            }
276    
277                    }
278    
279            }
280    
281            protected String normalize(String categoryName, int length) {
282                    categoryName = AssetUtil.toWord(categoryName.trim());
283    
284                    return StringUtil.shorten(categoryName, length);
285            }
286    
287            protected String normalizeDescription(String description) {
288                    description = description.replaceAll(
289                            _categoriesPattern.pattern(), StringPool.BLANK);
290    
291                    return normalize(description, 300);
292            }
293    
294            protected String normalizeTitle(String title) {
295                    title = title.replaceAll(
296                            PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
297    
298                    return StringUtil.shorten(title, 75);
299            }
300    
301            protected void processImages(
302                            long userId, WikiNode node, InputStream imagesInputStream)
303                    throws Exception {
304    
305                    if (imagesInputStream == null) {
306                            return;
307                    }
308    
309                    ProgressTracker progressTracker =
310                            ProgressTrackerThreadLocal.getProgressTracker();
311    
312                    int count = 0;
313    
314                    ZipReader zipReader = ZipReaderFactoryUtil.getZipReader(
315                            imagesInputStream);
316    
317                    List<String> entries = zipReader.getEntries();
318    
319                    int total = entries.size();
320    
321                    if (total > 0) {
322                            try {
323                                    WikiPageLocalServiceUtil.getPage(
324                                            node.getNodeId(), SHARED_IMAGES_TITLE);
325                            }
326                            catch (NoSuchPageException nspe) {
327                                    ServiceContext serviceContext = new ServiceContext();
328    
329                                    serviceContext.setAddGroupPermissions(true);
330                                    serviceContext.setAddGuestPermissions(true);
331    
332                                    WikiPageLocalServiceUtil.addPage(
333                                            userId, node.getNodeId(), SHARED_IMAGES_TITLE,
334                                            SHARED_IMAGES_CONTENT, null, true, serviceContext);
335                            }
336                    }
337    
338                    List<ObjectValuePair<String, InputStream>> inputStreamOVPs =
339                            new ArrayList<ObjectValuePair<String, InputStream>>();
340    
341                    try {
342                            int percentage = 50;
343    
344                            for (int i = 0; i < entries.size(); i++) {
345                                    String entry = entries.get(i);
346    
347                                    String key = entry;
348    
349                                    InputStream inputStream = zipReader.getEntryAsInputStream(
350                                            entry);
351    
352                                    String[] paths = StringUtil.split(key, CharPool.SLASH);
353    
354                                    if (!isValidImage(paths, inputStream)) {
355                                            if (_log.isInfoEnabled()) {
356                                                    _log.info("Ignoring " + key);
357                                            }
358    
359                                            continue;
360                                    }
361    
362                                    String fileName = paths[paths.length - 1].toLowerCase();
363    
364                                    ObjectValuePair<String, InputStream> inputStreamOVP =
365                                            new ObjectValuePair<String, InputStream>(
366                                                    fileName, inputStream);
367    
368                                    inputStreamOVPs.add(inputStreamOVP);
369    
370                                    count++;
371    
372                                    if ((i % 5) == 0) {
373                                            WikiPageLocalServiceUtil.addPageAttachments(
374                                                    userId, node.getNodeId(), SHARED_IMAGES_TITLE,
375                                                    inputStreamOVPs);
376    
377                                            inputStreamOVPs.clear();
378    
379                                            percentage = Math.min(50 + (i * 50) / total, 99);
380    
381                                            progressTracker.setPercent(percentage);
382                                    }
383                            }
384    
385                            if (!inputStreamOVPs.isEmpty()) {
386                                    WikiPageLocalServiceUtil.addPageAttachments(
387                                            userId, node.getNodeId(), SHARED_IMAGES_TITLE,
388                                            inputStreamOVPs);
389                            }
390                    }
391                    finally {
392                            for (ObjectValuePair<String, InputStream> inputStreamOVP :
393                                            inputStreamOVPs) {
394    
395                                    InputStream inputStream = inputStreamOVP.getValue();
396    
397                                    StreamUtil.cleanUp(inputStream);
398                            }
399                    }
400    
401                    zipReader.close();
402    
403                    if (_log.isInfoEnabled()) {
404                            _log.info("Imported " + count + " images into " + node.getName());
405                    }
406            }
407    
408            protected void processRegularPages(
409                    long userId, WikiNode node, Element rootElement,
410                    List<String> specialNamespaces, Map<String, String> usersMap,
411                    InputStream imagesInputStream, Map<String, String[]> options) {
412    
413                    boolean importLatestVersion = MapUtil.getBoolean(
414                            options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
415                    boolean strictImportMode = MapUtil.getBoolean(
416                            options, WikiImporterKeys.OPTIONS_STRICT_IMPORT_MODE);
417    
418                    ProgressTracker progressTracker =
419                            ProgressTrackerThreadLocal.getProgressTracker();
420    
421                    int count = 0;
422    
423                    int percentage = 10;
424    
425                    int maxPercentage = 50;
426    
427                    if (imagesInputStream == null) {
428                            maxPercentage = 99;
429                    }
430    
431                    List<Element> pageElements = rootElement.elements("page");
432    
433                    for (int i = 0; i < pageElements.size(); i++) {
434                            Element pageElement = pageElements.get(i);
435    
436                            String title = pageElement.elementText("title");
437    
438                            title = normalizeTitle(title);
439    
440                            percentage = Math.min(
441                                    10 + (i * (maxPercentage - percentage)) / pageElements.size(),
442                                    maxPercentage);
443    
444                            progressTracker.setPercent(percentage);
445    
446                            if (isSpecialMediaWikiPage(title, specialNamespaces)) {
447                                    continue;
448                            }
449    
450                            List<Element> revisionElements = pageElement.elements("revision");
451    
452                            if (importLatestVersion) {
453                                    Element lastRevisionElement = revisionElements.get(
454                                            revisionElements.size() - 1);
455    
456                                    revisionElements = new ArrayList<Element>();
457    
458                                    revisionElements.add(lastRevisionElement);
459                            }
460    
461                            for (Element revisionElement : revisionElements) {
462                                    Element contributorElement = revisionElement.element(
463                                            "contributor");
464    
465                                    String author = contributorElement.elementText("username");
466    
467                                    String content = revisionElement.elementText("text");
468                                    String summary = revisionElement.elementText("comment");
469    
470                                    try {
471                                            importPage(
472                                                    userId, author, node, title, content, summary, usersMap,
473                                                    strictImportMode);
474                                    }
475                                    catch (Exception e) {
476                                            if (_log.isWarnEnabled()) {
477                                                    _log.warn(
478                                                            "Page with title " + title +
479                                                                    " could not be imported",
480                                                            e);
481                                            }
482                                    }
483                            }
484    
485                            count++;
486                    }
487    
488                    if (_log.isInfoEnabled()) {
489                            _log.info("Imported " + count + " pages into " + node.getName());
490                    }
491            }
492    
493            protected void processSpecialPages(
494                            long userId, WikiNode node, Element rootElement,
495                            List<String> specialNamespaces)
496                    throws PortalException {
497    
498                    ProgressTracker progressTracker =
499                            ProgressTrackerThreadLocal.getProgressTracker();
500    
501                    List<Element> pageElements = rootElement.elements("page");
502    
503                    for (int i = 0; i < pageElements.size(); i++) {
504                            Element pageElement = pageElements.get(i);
505    
506                            String title = pageElement.elementText("title");
507    
508                            if (!title.startsWith("Category:")) {
509                                    if (isSpecialMediaWikiPage(title, specialNamespaces)) {
510                                            rootElement.remove(pageElement);
511                                    }
512    
513                                    continue;
514                            }
515    
516                            String categoryName = title.substring("Category:".length());
517    
518                            categoryName = normalize(categoryName, 75);
519    
520                            Element revisionElement = pageElement.element("revision");
521    
522                            String description = revisionElement.elementText("text");
523    
524                            description = normalizeDescription(description);
525    
526                            try {
527                                    AssetTag assetTag = null;
528    
529                                    try {
530                                            assetTag = AssetTagLocalServiceUtil.getTag(
531                                                    node.getCompanyId(), categoryName);
532                                    }
533                                    catch (NoSuchTagException nste) {
534                                            ServiceContext serviceContext = new ServiceContext();
535    
536                                            serviceContext.setAddGroupPermissions(true);
537                                            serviceContext.setAddGuestPermissions(true);
538                                            serviceContext.setScopeGroupId(node.getGroupId());
539    
540                                            assetTag = AssetTagLocalServiceUtil.addTag(
541                                                    userId, categoryName, null, serviceContext);
542                                    }
543    
544                                    if (Validator.isNotNull(description)) {
545                                            AssetTagPropertyLocalServiceUtil.addTagProperty(
546                                                    userId, assetTag.getTagId(), "description",
547                                                    description);
548                                    }
549                            }
550                            catch (SystemException se) {
551                                    _log.error(se, se);
552                            }
553    
554                            if ((i % 5) == 0) {
555                                    progressTracker.setPercent((i * 10) / pageElements.size());
556                            }
557                    }
558            }
559    
560            protected String[] readAssetTagNames(
561                            long userId, WikiNode node, String content)
562                    throws PortalException, SystemException {
563    
564                    Matcher matcher = _categoriesPattern.matcher(content);
565    
566                    List<String> assetTagNames = new ArrayList<String>();
567    
568                    while (matcher.find()) {
569                            String categoryName = matcher.group(1);
570    
571                            categoryName = normalize(categoryName, 75);
572    
573                            AssetTag assetTag = null;
574    
575                            try {
576                                    assetTag = AssetTagLocalServiceUtil.getTag(
577                                            node.getGroupId(), categoryName);
578                            }
579                            catch (NoSuchTagException nste) {
580                                    ServiceContext serviceContext = new ServiceContext();
581    
582                                    serviceContext.setAddGroupPermissions(true);
583                                    serviceContext.setAddGuestPermissions(true);
584                                    serviceContext.setScopeGroupId(node.getGroupId());
585    
586                                    assetTag = AssetTagLocalServiceUtil.addTag(
587                                            userId, categoryName, null, serviceContext);
588                            }
589    
590                            assetTagNames.add(assetTag.getName());
591                    }
592    
593                    if (content.contains(_WORK_IN_PROGRESS)) {
594                            assetTagNames.add(_WORK_IN_PROGRESS_TAG);
595                    }
596    
597                    return assetTagNames.toArray(new String[assetTagNames.size()]);
598            }
599    
600            protected String readParentTitle(String content) {
601                    Matcher matcher = _parentPattern.matcher(content);
602    
603                    String redirectTitle = StringPool.BLANK;
604    
605                    if (matcher.find()) {
606                            redirectTitle = matcher.group(1);
607    
608                            redirectTitle = normalizeTitle(redirectTitle);
609    
610                            redirectTitle += " (disambiguation)";
611                    }
612    
613                    return redirectTitle;
614            }
615    
616            protected String readRedirectTitle(String content) {
617                    Matcher matcher = _redirectPattern.matcher(content);
618    
619                    String redirectTitle = StringPool.BLANK;
620    
621                    if (matcher.find()) {
622                            redirectTitle = matcher.group(1);
623    
624                            redirectTitle = normalizeTitle(redirectTitle);
625                    }
626    
627                    return redirectTitle;
628            }
629    
630            protected List<String> readSpecialNamespaces(Element root)
631                    throws ImportFilesException {
632    
633                    List<String> namespaces = new ArrayList<String>();
634    
635                    Element siteinfoElement = root.element("siteinfo");
636    
637                    if (siteinfoElement == null) {
638                            throw new ImportFilesException("Invalid pages XML file");
639                    }
640    
641                    Element namespacesElement = siteinfoElement.element("namespaces");
642    
643                    List<Element> namespaceElements = namespacesElement.elements(
644                            "namespace");
645    
646                    for (Element namespaceElement : namespaceElements) {
647                            Attribute attribute = namespaceElement.attribute("key");
648    
649                            String value = attribute.getValue();
650    
651                            if (!value.equals("0")) {
652                                    namespaces.add(namespaceElement.getText());
653                            }
654                    }
655    
656                    return namespaces;
657            }
658    
659            protected Map<String, String> readUsersFile(InputStream usersInputStream)
660                    throws IOException {
661    
662                    if (usersInputStream == null) {
663                            return Collections.emptyMap();
664                    }
665    
666                    Map<String, String> usersMap = new HashMap<String, String>();
667    
668                    UnsyncBufferedReader unsyncBufferedReader = new UnsyncBufferedReader(
669                            new InputStreamReader(usersInputStream));
670    
671                    String line = unsyncBufferedReader.readLine();
672    
673                    while (line != null) {
674                            String[] array = StringUtil.split(line);
675    
676                            if ((array.length == 2) && Validator.isNotNull(array[0]) &&
677                                    Validator.isNotNull(array[1])) {
678    
679                                    usersMap.put(array[0], array[1]);
680                            }
681                            else {
682                                    if (_log.isInfoEnabled()) {
683                                            _log.info(
684                                                    "Ignoring line " + line +
685                                                            " because it does not contain exactly 2 columns");
686                                    }
687                            }
688    
689                            line = unsyncBufferedReader.readLine();
690                    }
691    
692                    return usersMap;
693            }
694    
695            private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
696    
697            private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
698    
699            private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);
700    
701            private static Pattern _categoriesPattern = Pattern.compile(
702                    "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
703            private static Pattern _parentPattern = Pattern.compile(
704                    "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
705            private static Pattern _redirectPattern = Pattern.compile(
706                    "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
707            private static Set<String> _specialMediaWikiDirs = SetUtil.fromArray(
708                    new String[] {"archive", "temp", "thumb"});
709    
710            private MediaWikiToCreoleTranslator _translator =
711                    new MediaWikiToCreoleTranslator();
712    
713    }