001    /**
002     * Copyright (c) 2000-2013 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portlet.wiki.importers.mediawiki;
016    
017    import com.liferay.portal.kernel.exception.PortalException;
018    import com.liferay.portal.kernel.exception.SystemException;
019    import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
020    import com.liferay.portal.kernel.log.Log;
021    import com.liferay.portal.kernel.log.LogFactoryUtil;
022    import com.liferay.portal.kernel.util.CharPool;
023    import com.liferay.portal.kernel.util.MapUtil;
024    import com.liferay.portal.kernel.util.ObjectValuePair;
025    import com.liferay.portal.kernel.util.ProgressTracker;
026    import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
027    import com.liferay.portal.kernel.util.SetUtil;
028    import com.liferay.portal.kernel.util.StreamUtil;
029    import com.liferay.portal.kernel.util.StringBundler;
030    import com.liferay.portal.kernel.util.StringPool;
031    import com.liferay.portal.kernel.util.StringUtil;
032    import com.liferay.portal.kernel.util.Validator;
033    import com.liferay.portal.kernel.xml.Attribute;
034    import com.liferay.portal.kernel.xml.Document;
035    import com.liferay.portal.kernel.xml.DocumentException;
036    import com.liferay.portal.kernel.xml.Element;
037    import com.liferay.portal.kernel.xml.SAXReaderUtil;
038    import com.liferay.portal.kernel.zip.ZipReader;
039    import com.liferay.portal.kernel.zip.ZipReaderFactoryUtil;
040    import com.liferay.portal.model.User;
041    import com.liferay.portal.service.ServiceContext;
042    import com.liferay.portal.service.UserLocalServiceUtil;
043    import com.liferay.portal.util.PropsValues;
044    import com.liferay.portlet.asset.NoSuchTagException;
045    import com.liferay.portlet.asset.model.AssetTag;
046    import com.liferay.portlet.asset.service.AssetTagLocalServiceUtil;
047    import com.liferay.portlet.asset.service.AssetTagPropertyLocalServiceUtil;
048    import com.liferay.portlet.asset.util.AssetUtil;
049    import com.liferay.portlet.documentlibrary.store.DLStoreUtil;
050    import com.liferay.portlet.wiki.ImportFilesException;
051    import com.liferay.portlet.wiki.NoSuchPageException;
052    import com.liferay.portlet.wiki.importers.WikiImporter;
053    import com.liferay.portlet.wiki.importers.WikiImporterKeys;
054    import com.liferay.portlet.wiki.model.WikiNode;
055    import com.liferay.portlet.wiki.model.WikiPage;
056    import com.liferay.portlet.wiki.model.WikiPageConstants;
057    import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
058    import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
059    
060    import java.io.IOException;
061    import java.io.InputStream;
062    import java.io.InputStreamReader;
063    
064    import java.util.ArrayList;
065    import java.util.Collections;
066    import java.util.HashMap;
067    import java.util.List;
068    import java.util.Map;
069    import java.util.Set;
070    import java.util.regex.Matcher;
071    import java.util.regex.Pattern;
072    
073    /**
074     * @author Alvaro del Castillo
075     * @author Jorge Ferrer
076     */
077    public class MediaWikiImporter implements WikiImporter {
078    
079            public static final String SHARED_IMAGES_CONTENT = "See attachments";
080    
081            public static final String SHARED_IMAGES_TITLE = "SharedImages";
082    
083            @Override
084            public void importPages(
085                            long userId, WikiNode node, InputStream[] inputStreams,
086                            Map<String, String[]> options)
087                    throws PortalException {
088    
089                    if ((inputStreams.length < 1) || (inputStreams[0] == null)) {
090                            throw new PortalException("The pages file is mandatory");
091                    }
092    
093                    InputStream pagesInputStream = inputStreams[0];
094                    InputStream usersInputStream = inputStreams[1];
095                    InputStream imagesInputStream = inputStreams[2];
096    
097                    try {
098                            Document document = SAXReaderUtil.read(pagesInputStream);
099    
100                            Map<String, String> usersMap = readUsersFile(usersInputStream);
101    
102                            Element rootElement = document.getRootElement();
103    
104                            List<String> specialNamespaces = readSpecialNamespaces(rootElement);
105    
106                            processSpecialPages(userId, node, rootElement, specialNamespaces);
107                            processRegularPages(
108                                    userId, node, rootElement, specialNamespaces, usersMap,
109                                    imagesInputStream, options);
110                            processImages(userId, node, imagesInputStream);
111    
112                            moveFrontPage(userId, node, options);
113                    }
114                    catch (DocumentException de) {
115                            throw new ImportFilesException("Invalid XML file provided");
116                    }
117                    catch (IOException ioe) {
118                            throw new ImportFilesException("Error reading the files provided");
119                    }
120                    catch (PortalException pe) {
121                            throw pe;
122                    }
123                    catch (Exception e) {
124                            throw new PortalException(e);
125                    }
126            }
127    
128            protected long getUserId(
129                            long userId, WikiNode node, String author,
130                            Map<String, String> usersMap)
131                    throws SystemException {
132    
133                    User user = null;
134    
135                    String emailAddress = usersMap.get(author);
136    
137                    if (Validator.isNotNull(emailAddress)) {
138                            user = UserLocalServiceUtil.fetchUserByEmailAddress(
139                                    node.getCompanyId(), emailAddress);
140                    }
141                    else {
142                            user = UserLocalServiceUtil.fetchUserByScreenName(
143                                    node.getCompanyId(), author.toLowerCase());
144                    }
145    
146                    if (user != null) {
147                            return user.getUserId();
148                    }
149    
150                    return userId;
151            }
152    
153            protected void importPage(
154                            long userId, String author, WikiNode node, String title,
155                            String content, String summary, Map<String, String> usersMap,
156                            boolean strictImportMode)
157                    throws PortalException {
158    
159                    try {
160                            long authorUserId = getUserId(userId, node, author, usersMap);
161                            String parentTitle = readParentTitle(content);
162                            String redirectTitle = readRedirectTitle(content);
163    
164                            ServiceContext serviceContext = new ServiceContext();
165    
166                            serviceContext.setAddGroupPermissions(true);
167                            serviceContext.setAddGuestPermissions(true);
168                            serviceContext.setAssetTagNames(
169                                    readAssetTagNames(userId, node, content));
170    
171                            if (Validator.isNull(redirectTitle)) {
172                                    _translator.setStrictImportMode(strictImportMode);
173    
174                                    content = _translator.translate(content);
175                            }
176                            else {
177                                    content =
178                                            StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
179                                                    StringPool.DOUBLE_CLOSE_BRACKET;
180                            }
181    
182                            WikiPage page = null;
183    
184                            try {
185                                    page = WikiPageLocalServiceUtil.getPage(
186                                            node.getNodeId(), title);
187                            }
188                            catch (NoSuchPageException nspe) {
189                                    page = WikiPageLocalServiceUtil.addPage(
190                                            authorUserId, node.getNodeId(), title,
191                                            WikiPageConstants.NEW, null, true, serviceContext);
192                            }
193    
194                            WikiPageLocalServiceUtil.updatePage(
195                                    authorUserId, node.getNodeId(), title, page.getVersion(),
196                                    content, summary, true, "creole", parentTitle, redirectTitle,
197                                    serviceContext);
198                    }
199                    catch (Exception e) {
200                            throw new PortalException("Error importing page " + title, e);
201                    }
202            }
203    
204            protected boolean isSpecialMediaWikiPage(
205                    String title, List<String> specialNamespaces) {
206    
207                    for (String namespace : specialNamespaces) {
208                            if (title.startsWith(namespace + StringPool.COLON)) {
209                                    return true;
210                            }
211                    }
212    
213                    return false;
214            }
215    
216            protected boolean isValidImage(String[] paths, InputStream inputStream) {
217                    if (_specialMediaWikiDirs.contains(paths[0])) {
218                            return false;
219                    }
220    
221                    if ((paths.length > 1) && _specialMediaWikiDirs.contains(paths[1])) {
222                            return false;
223                    }
224    
225                    String fileName = paths[paths.length - 1];
226    
227                    try {
228                            DLStoreUtil.validate(fileName, true, inputStream);
229                    }
230                    catch (PortalException pe) {
231                            return false;
232                    }
233                    catch (SystemException se) {
234                            return false;
235                    }
236    
237                    return true;
238            }
239    
240            protected void moveFrontPage(
241                    long userId, WikiNode node, Map<String, String[]> options) {
242    
243                    String frontPageTitle = MapUtil.getString(
244                            options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
245    
246                    if (Validator.isNotNull(frontPageTitle)) {
247                            frontPageTitle = normalizeTitle(frontPageTitle);
248    
249                            try {
250                                    if (WikiPageLocalServiceUtil.getPagesCount(
251                                                    node.getNodeId(), frontPageTitle, true) > 0) {
252    
253                                            ServiceContext serviceContext = new ServiceContext();
254    
255                                            serviceContext.setAddGroupPermissions(true);
256                                            serviceContext.setAddGuestPermissions(true);
257    
258                                            WikiPageLocalServiceUtil.movePage(
259                                                    userId, node.getNodeId(), frontPageTitle,
260                                                    WikiPageConstants.FRONT_PAGE, false, serviceContext);
261                                    }
262                            }
263                            catch (Exception e) {
264                                    if (_log.isWarnEnabled()) {
265                                            StringBundler sb = new StringBundler(4);
266    
267                                            sb.append("Could not move ");
268                                            sb.append(WikiPageConstants.FRONT_PAGE);
269                                            sb.append(" to the title provided: ");
270                                            sb.append(frontPageTitle);
271    
272                                            _log.warn(sb.toString(), e);
273                                    }
274                            }
275                    }
276            }
277    
278            protected String normalize(String categoryName, int length) {
279                    categoryName = AssetUtil.toWord(categoryName.trim());
280    
281                    return StringUtil.shorten(categoryName, length);
282            }
283    
284            protected String normalizeDescription(String description) {
285                    description = description.replaceAll(
286                            _categoriesPattern.pattern(), StringPool.BLANK);
287    
288                    return normalize(description, 300);
289            }
290    
291            protected String normalizeTitle(String title) {
292                    title = title.replaceAll(
293                            PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
294    
295                    return StringUtil.shorten(title, 75);
296            }
297    
298            protected void processImages(
299                            long userId, WikiNode node, InputStream imagesInputStream)
300                    throws Exception {
301    
302                    if (imagesInputStream == null) {
303                            return;
304                    }
305    
306                    ProgressTracker progressTracker =
307                            ProgressTrackerThreadLocal.getProgressTracker();
308    
309                    int count = 0;
310    
311                    ZipReader zipReader = ZipReaderFactoryUtil.getZipReader(
312                            imagesInputStream);
313    
314                    List<String> entries = zipReader.getEntries();
315    
316                    int total = entries.size();
317    
318                    if (total > 0) {
319                            try {
320                                    WikiPageLocalServiceUtil.getPage(
321                                            node.getNodeId(), SHARED_IMAGES_TITLE);
322                            }
323                            catch (NoSuchPageException nspe) {
324                                    ServiceContext serviceContext = new ServiceContext();
325    
326                                    serviceContext.setAddGroupPermissions(true);
327                                    serviceContext.setAddGuestPermissions(true);
328    
329                                    WikiPageLocalServiceUtil.addPage(
330                                            userId, node.getNodeId(), SHARED_IMAGES_TITLE,
331                                            SHARED_IMAGES_CONTENT, null, true, serviceContext);
332                            }
333                    }
334    
335                    List<ObjectValuePair<String, InputStream>> inputStreamOVPs =
336                            new ArrayList<ObjectValuePair<String, InputStream>>();
337    
338                    try {
339                            int percentage = 50;
340    
341                            for (int i = 0; i < entries.size(); i++) {
342                                    String entry = entries.get(i);
343    
344                                    String key = entry;
345    
346                                    InputStream inputStream = zipReader.getEntryAsInputStream(
347                                            entry);
348    
349                                    String[] paths = StringUtil.split(key, CharPool.SLASH);
350    
351                                    if (!isValidImage(paths, inputStream)) {
352                                            if (_log.isInfoEnabled()) {
353                                                    _log.info("Ignoring " + key);
354                                            }
355    
356                                            continue;
357                                    }
358    
359                                    String fileName = paths[paths.length - 1].toLowerCase();
360    
361                                    ObjectValuePair<String, InputStream> inputStreamOVP =
362                                            new ObjectValuePair<String, InputStream>(
363                                                    fileName, inputStream);
364    
365                                    inputStreamOVPs.add(inputStreamOVP);
366    
367                                    count++;
368    
369                                    if ((i % 5) == 0) {
370                                            WikiPageLocalServiceUtil.addPageAttachments(
371                                                    userId, node.getNodeId(), SHARED_IMAGES_TITLE,
372                                                    inputStreamOVPs);
373    
374                                            inputStreamOVPs.clear();
375    
376                                            percentage = Math.min(50 + (i * 50) / total, 99);
377    
378                                            progressTracker.setPercent(percentage);
379                                    }
380                            }
381    
382                            if (!inputStreamOVPs.isEmpty()) {
383                                    WikiPageLocalServiceUtil.addPageAttachments(
384                                            userId, node.getNodeId(), SHARED_IMAGES_TITLE,
385                                            inputStreamOVPs);
386                            }
387                    }
388                    finally {
389                            for (ObjectValuePair<String, InputStream> inputStreamOVP :
390                                            inputStreamOVPs) {
391    
392                                    InputStream inputStream = inputStreamOVP.getValue();
393    
394                                    StreamUtil.cleanUp(inputStream);
395                            }
396                    }
397    
398                    zipReader.close();
399    
400                    if (_log.isInfoEnabled()) {
401                            _log.info("Imported " + count + " images into " + node.getName());
402                    }
403            }
404    
405            protected void processRegularPages(
406                    long userId, WikiNode node, Element rootElement,
407                    List<String> specialNamespaces, Map<String, String> usersMap,
408                    InputStream imagesInputStream, Map<String, String[]> options) {
409    
410                    boolean importLatestVersion = MapUtil.getBoolean(
411                            options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
412                    boolean strictImportMode = MapUtil.getBoolean(
413                            options, WikiImporterKeys.OPTIONS_STRICT_IMPORT_MODE);
414    
415                    ProgressTracker progressTracker =
416                            ProgressTrackerThreadLocal.getProgressTracker();
417    
418                    int count = 0;
419    
420                    int percentage = 10;
421    
422                    int maxPercentage = 50;
423    
424                    if (imagesInputStream == null) {
425                            maxPercentage = 99;
426                    }
427    
428                    List<Element> pageElements = rootElement.elements("page");
429    
430                    for (int i = 0; i < pageElements.size(); i++) {
431                            Element pageElement = pageElements.get(i);
432    
433                            String title = pageElement.elementText("title");
434    
435                            title = normalizeTitle(title);
436    
437                            percentage = Math.min(
438                                    10 + (i * (maxPercentage - percentage)) / pageElements.size(),
439                                    maxPercentage);
440    
441                            progressTracker.setPercent(percentage);
442    
443                            if (isSpecialMediaWikiPage(title, specialNamespaces)) {
444                                    continue;
445                            }
446    
447                            List<Element> revisionElements = pageElement.elements("revision");
448    
449                            if (importLatestVersion) {
450                                    Element lastRevisionElement = revisionElements.get(
451                                            revisionElements.size() - 1);
452    
453                                    revisionElements = new ArrayList<Element>();
454    
455                                    revisionElements.add(lastRevisionElement);
456                            }
457    
458                            for (Element revisionElement : revisionElements) {
459                                    Element contributorElement = revisionElement.element(
460                                            "contributor");
461    
462                                    String author = contributorElement.elementText("username");
463    
464                                    String content = revisionElement.elementText("text");
465                                    String summary = revisionElement.elementText("comment");
466    
467                                    try {
468                                            importPage(
469                                                    userId, author, node, title, content, summary, usersMap,
470                                                    strictImportMode);
471                                    }
472                                    catch (Exception e) {
473                                            if (_log.isWarnEnabled()) {
474                                                    _log.warn(
475                                                            "Page with title " + title +
476                                                                    " could not be imported",
477                                                            e);
478                                            }
479                                    }
480                            }
481    
482                            count++;
483                    }
484    
485                    if (_log.isInfoEnabled()) {
486                            _log.info("Imported " + count + " pages into " + node.getName());
487                    }
488            }
489    
490            protected void processSpecialPages(
491                            long userId, WikiNode node, Element rootElement,
492                            List<String> specialNamespaces)
493                    throws PortalException {
494    
495                    ProgressTracker progressTracker =
496                            ProgressTrackerThreadLocal.getProgressTracker();
497    
498                    List<Element> pageElements = rootElement.elements("page");
499    
500                    for (int i = 0; i < pageElements.size(); i++) {
501                            Element pageElement = pageElements.get(i);
502    
503                            String title = pageElement.elementText("title");
504    
505                            if (!title.startsWith("Category:")) {
506                                    if (isSpecialMediaWikiPage(title, specialNamespaces)) {
507                                            rootElement.remove(pageElement);
508                                    }
509    
510                                    continue;
511                            }
512    
513                            String categoryName = title.substring("Category:".length());
514    
515                            categoryName = normalize(categoryName, 75);
516    
517                            Element revisionElement = pageElement.element("revision");
518    
519                            String description = revisionElement.elementText("text");
520    
521                            description = normalizeDescription(description);
522    
523                            try {
524                                    AssetTag assetTag = null;
525    
526                                    try {
527                                            assetTag = AssetTagLocalServiceUtil.getTag(
528                                                    node.getCompanyId(), categoryName);
529                                    }
530                                    catch (NoSuchTagException nste) {
531                                            ServiceContext serviceContext = new ServiceContext();
532    
533                                            serviceContext.setAddGroupPermissions(true);
534                                            serviceContext.setAddGuestPermissions(true);
535                                            serviceContext.setScopeGroupId(node.getGroupId());
536    
537                                            assetTag = AssetTagLocalServiceUtil.addTag(
538                                                    userId, categoryName, null, serviceContext);
539                                    }
540    
541                                    if (Validator.isNotNull(description)) {
542                                            AssetTagPropertyLocalServiceUtil.addTagProperty(
543                                                    userId, assetTag.getTagId(), "description",
544                                                    description);
545                                    }
546                            }
547                            catch (SystemException se) {
548                                    _log.error(se, se);
549                            }
550    
551                            if ((i % 5) == 0) {
552                                    progressTracker.setPercent((i * 10) / pageElements.size());
553                            }
554                    }
555            }
556    
557            protected String[] readAssetTagNames(
558                            long userId, WikiNode node, String content)
559                    throws PortalException, SystemException {
560    
561                    Matcher matcher = _categoriesPattern.matcher(content);
562    
563                    List<String> assetTagNames = new ArrayList<String>();
564    
565                    while (matcher.find()) {
566                            String categoryName = matcher.group(1);
567    
568                            categoryName = normalize(categoryName, 75);
569    
570                            AssetTag assetTag = null;
571    
572                            try {
573                                    assetTag = AssetTagLocalServiceUtil.getTag(
574                                            node.getGroupId(), categoryName);
575                            }
576                            catch (NoSuchTagException nste) {
577                                    ServiceContext serviceContext = new ServiceContext();
578    
579                                    serviceContext.setAddGroupPermissions(true);
580                                    serviceContext.setAddGuestPermissions(true);
581                                    serviceContext.setScopeGroupId(node.getGroupId());
582    
583                                    assetTag = AssetTagLocalServiceUtil.addTag(
584                                            userId, categoryName, null, serviceContext);
585                            }
586    
587                            assetTagNames.add(assetTag.getName());
588                    }
589    
590                    if (content.contains(_WORK_IN_PROGRESS)) {
591                            assetTagNames.add(_WORK_IN_PROGRESS_TAG);
592                    }
593    
594                    return assetTagNames.toArray(new String[assetTagNames.size()]);
595            }
596    
597            protected String readParentTitle(String content) {
598                    Matcher matcher = _parentPattern.matcher(content);
599    
600                    String redirectTitle = StringPool.BLANK;
601    
602                    if (matcher.find()) {
603                            redirectTitle = matcher.group(1);
604    
605                            redirectTitle = normalizeTitle(redirectTitle);
606    
607                            redirectTitle += " (disambiguation)";
608                    }
609    
610                    return redirectTitle;
611            }
612    
613            protected String readRedirectTitle(String content) {
614                    Matcher matcher = _redirectPattern.matcher(content);
615    
616                    String redirectTitle = StringPool.BLANK;
617    
618                    if (matcher.find()) {
619                            redirectTitle = matcher.group(1);
620    
621                            redirectTitle = normalizeTitle(redirectTitle);
622                    }
623    
624                    return redirectTitle;
625            }
626    
627            protected List<String> readSpecialNamespaces(Element root)
628                    throws ImportFilesException {
629    
630                    List<String> namespaces = new ArrayList<String>();
631    
632                    Element siteinfoElement = root.element("siteinfo");
633    
634                    if (siteinfoElement == null) {
635                            throw new ImportFilesException("Invalid pages XML file");
636                    }
637    
638                    Element namespacesElement = siteinfoElement.element("namespaces");
639    
640                    List<Element> namespaceElements = namespacesElement.elements(
641                            "namespace");
642    
643                    for (Element namespaceElement : namespaceElements) {
644                            Attribute attribute = namespaceElement.attribute("key");
645    
646                            String value = attribute.getValue();
647    
648                            if (!value.equals("0")) {
649                                    namespaces.add(namespaceElement.getText());
650                            }
651                    }
652    
653                    return namespaces;
654            }
655    
656            protected Map<String, String> readUsersFile(InputStream usersInputStream)
657                    throws IOException {
658    
659                    if (usersInputStream == null) {
660                            return Collections.emptyMap();
661                    }
662    
663                    Map<String, String> usersMap = new HashMap<String, String>();
664    
665                    UnsyncBufferedReader unsyncBufferedReader = new UnsyncBufferedReader(
666                            new InputStreamReader(usersInputStream));
667    
668                    String line = unsyncBufferedReader.readLine();
669    
670                    while (line != null) {
671                            String[] array = StringUtil.split(line);
672    
673                            if ((array.length == 2) && Validator.isNotNull(array[0]) &&
674                                    Validator.isNotNull(array[1])) {
675    
676                                    usersMap.put(array[0], array[1]);
677                            }
678                            else {
679                                    if (_log.isInfoEnabled()) {
680                                            _log.info(
681                                                    "Ignoring line " + line +
682                                                            " because it does not contain exactly 2 columns");
683                                    }
684                            }
685    
686                            line = unsyncBufferedReader.readLine();
687                    }
688    
689                    return usersMap;
690            }
691    
692            private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
693    
694            private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
695    
696            private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);
697    
698            private static Pattern _categoriesPattern = Pattern.compile(
699                    "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
700            private static Pattern _parentPattern = Pattern.compile(
701                    "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
702            private static Pattern _redirectPattern = Pattern.compile(
703                    "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
704            private static Set<String> _specialMediaWikiDirs = SetUtil.fromArray(
705                    new String[] {"archive", "temp", "thumb"});
706    
707            private MediaWikiToCreoleTranslator _translator =
708                    new MediaWikiToCreoleTranslator();
709    
710    }