001    /**
002     * Copyright (c) 2000-2012 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portlet.wiki.importers.mediawiki;
016    
017    import com.liferay.portal.NoSuchUserException;
018    import com.liferay.portal.kernel.exception.PortalException;
019    import com.liferay.portal.kernel.exception.SystemException;
020    import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
021    import com.liferay.portal.kernel.log.Log;
022    import com.liferay.portal.kernel.log.LogFactoryUtil;
023    import com.liferay.portal.kernel.util.ArrayUtil;
024    import com.liferay.portal.kernel.util.CharPool;
025    import com.liferay.portal.kernel.util.MapUtil;
026    import com.liferay.portal.kernel.util.ObjectValuePair;
027    import com.liferay.portal.kernel.util.ProgressTracker;
028    import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
029    import com.liferay.portal.kernel.util.StreamUtil;
030    import com.liferay.portal.kernel.util.StringBundler;
031    import com.liferay.portal.kernel.util.StringPool;
032    import com.liferay.portal.kernel.util.StringUtil;
033    import com.liferay.portal.kernel.util.Validator;
034    import com.liferay.portal.kernel.xml.Document;
035    import com.liferay.portal.kernel.xml.DocumentException;
036    import com.liferay.portal.kernel.xml.Element;
037    import com.liferay.portal.kernel.xml.SAXReaderUtil;
038    import com.liferay.portal.kernel.zip.ZipReader;
039    import com.liferay.portal.kernel.zip.ZipReaderFactoryUtil;
040    import com.liferay.portal.model.User;
041    import com.liferay.portal.service.ServiceContext;
042    import com.liferay.portal.service.UserLocalServiceUtil;
043    import com.liferay.portal.util.PropsValues;
044    import com.liferay.portlet.asset.NoSuchTagException;
045    import com.liferay.portlet.asset.model.AssetTag;
046    import com.liferay.portlet.asset.service.AssetTagLocalServiceUtil;
047    import com.liferay.portlet.asset.service.AssetTagPropertyLocalServiceUtil;
048    import com.liferay.portlet.asset.util.AssetUtil;
049    import com.liferay.portlet.documentlibrary.store.DLStoreUtil;
050    import com.liferay.portlet.wiki.ImportFilesException;
051    import com.liferay.portlet.wiki.NoSuchPageException;
052    import com.liferay.portlet.wiki.importers.WikiImporter;
053    import com.liferay.portlet.wiki.importers.WikiImporterKeys;
054    import com.liferay.portlet.wiki.model.WikiNode;
055    import com.liferay.portlet.wiki.model.WikiPage;
056    import com.liferay.portlet.wiki.model.WikiPageConstants;
057    import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
058    import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
059    
060    import java.io.IOException;
061    import java.io.InputStream;
062    import java.io.InputStreamReader;
063    
064    import java.util.ArrayList;
065    import java.util.Collections;
066    import java.util.HashMap;
067    import java.util.Iterator;
068    import java.util.List;
069    import java.util.Map;
070    import java.util.regex.Matcher;
071    import java.util.regex.Pattern;
072    
073    /**
074     * @author Alvaro del Castillo
075     * @author Jorge Ferrer
076     */
077    public class MediaWikiImporter implements WikiImporter {
078    
        /**
         * Value passed to <code>WikiPageLocalServiceUtil.addPage</code> when
         * {@link #processImages(long, WikiNode, InputStream)} has to create the
         * shared images page.
         */
        public static final String SHARED_IMAGES_CONTENT = "See attachments";

        /**
         * Title of the wiki page that imported images are attached to.
         */
        public static final String SHARED_IMAGES_TITLE = "SharedImages";
082    
083            public void importPages(
084                            long userId, WikiNode node, InputStream[] inputStreams,
085                            Map<String, String[]> options)
086                    throws PortalException {
087    
088                    if ((inputStreams.length < 1) || (inputStreams[0] == null)) {
089                            throw new PortalException("The pages file is mandatory");
090                    }
091    
092                    InputStream pagesInputStream = inputStreams[0];
093                    InputStream usersInputStream = inputStreams[1];
094                    InputStream imagesInputStream = inputStreams[2];
095    
096                    try {
097                            Document doc = SAXReaderUtil.read(pagesInputStream);
098    
099                            Map<String, String> usersMap = readUsersFile(usersInputStream);
100    
101                            Element root = doc.getRootElement();
102    
103                            List<String> specialNamespaces = readSpecialNamespaces(root);
104    
105                            processSpecialPages(userId, node, root, specialNamespaces);
106                            processRegularPages(
107                                    userId, node, root, specialNamespaces, usersMap,
108                                    imagesInputStream, options);
109                            processImages(userId, node, imagesInputStream);
110    
111                            moveFrontPage(userId, node, options);
112                    }
113                    catch (DocumentException de) {
114                            throw new ImportFilesException("Invalid XML file provided");
115                    }
116                    catch (IOException de) {
117                            throw new ImportFilesException("Error reading the files provided");
118                    }
119                    catch (PortalException e) {
120                            throw e;
121                    }
122                    catch (Exception e) {
123                            throw new PortalException(e);
124                    }
125            }
126    
127            protected long getUserId(
128                            long userId, WikiNode node, String author,
129                            Map<String, String> usersMap)
130                    throws PortalException, SystemException {
131    
132                    User user = null;
133    
134                    String emailAddress = usersMap.get(author);
135    
136                    try {
137                            if (Validator.isNull(emailAddress)) {
138                                    user = UserLocalServiceUtil.getUserByScreenName(
139                                            node.getCompanyId(), author.toLowerCase());
140                            }
141                            else {
142                                    user = UserLocalServiceUtil.getUserByEmailAddress(
143                                            node.getCompanyId(), emailAddress);
144                            }
145                    }
146                    catch (NoSuchUserException nsue) {
147                            user = UserLocalServiceUtil.getUserById(userId);
148                    }
149    
150                    return user.getUserId();
151            }
152    
153            protected void importPage(
154                            long userId, String author, WikiNode node, String title,
155                            String content, String summary, Map<String, String> usersMap)
156                    throws PortalException {
157    
158                    try {
159                            long authorUserId = getUserId(userId, node, author, usersMap);
160                            String parentTitle = readParentTitle(content);
161                            String redirectTitle = readRedirectTitle(content);
162    
163                            ServiceContext serviceContext = new ServiceContext();
164    
165                            serviceContext.setAddGroupPermissions(true);
166                            serviceContext.setAddGuestPermissions(true);
167                            serviceContext.setAssetTagNames(
168                                    readAssetTagNames(userId, node, content));
169    
170                            if (Validator.isNull(redirectTitle)) {
171                                    content = _translator.translate(content);
172                            }
173                            else {
174                                    content =
175                                            StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
176                                                    StringPool.DOUBLE_CLOSE_BRACKET;
177                            }
178    
179                            WikiPage page = null;
180    
181                            try {
182                                    page = WikiPageLocalServiceUtil.getPage(
183                                            node.getNodeId(), title);
184                            }
185                            catch (NoSuchPageException nspe) {
186                                    page = WikiPageLocalServiceUtil.addPage(
187                                            authorUserId, node.getNodeId(), title,
188                                            WikiPageConstants.NEW, null, true, serviceContext);
189                            }
190    
191                            WikiPageLocalServiceUtil.updatePage(
192                                    authorUserId, node.getNodeId(), title, page.getVersion(),
193                                    content, summary, true, "creole", parentTitle, redirectTitle,
194                                    serviceContext);
195                    }
196                    catch (Exception e) {
197                            throw new PortalException("Error importing page " + title, e);
198                    }
199            }
200    
201            protected boolean isSpecialMediaWikiPage(
202                    String title, List<String> specialNamespaces) {
203    
204                    for (String namespace: specialNamespaces) {
205                            if (title.startsWith(namespace + StringPool.COLON)) {
206                                    return true;
207                            }
208                    }
209    
210                    return false;
211            }
212    
213            protected boolean isValidImage(String[] paths, InputStream inputStream) {
214                    if (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[0])) {
215                            return false;
216                    }
217    
218                    if ((paths.length > 1) &&
219                            (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[1]))) {
220    
221                            return false;
222                    }
223    
224                    String fileName = paths[paths.length - 1];
225    
226                    try {
227                            DLStoreUtil.validate(fileName, true, inputStream);
228                    }
229                    catch (PortalException pe) {
230                            return false;
231                    }
232                    catch (SystemException se) {
233                            return false;
234                    }
235    
236                    return true;
237            }
238    
239            protected void moveFrontPage(
240                    long userId, WikiNode node, Map<String, String[]> options) {
241    
242                    String frontPageTitle = MapUtil.getString(
243                            options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
244    
245                    if (Validator.isNotNull(frontPageTitle)) {
246                            frontPageTitle = normalizeTitle(frontPageTitle);
247    
248                            try {
249                                    if (WikiPageLocalServiceUtil.getPagesCount(
250                                                    node.getNodeId(), frontPageTitle, true) > 0) {
251    
252                                            ServiceContext serviceContext = new ServiceContext();
253    
254                                            serviceContext.setAddGroupPermissions(true);
255                                            serviceContext.setAddGuestPermissions(true);
256    
257                                            WikiPageLocalServiceUtil.movePage(
258                                                    userId, node.getNodeId(), frontPageTitle,
259                                                    WikiPageConstants.FRONT_PAGE, false, serviceContext);
260    
261                                    }
262                            }
263                            catch (Exception e) {
264                                    if (_log.isWarnEnabled()) {
265                                            StringBundler sb = new StringBundler(4);
266    
267                                            sb.append("Could not move ");
268                                            sb.append(WikiPageConstants.FRONT_PAGE);
269                                            sb.append(" to the title provided: ");
270                                            sb.append(frontPageTitle);
271    
272                                            _log.warn(sb.toString(), e);
273                                    }
274                            }
275    
276                    }
277    
278            }
279    
280            protected String normalize(String categoryName, int length) {
281                    categoryName = AssetUtil.toWord(categoryName.trim());
282    
283                    return StringUtil.shorten(categoryName, length);
284            }
285    
286            protected String normalizeDescription(String description) {
287                    description = description.replaceAll(
288                            _categoriesPattern.pattern(), StringPool.BLANK);
289    
290                    return normalize(description, 300);
291            }
292    
293            protected String normalizeTitle(String title) {
294                    title = title.replaceAll(
295                            PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
296    
297                    return StringUtil.shorten(title, 75);
298            }
299    
300            protected void processImages(
301                            long userId, WikiNode node, InputStream imagesInputStream)
302                    throws Exception {
303    
304                    if (imagesInputStream == null) {
305                            return;
306                    }
307    
308                    ProgressTracker progressTracker =
309                            ProgressTrackerThreadLocal.getProgressTracker();
310    
311                    int count = 0;
312    
313                    ZipReader zipReader = ZipReaderFactoryUtil.getZipReader(
314                            imagesInputStream);
315    
316                    List<String> entries = zipReader.getEntries();
317    
318                    int total = entries.size();
319    
320                    if (total > 0) {
321                            try {
322                                    WikiPageLocalServiceUtil.getPage(
323                                            node.getNodeId(), SHARED_IMAGES_TITLE);
324                            }
325                            catch (NoSuchPageException nspe) {
326                                    ServiceContext serviceContext = new ServiceContext();
327    
328                                    serviceContext.setAddGroupPermissions(true);
329                                    serviceContext.setAddGuestPermissions(true);
330    
331                                    WikiPageLocalServiceUtil.addPage(
332                                            userId, node.getNodeId(), SHARED_IMAGES_TITLE,
333                                            SHARED_IMAGES_CONTENT, null, true, serviceContext);
334                            }
335                    }
336    
337                    List<ObjectValuePair<String, InputStream>> inputStreamOVPs =
338                            new ArrayList<ObjectValuePair<String, InputStream>>();
339    
340                    try {
341                            int percentage = 50;
342    
343                            for (int i = 0; i < entries.size(); i++) {
344                                    String entry = entries.get(i);
345    
346                                    String key = entry;
347    
348                                    InputStream inputStream = zipReader.getEntryAsInputStream(
349                                            entry);
350    
351                                    String[] paths = StringUtil.split(key, CharPool.SLASH);
352    
353                                    if (!isValidImage(paths, inputStream)) {
354                                            if (_log.isInfoEnabled()) {
355                                                    _log.info("Ignoring " + key);
356                                            }
357    
358                                            continue;
359                                    }
360    
361                                    String fileName = paths[paths.length - 1].toLowerCase();
362    
363                                    ObjectValuePair<String, InputStream> inputStreamOVP =
364                                            new ObjectValuePair<String, InputStream>(
365                                                    fileName, inputStream);
366    
367                                    inputStreamOVPs.add(inputStreamOVP);
368    
369                                    count++;
370    
371                                    if ((i % 5) == 0) {
372                                            WikiPageLocalServiceUtil.addPageAttachments(
373                                                    userId, node.getNodeId(), SHARED_IMAGES_TITLE,
374                                                    inputStreamOVPs);
375    
376                                            inputStreamOVPs.clear();
377    
378                                            percentage = Math.min(50 + (i * 50) / total, 99);
379    
380                                            progressTracker.updateProgress(percentage);
381                                    }
382                            }
383    
384                            if (!inputStreamOVPs.isEmpty()) {
385                                    WikiPageLocalServiceUtil.addPageAttachments(
386                                            userId, node.getNodeId(), SHARED_IMAGES_TITLE,
387                                            inputStreamOVPs);
388                            }
389                    }
390                    finally {
391                            for (ObjectValuePair<String, InputStream> inputStreamOVP :
392                                            inputStreamOVPs) {
393    
394                                    InputStream inputStream = inputStreamOVP.getValue();
395    
396                                    StreamUtil.cleanUp(inputStream);
397                            }
398                    }
399    
400                    zipReader.close();
401    
402                    if (_log.isInfoEnabled()) {
403                            _log.info("Imported " + count + " images into " + node.getName());
404                    }
405            }
406    
407            protected void processRegularPages(
408                    long userId, WikiNode node, Element root,
409                    List<String> specialNamespaces, Map<String, String> usersMap,
410                    InputStream imagesInputStream, Map<String, String[]> options) {
411    
412                    boolean importLatestVersion = MapUtil.getBoolean(
413                            options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
414    
415                    ProgressTracker progressTracker =
416                            ProgressTrackerThreadLocal.getProgressTracker();
417    
418                    int count = 0;
419    
420                    List<Element> pages = root.elements("page");
421    
422                    int total = pages.size();
423    
424                    Iterator<Element> itr = root.elements("page").iterator();
425    
426                    int percentage = 10;
427                    int maxPercentage = 50;
428    
429                    if (imagesInputStream == null) {
430                            maxPercentage = 99;
431                    }
432    
433                    int percentageRange = maxPercentage - percentage;
434    
435                    for (int i = 0; itr.hasNext(); i++) {
436                            Element pageEl = itr.next();
437    
438                            String title = pageEl.elementText("title");
439    
440                            title = normalizeTitle(title);
441    
442                            percentage = Math.min(
443                                    10 + (i * percentageRange) / total, maxPercentage);
444    
445                            progressTracker.updateProgress(percentage);
446    
447                            if (isSpecialMediaWikiPage(title, specialNamespaces)) {
448                                    continue;
449                            }
450    
451                            List<Element> revisionEls = pageEl.elements("revision");
452    
453                            if (importLatestVersion) {
454                                    Element lastRevisionEl = revisionEls.get(
455                                            revisionEls.size() - 1);
456    
457                                    revisionEls = new ArrayList<Element>();
458    
459                                    revisionEls.add(lastRevisionEl);
460                            }
461    
462                            for (Element curRevisionEl : revisionEls) {
463                                    String author = curRevisionEl.element(
464                                            "contributor").elementText("username");
465                                    String content = curRevisionEl.elementText("text");
466                                    String summary = curRevisionEl.elementText("comment");
467    
468                                    try {
469                                            importPage(
470                                                    userId, author, node, title, content, summary,
471                                                    usersMap);
472                                    }
473                                    catch (Exception e) {
474                                            if (_log.isWarnEnabled()) {
475                                                    StringBundler sb = new StringBundler(3);
476    
477                                                    sb.append("Page with title ");
478                                                    sb.append(title);
479                                                    sb.append(" could not be imported");
480    
481                                                    _log.warn(sb.toString(), e);
482                                            }
483                                    }
484                            }
485    
486                            count++;
487                    }
488    
489                    if (_log.isInfoEnabled()) {
490                            _log.info("Imported " + count + " pages into " + node.getName());
491                    }
492            }
493    
494            protected void processSpecialPages(
495                            long userId, WikiNode node, Element root,
496                            List<String> specialNamespaces)
497                    throws PortalException {
498    
499                    ProgressTracker progressTracker =
500                            ProgressTrackerThreadLocal.getProgressTracker();
501    
502                    List<Element> pages = root.elements("page");
503    
504                    int total = pages.size();
505    
506                    Iterator<Element> itr = pages.iterator();
507    
508                    for (int i = 0; itr.hasNext(); i++) {
509                            Element page = itr.next();
510    
511                            String title = page.elementText("title");
512    
513                            if (!title.startsWith("Category:")) {
514                                    if (isSpecialMediaWikiPage(title, specialNamespaces)) {
515                                            root.remove(page);
516                                    }
517    
518                                    continue;
519                            }
520    
521                            String categoryName = title.substring("Category:".length());
522    
523                            categoryName = normalize(categoryName, 75);
524    
525                            String description = page.element("revision").elementText("text");
526    
527                            description = normalizeDescription(description);
528    
529                            try {
530                                    AssetTag assetTag = null;
531    
532                                    try {
533                                            assetTag = AssetTagLocalServiceUtil.getTag(
534                                                    node.getCompanyId(), categoryName);
535                                    }
536                                    catch (NoSuchTagException nste) {
537                                            ServiceContext serviceContext = new ServiceContext();
538    
539                                            serviceContext.setAddGroupPermissions(true);
540                                            serviceContext.setAddGuestPermissions(true);
541                                            serviceContext.setScopeGroupId(node.getGroupId());
542    
543                                            assetTag = AssetTagLocalServiceUtil.addTag(
544                                                    userId, categoryName, null, serviceContext);
545                                    }
546    
547                                    if (Validator.isNotNull(description)) {
548                                            AssetTagPropertyLocalServiceUtil.addTagProperty(
549                                                    userId, assetTag.getTagId(), "description",
550                                                    description);
551                                    }
552                            }
553                            catch (SystemException se) {
554                                     _log.error(se, se);
555                            }
556    
557                            if ((i % 5) == 0) {
558                                    progressTracker.updateProgress((i * 10) / total);
559                            }
560                    }
561            }
562    
563            protected String[] readAssetTagNames(
564                            long userId, WikiNode node, String content)
565                    throws PortalException, SystemException {
566    
567                    Matcher matcher = _categoriesPattern.matcher(content);
568    
569                    List<String> assetTagNames = new ArrayList<String>();
570    
571                    while (matcher.find()) {
572                            String categoryName = matcher.group(1);
573    
574                            categoryName = normalize(categoryName, 75);
575    
576                            AssetTag assetTag = null;
577    
578                            try {
579                                    assetTag = AssetTagLocalServiceUtil.getTag(
580                                            node.getGroupId(), categoryName);
581                            }
582                            catch (NoSuchTagException nste) {
583                                    ServiceContext serviceContext = new ServiceContext();
584    
585                                    serviceContext.setAddGroupPermissions(true);
586                                    serviceContext.setAddGuestPermissions(true);
587                                    serviceContext.setScopeGroupId(node.getGroupId());
588    
589                                    assetTag = AssetTagLocalServiceUtil.addTag(
590                                            userId, categoryName, null, serviceContext);
591                            }
592    
593                            assetTagNames.add(assetTag.getName());
594                    }
595    
596                    if (content.indexOf(_WORK_IN_PROGRESS) != -1) {
597                            assetTagNames.add(_WORK_IN_PROGRESS_TAG);
598                    }
599    
600                    return assetTagNames.toArray(new String[assetTagNames.size()]);
601            }
602    
603            protected String readParentTitle(String content) {
604                    Matcher matcher = _parentPattern.matcher(content);
605    
606                    String redirectTitle = StringPool.BLANK;
607    
608                    if (matcher.find()) {
609                            redirectTitle = matcher.group(1);
610    
611                            redirectTitle = normalizeTitle(redirectTitle);
612    
613                            redirectTitle += " (disambiguation)";
614                    }
615    
616                    return redirectTitle;
617            }
618            protected String readRedirectTitle(String content) {
619                    Matcher matcher = _redirectPattern.matcher(content);
620    
621                    String redirectTitle = StringPool.BLANK;
622    
623                    if (matcher.find()) {
624                            redirectTitle = matcher.group(1);
625    
626                            redirectTitle = normalizeTitle(redirectTitle);
627                    }
628    
629                    return redirectTitle;
630            }
631            protected List<String> readSpecialNamespaces(Element root)
632                    throws ImportFilesException {
633    
634                    List<String> namespaces = new ArrayList<String>();
635    
636                    Element siteinfoEl = root.element("siteinfo");
637    
638                    if (siteinfoEl == null) {
639                            throw new ImportFilesException("Invalid pages XML file");
640                    }
641    
642                    Iterator<Element> itr = siteinfoEl.element(
643                            "namespaces").elements("namespace").iterator();
644    
645                    while (itr.hasNext()) {
646                            Element namespace = itr.next();
647    
648                            if (!namespace.attribute("key").getData().equals("0")) {
649                                    namespaces.add(namespace.getText());
650                            }
651                    }
652    
653                    return namespaces;
654            }
655    
656            protected Map<String, String> readUsersFile(InputStream usersInputStream)
657                    throws IOException {
658    
659                    if (usersInputStream == null) {
660                            return Collections.emptyMap();
661                    }
662    
663                    Map<String, String> usersMap = new HashMap<String, String>();
664    
665                    UnsyncBufferedReader unsyncBufferedReader = new UnsyncBufferedReader(
666                            new InputStreamReader(usersInputStream));
667    
668                    String line = unsyncBufferedReader.readLine();
669    
670                    while (line != null) {
671                            String[] array = StringUtil.split(line);
672    
673                            if ((array.length == 2) && (Validator.isNotNull(array[0])) &&
674                                    (Validator.isNotNull(array[1]))) {
675    
676                                    usersMap.put(array[0], array[1]);
677                            }
678                            else {
679                                    if (_log.isInfoEnabled()) {
680                                            _log.info(
681                                                    "Ignoring line " + line +
682                                                            " because it does not contain exactly 2 columns");
683                                    }
684                            }
685    
686                            line = unsyncBufferedReader.readLine();
687                    }
688    
689                    return usersMap;
690            }
691    
692            private static final String[] _SPECIAL_MEDIA_WIKI_DIRS = {
693                    "thumb", "temp", "archive"
694            };
695    
696            private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
697    
698            private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
699    
700            private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);
701    
702            private static Pattern _categoriesPattern = Pattern.compile(
703                    "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
704            private static Pattern _parentPattern = Pattern.compile(
705                    "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
706            private static Pattern _redirectPattern = Pattern.compile(
707                    "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
708    
709            private MediaWikiToCreoleTranslator _translator =
710                    new MediaWikiToCreoleTranslator();
711    
712    }