001
014
015 package com.liferay.portlet.wiki.importers.mediawiki;
016
017 import com.liferay.portal.kernel.exception.PortalException;
018 import com.liferay.portal.kernel.exception.SystemException;
019 import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
020 import com.liferay.portal.kernel.log.Log;
021 import com.liferay.portal.kernel.log.LogFactoryUtil;
022 import com.liferay.portal.kernel.util.CharPool;
023 import com.liferay.portal.kernel.util.MapUtil;
024 import com.liferay.portal.kernel.util.ObjectValuePair;
025 import com.liferay.portal.kernel.util.ProgressTracker;
026 import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
027 import com.liferay.portal.kernel.util.SetUtil;
028 import com.liferay.portal.kernel.util.StreamUtil;
029 import com.liferay.portal.kernel.util.StringBundler;
030 import com.liferay.portal.kernel.util.StringPool;
031 import com.liferay.portal.kernel.util.StringUtil;
032 import com.liferay.portal.kernel.util.Validator;
033 import com.liferay.portal.kernel.xml.Attribute;
034 import com.liferay.portal.kernel.xml.Document;
035 import com.liferay.portal.kernel.xml.DocumentException;
036 import com.liferay.portal.kernel.xml.Element;
037 import com.liferay.portal.kernel.xml.SAXReaderUtil;
038 import com.liferay.portal.kernel.zip.ZipReader;
039 import com.liferay.portal.kernel.zip.ZipReaderFactoryUtil;
040 import com.liferay.portal.model.User;
041 import com.liferay.portal.service.ServiceContext;
042 import com.liferay.portal.service.UserLocalServiceUtil;
043 import com.liferay.portal.util.PropsValues;
044 import com.liferay.portlet.asset.NoSuchTagException;
045 import com.liferay.portlet.asset.model.AssetTag;
046 import com.liferay.portlet.asset.service.AssetTagLocalServiceUtil;
047 import com.liferay.portlet.asset.service.AssetTagPropertyLocalServiceUtil;
048 import com.liferay.portlet.asset.util.AssetUtil;
049 import com.liferay.portlet.documentlibrary.store.DLStoreUtil;
050 import com.liferay.portlet.wiki.ImportFilesException;
051 import com.liferay.portlet.wiki.NoSuchPageException;
052 import com.liferay.portlet.wiki.importers.WikiImporter;
053 import com.liferay.portlet.wiki.importers.WikiImporterKeys;
054 import com.liferay.portlet.wiki.model.WikiNode;
055 import com.liferay.portlet.wiki.model.WikiPage;
056 import com.liferay.portlet.wiki.model.WikiPageConstants;
057 import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
058 import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
059
060 import java.io.IOException;
061 import java.io.InputStream;
062 import java.io.InputStreamReader;
063
064 import java.util.ArrayList;
065 import java.util.Collections;
066 import java.util.HashMap;
067 import java.util.List;
068 import java.util.Map;
069 import java.util.Set;
070 import java.util.regex.Matcher;
071 import java.util.regex.Pattern;
072
073
077 public class MediaWikiImporter implements WikiImporter {
078
079 public static final String SHARED_IMAGES_CONTENT = "See attachments";
080
081 public static final String SHARED_IMAGES_TITLE = "SharedImages";
082
083 @Override
084 public void importPages(
085 long userId, WikiNode node, InputStream[] inputStreams,
086 Map<String, String[]> options)
087 throws PortalException {
088
089 if ((inputStreams.length < 1) || (inputStreams[0] == null)) {
090 throw new PortalException("The pages file is mandatory");
091 }
092
093 InputStream pagesInputStream = inputStreams[0];
094 InputStream usersInputStream = inputStreams[1];
095 InputStream imagesInputStream = inputStreams[2];
096
097 try {
098 Document document = SAXReaderUtil.read(pagesInputStream);
099
100 Map<String, String> usersMap = readUsersFile(usersInputStream);
101
102 Element rootElement = document.getRootElement();
103
104 List<String> specialNamespaces = readSpecialNamespaces(rootElement);
105
106 processSpecialPages(userId, node, rootElement, specialNamespaces);
107 processRegularPages(
108 userId, node, rootElement, specialNamespaces, usersMap,
109 imagesInputStream, options);
110 processImages(userId, node, imagesInputStream);
111
112 moveFrontPage(userId, node, options);
113 }
114 catch (DocumentException de) {
115 throw new ImportFilesException("Invalid XML file provided");
116 }
117 catch (IOException ioe) {
118 throw new ImportFilesException("Error reading the files provided");
119 }
120 catch (PortalException pe) {
121 throw pe;
122 }
123 catch (Exception e) {
124 throw new PortalException(e);
125 }
126 }
127
128 protected long getUserId(
129 long userId, WikiNode node, String author,
130 Map<String, String> usersMap) {
131
132 User user = null;
133
134 String emailAddress = usersMap.get(author);
135
136 if (Validator.isNotNull(emailAddress)) {
137 user = UserLocalServiceUtil.fetchUserByEmailAddress(
138 node.getCompanyId(), emailAddress);
139 }
140 else {
141 user = UserLocalServiceUtil.fetchUserByScreenName(
142 node.getCompanyId(), StringUtil.toLowerCase(author));
143 }
144
145 if (user != null) {
146 return user.getUserId();
147 }
148
149 return userId;
150 }
151
152 protected void importPage(
153 long userId, String author, WikiNode node, String title,
154 String content, String summary, Map<String, String> usersMap,
155 boolean strictImportMode)
156 throws PortalException {
157
158 try {
159 long authorUserId = getUserId(userId, node, author, usersMap);
160 String parentTitle = readParentTitle(content);
161 String redirectTitle = readRedirectTitle(content);
162
163 ServiceContext serviceContext = new ServiceContext();
164
165 serviceContext.setAddGroupPermissions(true);
166 serviceContext.setAddGuestPermissions(true);
167 serviceContext.setAssetTagNames(
168 readAssetTagNames(userId, node, content));
169
170 if (Validator.isNull(redirectTitle)) {
171 _translator.setStrictImportMode(strictImportMode);
172
173 content = _translator.translate(content);
174 }
175 else {
176 content =
177 StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
178 StringPool.DOUBLE_CLOSE_BRACKET;
179 }
180
181 WikiPage page = null;
182
183 try {
184 page = WikiPageLocalServiceUtil.getPage(
185 node.getNodeId(), title);
186 }
187 catch (NoSuchPageException nspe) {
188 page = WikiPageLocalServiceUtil.addPage(
189 authorUserId, node.getNodeId(), title,
190 WikiPageConstants.NEW, null, true, serviceContext);
191 }
192
193 WikiPageLocalServiceUtil.updatePage(
194 authorUserId, node.getNodeId(), title, page.getVersion(),
195 content, summary, true, "creole", parentTitle, redirectTitle,
196 serviceContext);
197 }
198 catch (Exception e) {
199 throw new PortalException("Error importing page " + title, e);
200 }
201 }
202
203 protected boolean isSpecialMediaWikiPage(
204 String title, List<String> specialNamespaces) {
205
206 for (String namespace : specialNamespaces) {
207 if (title.startsWith(namespace + StringPool.COLON)) {
208 return true;
209 }
210 }
211
212 return false;
213 }
214
215 protected boolean isValidImage(String[] paths, InputStream inputStream) {
216 if (_specialMediaWikiDirs.contains(paths[0])) {
217 return false;
218 }
219
220 if ((paths.length > 1) && _specialMediaWikiDirs.contains(paths[1])) {
221 return false;
222 }
223
224 String fileName = paths[paths.length - 1];
225
226 try {
227 DLStoreUtil.validate(fileName, true, inputStream);
228 }
229 catch (PortalException pe) {
230 return false;
231 }
232 catch (SystemException se) {
233 return false;
234 }
235
236 return true;
237 }
238
239 protected void moveFrontPage(
240 long userId, WikiNode node, Map<String, String[]> options) {
241
242 String frontPageTitle = MapUtil.getString(
243 options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
244
245 if (Validator.isNotNull(frontPageTitle)) {
246 frontPageTitle = normalizeTitle(frontPageTitle);
247
248 try {
249 if (WikiPageLocalServiceUtil.getPagesCount(
250 node.getNodeId(), frontPageTitle, true) > 0) {
251
252 ServiceContext serviceContext = new ServiceContext();
253
254 serviceContext.setAddGroupPermissions(true);
255 serviceContext.setAddGuestPermissions(true);
256
257 WikiPageLocalServiceUtil.renamePage(
258 userId, node.getNodeId(), frontPageTitle,
259 WikiPageConstants.FRONT_PAGE, false, serviceContext);
260 }
261 }
262 catch (Exception e) {
263 if (_log.isWarnEnabled()) {
264 StringBundler sb = new StringBundler(4);
265
266 sb.append("Could not move ");
267 sb.append(WikiPageConstants.FRONT_PAGE);
268 sb.append(" to the title provided: ");
269 sb.append(frontPageTitle);
270
271 _log.warn(sb.toString(), e);
272 }
273 }
274 }
275 }
276
277 protected String normalize(String categoryName, int length) {
278 categoryName = AssetUtil.toWord(categoryName.trim());
279
280 return StringUtil.shorten(categoryName, length);
281 }
282
283 protected String normalizeDescription(String description) {
284 Matcher matcher = _categoriesPattern.matcher(description);
285
286 description = matcher.replaceAll(StringPool.BLANK);
287
288 return normalize(description, 255);
289 }
290
291 protected String normalizeTitle(String title) {
292 Matcher matcher = _wikiPageTitlesRemovePattern.matcher(title);
293
294 title = matcher.replaceAll(StringPool.BLANK);
295
296 return StringUtil.shorten(title, 75);
297 }
298
299 protected void processImages(
300 long userId, WikiNode node, InputStream imagesInputStream)
301 throws Exception {
302
303 if (imagesInputStream == null) {
304 return;
305 }
306
307 ProgressTracker progressTracker =
308 ProgressTrackerThreadLocal.getProgressTracker();
309
310 int count = 0;
311
312 ZipReader zipReader = ZipReaderFactoryUtil.getZipReader(
313 imagesInputStream);
314
315 List<String> entries = zipReader.getEntries();
316
317 int total = entries.size();
318
319 if (total > 0) {
320 try {
321 WikiPageLocalServiceUtil.getPage(
322 node.getNodeId(), SHARED_IMAGES_TITLE);
323 }
324 catch (NoSuchPageException nspe) {
325 ServiceContext serviceContext = new ServiceContext();
326
327 serviceContext.setAddGroupPermissions(true);
328 serviceContext.setAddGuestPermissions(true);
329
330 WikiPageLocalServiceUtil.addPage(
331 userId, node.getNodeId(), SHARED_IMAGES_TITLE,
332 SHARED_IMAGES_CONTENT, null, true, serviceContext);
333 }
334 }
335
336 List<ObjectValuePair<String, InputStream>> inputStreamOVPs =
337 new ArrayList<ObjectValuePair<String, InputStream>>();
338
339 try {
340 int percentage = 50;
341
342 for (int i = 0; i < entries.size(); i++) {
343 String entry = entries.get(i);
344
345 String key = entry;
346
347 InputStream inputStream = zipReader.getEntryAsInputStream(
348 entry);
349
350 String[] paths = StringUtil.split(key, CharPool.SLASH);
351
352 if (!isValidImage(paths, inputStream)) {
353 if (_log.isInfoEnabled()) {
354 _log.info("Ignoring " + key);
355 }
356
357 continue;
358 }
359
360 String fileName = StringUtil.toLowerCase(
361 paths[paths.length - 1]);
362
363 ObjectValuePair<String, InputStream> inputStreamOVP =
364 new ObjectValuePair<String, InputStream>(
365 fileName, inputStream);
366
367 inputStreamOVPs.add(inputStreamOVP);
368
369 count++;
370
371 if ((i % 5) == 0) {
372 WikiPageLocalServiceUtil.addPageAttachments(
373 userId, node.getNodeId(), SHARED_IMAGES_TITLE,
374 inputStreamOVPs);
375
376 inputStreamOVPs.clear();
377
378 percentage = Math.min(50 + (i * 50) / total, 99);
379
380 progressTracker.setPercent(percentage);
381 }
382 }
383
384 if (!inputStreamOVPs.isEmpty()) {
385 WikiPageLocalServiceUtil.addPageAttachments(
386 userId, node.getNodeId(), SHARED_IMAGES_TITLE,
387 inputStreamOVPs);
388 }
389 }
390 finally {
391 for (ObjectValuePair<String, InputStream> inputStreamOVP :
392 inputStreamOVPs) {
393
394 InputStream inputStream = inputStreamOVP.getValue();
395
396 StreamUtil.cleanUp(inputStream);
397 }
398 }
399
400 zipReader.close();
401
402 if (_log.isInfoEnabled()) {
403 _log.info("Imported " + count + " images into " + node.getName());
404 }
405 }
406
407 protected void processRegularPages(
408 long userId, WikiNode node, Element rootElement,
409 List<String> specialNamespaces, Map<String, String> usersMap,
410 InputStream imagesInputStream, Map<String, String[]> options) {
411
412 boolean importLatestVersion = MapUtil.getBoolean(
413 options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
414 boolean strictImportMode = MapUtil.getBoolean(
415 options, WikiImporterKeys.OPTIONS_STRICT_IMPORT_MODE);
416
417 ProgressTracker progressTracker =
418 ProgressTrackerThreadLocal.getProgressTracker();
419
420 int count = 0;
421
422 int percentage = 10;
423
424 int maxPercentage = 50;
425
426 if (imagesInputStream == null) {
427 maxPercentage = 99;
428 }
429
430 List<Element> pageElements = rootElement.elements("page");
431
432 for (int i = 0; i < pageElements.size(); i++) {
433 Element pageElement = pageElements.get(i);
434
435 String title = pageElement.elementText("title");
436
437 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
438 continue;
439 }
440
441 title = normalizeTitle(title);
442
443 percentage = Math.min(
444 10 + (i * (maxPercentage - percentage)) / pageElements.size(),
445 maxPercentage);
446
447 progressTracker.setPercent(percentage);
448
449 List<Element> revisionElements = pageElement.elements("revision");
450
451 if (importLatestVersion) {
452 Element lastRevisionElement = revisionElements.get(
453 revisionElements.size() - 1);
454
455 revisionElements = new ArrayList<Element>();
456
457 revisionElements.add(lastRevisionElement);
458 }
459
460 for (Element revisionElement : revisionElements) {
461 Element contributorElement = revisionElement.element(
462 "contributor");
463
464 String author = contributorElement.elementText("username");
465
466 String content = revisionElement.elementText("text");
467 String summary = revisionElement.elementText("comment");
468
469 try {
470 importPage(
471 userId, author, node, title, content, summary, usersMap,
472 strictImportMode);
473 }
474 catch (Exception e) {
475 if (_log.isWarnEnabled()) {
476 _log.warn(
477 "Page with title " + title +
478 " could not be imported",
479 e);
480 }
481 }
482 }
483
484 count++;
485 }
486
487 if (_log.isInfoEnabled()) {
488 _log.info("Imported " + count + " pages into " + node.getName());
489 }
490 }
491
492 protected void processSpecialPages(
493 long userId, WikiNode node, Element rootElement,
494 List<String> specialNamespaces)
495 throws PortalException {
496
497 ProgressTracker progressTracker =
498 ProgressTrackerThreadLocal.getProgressTracker();
499
500 List<Element> pageElements = rootElement.elements("page");
501
502 for (int i = 0; i < pageElements.size(); i++) {
503 Element pageElement = pageElements.get(i);
504
505 String title = pageElement.elementText("title");
506
507 if (!title.startsWith("Category:")) {
508 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
509 rootElement.remove(pageElement);
510 }
511
512 continue;
513 }
514
515 String categoryName = title.substring("Category:".length());
516
517 categoryName = normalize(categoryName, 75);
518
519 Element revisionElement = pageElement.element("revision");
520
521 String description = revisionElement.elementText("text");
522
523 description = normalizeDescription(description);
524
525 try {
526 AssetTag assetTag = null;
527
528 try {
529 assetTag = AssetTagLocalServiceUtil.getTag(
530 node.getGroupId(), categoryName);
531 }
532 catch (NoSuchTagException nste) {
533 ServiceContext serviceContext = new ServiceContext();
534
535 serviceContext.setAddGroupPermissions(true);
536 serviceContext.setAddGuestPermissions(true);
537 serviceContext.setScopeGroupId(node.getGroupId());
538
539 assetTag = AssetTagLocalServiceUtil.addTag(
540 userId, categoryName, null, serviceContext);
541
542 if (PropsValues.ASSET_TAG_PROPERTIES_ENABLED &&
543 Validator.isNotNull(description)) {
544
545 AssetTagPropertyLocalServiceUtil.addTagProperty(
546 userId, assetTag.getTagId(), "description",
547 description);
548 }
549 }
550 }
551 catch (SystemException se) {
552 _log.error(se, se);
553 }
554
555 if ((i % 5) == 0) {
556 progressTracker.setPercent((i * 10) / pageElements.size());
557 }
558 }
559 }
560
561 protected String[] readAssetTagNames(
562 long userId, WikiNode node, String content)
563 throws PortalException {
564
565 Matcher matcher = _categoriesPattern.matcher(content);
566
567 List<String> assetTagNames = new ArrayList<String>();
568
569 while (matcher.find()) {
570 String categoryName = matcher.group(1);
571
572 categoryName = normalize(categoryName, 75);
573
574 AssetTag assetTag = null;
575
576 try {
577 assetTag = AssetTagLocalServiceUtil.getTag(
578 node.getGroupId(), categoryName);
579 }
580 catch (NoSuchTagException nste) {
581 ServiceContext serviceContext = new ServiceContext();
582
583 serviceContext.setAddGroupPermissions(true);
584 serviceContext.setAddGuestPermissions(true);
585 serviceContext.setScopeGroupId(node.getGroupId());
586
587 assetTag = AssetTagLocalServiceUtil.addTag(
588 userId, categoryName, null, serviceContext);
589 }
590
591 assetTagNames.add(assetTag.getName());
592 }
593
594 if (content.contains(_WORK_IN_PROGRESS)) {
595 assetTagNames.add(_WORK_IN_PROGRESS_TAG);
596 }
597
598 return assetTagNames.toArray(new String[assetTagNames.size()]);
599 }
600
601 protected String readParentTitle(String content) {
602 Matcher matcher = _parentPattern.matcher(content);
603
604 String redirectTitle = StringPool.BLANK;
605
606 if (matcher.find()) {
607 redirectTitle = matcher.group(1);
608
609 redirectTitle = normalizeTitle(redirectTitle);
610
611 redirectTitle += " (disambiguation)";
612 }
613
614 return redirectTitle;
615 }
616
617 protected String readRedirectTitle(String content) {
618 Matcher matcher = _redirectPattern.matcher(content);
619
620 String redirectTitle = StringPool.BLANK;
621
622 if (matcher.find()) {
623 redirectTitle = matcher.group(1);
624
625 redirectTitle = normalizeTitle(redirectTitle);
626 }
627
628 return redirectTitle;
629 }
630
631 protected List<String> readSpecialNamespaces(Element root)
632 throws ImportFilesException {
633
634 List<String> namespaces = new ArrayList<String>();
635
636 Element siteinfoElement = root.element("siteinfo");
637
638 if (siteinfoElement == null) {
639 throw new ImportFilesException("Invalid pages XML file");
640 }
641
642 Element namespacesElement = siteinfoElement.element("namespaces");
643
644 List<Element> namespaceElements = namespacesElement.elements(
645 "namespace");
646
647 for (Element namespaceElement : namespaceElements) {
648 Attribute attribute = namespaceElement.attribute("key");
649
650 String value = attribute.getValue();
651
652 if (!value.equals("0")) {
653 namespaces.add(namespaceElement.getText());
654 }
655 }
656
657 return namespaces;
658 }
659
660 protected Map<String, String> readUsersFile(InputStream usersInputStream)
661 throws IOException {
662
663 if (usersInputStream == null) {
664 return Collections.emptyMap();
665 }
666
667 Map<String, String> usersMap = new HashMap<String, String>();
668
669 UnsyncBufferedReader unsyncBufferedReader = new UnsyncBufferedReader(
670 new InputStreamReader(usersInputStream));
671
672 String line = unsyncBufferedReader.readLine();
673
674 while (line != null) {
675 String[] array = StringUtil.split(line);
676
677 if ((array.length == 2) && Validator.isNotNull(array[0]) &&
678 Validator.isNotNull(array[1])) {
679
680 usersMap.put(array[0], array[1]);
681 }
682 else {
683 if (_log.isInfoEnabled()) {
684 _log.info(
685 "Ignoring line " + line +
686 " because it does not contain exactly 2 columns");
687 }
688 }
689
690 line = unsyncBufferedReader.readLine();
691 }
692
693 return usersMap;
694 }
695
696 private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
697
698 private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
699
700 private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);
701
702 private static Pattern _categoriesPattern = Pattern.compile(
703 "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
704 private static Pattern _parentPattern = Pattern.compile(
705 "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
706 private static Pattern _redirectPattern = Pattern.compile(
707 "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
708 private static Set<String> _specialMediaWikiDirs = SetUtil.fromArray(
709 new String[] {"archive", "temp", "thumb"});
710 private static Pattern _wikiPageTitlesRemovePattern = Pattern.compile(
711 PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP);
712
713 private MediaWikiToCreoleTranslator _translator =
714 new MediaWikiToCreoleTranslator();
715
716 }