001
014
015 package com.liferay.portlet.wiki.importers.mediawiki;
016
017 import com.liferay.portal.NoSuchUserException;
018 import com.liferay.portal.kernel.exception.PortalException;
019 import com.liferay.portal.kernel.exception.SystemException;
020 import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
021 import com.liferay.portal.kernel.log.Log;
022 import com.liferay.portal.kernel.log.LogFactoryUtil;
023 import com.liferay.portal.kernel.util.CharPool;
024 import com.liferay.portal.kernel.util.MapUtil;
025 import com.liferay.portal.kernel.util.ObjectValuePair;
026 import com.liferay.portal.kernel.util.ProgressTracker;
027 import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
028 import com.liferay.portal.kernel.util.SetUtil;
029 import com.liferay.portal.kernel.util.StreamUtil;
030 import com.liferay.portal.kernel.util.StringBundler;
031 import com.liferay.portal.kernel.util.StringPool;
032 import com.liferay.portal.kernel.util.StringUtil;
033 import com.liferay.portal.kernel.util.Validator;
034 import com.liferay.portal.kernel.xml.Attribute;
035 import com.liferay.portal.kernel.xml.Document;
036 import com.liferay.portal.kernel.xml.DocumentException;
037 import com.liferay.portal.kernel.xml.Element;
038 import com.liferay.portal.kernel.xml.SAXReaderUtil;
039 import com.liferay.portal.kernel.zip.ZipReader;
040 import com.liferay.portal.kernel.zip.ZipReaderFactoryUtil;
041 import com.liferay.portal.model.User;
042 import com.liferay.portal.service.ServiceContext;
043 import com.liferay.portal.service.UserLocalServiceUtil;
044 import com.liferay.portal.util.PropsValues;
045 import com.liferay.portlet.asset.NoSuchTagException;
046 import com.liferay.portlet.asset.model.AssetTag;
047 import com.liferay.portlet.asset.service.AssetTagLocalServiceUtil;
048 import com.liferay.portlet.asset.service.AssetTagPropertyLocalServiceUtil;
049 import com.liferay.portlet.asset.util.AssetUtil;
050 import com.liferay.portlet.documentlibrary.store.DLStoreUtil;
051 import com.liferay.portlet.wiki.ImportFilesException;
052 import com.liferay.portlet.wiki.NoSuchPageException;
053 import com.liferay.portlet.wiki.importers.WikiImporter;
054 import com.liferay.portlet.wiki.importers.WikiImporterKeys;
055 import com.liferay.portlet.wiki.model.WikiNode;
056 import com.liferay.portlet.wiki.model.WikiPage;
057 import com.liferay.portlet.wiki.model.WikiPageConstants;
058 import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
059 import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
060
061 import java.io.IOException;
062 import java.io.InputStream;
063 import java.io.InputStreamReader;
064
065 import java.util.ArrayList;
066 import java.util.Collections;
067 import java.util.HashMap;
068 import java.util.List;
069 import java.util.Map;
070 import java.util.Set;
071 import java.util.regex.Matcher;
072 import java.util.regex.Pattern;
073
074
078 public class MediaWikiImporter implements WikiImporter {
079
/**
 * Content given to the shared images page when this importer has to create
 * it.
 */
public static final String SHARED_IMAGES_CONTENT = "See attachments";

/**
 * Title of the wiki page that imported images are attached to.
 */
public static final String SHARED_IMAGES_TITLE = "SharedImages";
083
084 public void importPages(
085 long userId, WikiNode node, InputStream[] inputStreams,
086 Map<String, String[]> options)
087 throws PortalException {
088
089 if ((inputStreams.length < 1) || (inputStreams[0] == null)) {
090 throw new PortalException("The pages file is mandatory");
091 }
092
093 InputStream pagesInputStream = inputStreams[0];
094 InputStream usersInputStream = inputStreams[1];
095 InputStream imagesInputStream = inputStreams[2];
096
097 try {
098 Document document = SAXReaderUtil.read(pagesInputStream);
099
100 Map<String, String> usersMap = readUsersFile(usersInputStream);
101
102 Element rootElement = document.getRootElement();
103
104 List<String> specialNamespaces = readSpecialNamespaces(rootElement);
105
106 processSpecialPages(userId, node, rootElement, specialNamespaces);
107 processRegularPages(
108 userId, node, rootElement, specialNamespaces, usersMap,
109 imagesInputStream, options);
110 processImages(userId, node, imagesInputStream);
111
112 moveFrontPage(userId, node, options);
113 }
114 catch (DocumentException de) {
115 throw new ImportFilesException("Invalid XML file provided");
116 }
117 catch (IOException ioe) {
118 throw new ImportFilesException("Error reading the files provided");
119 }
120 catch (PortalException pe) {
121 throw pe;
122 }
123 catch (Exception e) {
124 throw new PortalException(e);
125 }
126 }
127
128 protected long getUserId(
129 long userId, WikiNode node, String author,
130 Map<String, String> usersMap)
131 throws PortalException, SystemException {
132
133 User user = null;
134
135 String emailAddress = usersMap.get(author);
136
137 try {
138 if (Validator.isNull(emailAddress)) {
139 user = UserLocalServiceUtil.getUserByScreenName(
140 node.getCompanyId(), author.toLowerCase());
141 }
142 else {
143 user = UserLocalServiceUtil.getUserByEmailAddress(
144 node.getCompanyId(), emailAddress);
145 }
146 }
147 catch (NoSuchUserException nsue) {
148 user = UserLocalServiceUtil.getUserById(userId);
149 }
150
151 return user.getUserId();
152 }
153
154 protected void importPage(
155 long userId, String author, WikiNode node, String title,
156 String content, String summary, Map<String, String> usersMap,
157 boolean strictImportMode)
158 throws PortalException {
159
160 try {
161 long authorUserId = getUserId(userId, node, author, usersMap);
162 String parentTitle = readParentTitle(content);
163 String redirectTitle = readRedirectTitle(content);
164
165 ServiceContext serviceContext = new ServiceContext();
166
167 serviceContext.setAddGroupPermissions(true);
168 serviceContext.setAddGuestPermissions(true);
169 serviceContext.setAssetTagNames(
170 readAssetTagNames(userId, node, content));
171
172 if (Validator.isNull(redirectTitle)) {
173 _translator.setStrictImportMode(strictImportMode);
174
175 content = _translator.translate(content);
176 }
177 else {
178 content =
179 StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
180 StringPool.DOUBLE_CLOSE_BRACKET;
181 }
182
183 WikiPage page = null;
184
185 try {
186 page = WikiPageLocalServiceUtil.getPage(
187 node.getNodeId(), title);
188 }
189 catch (NoSuchPageException nspe) {
190 page = WikiPageLocalServiceUtil.addPage(
191 authorUserId, node.getNodeId(), title,
192 WikiPageConstants.NEW, null, true, serviceContext);
193 }
194
195 WikiPageLocalServiceUtil.updatePage(
196 authorUserId, node.getNodeId(), title, page.getVersion(),
197 content, summary, true, "creole", parentTitle, redirectTitle,
198 serviceContext);
199 }
200 catch (Exception e) {
201 throw new PortalException("Error importing page " + title, e);
202 }
203 }
204
205 protected boolean isSpecialMediaWikiPage(
206 String title, List<String> specialNamespaces) {
207
208 for (String namespace : specialNamespaces) {
209 if (title.startsWith(namespace + StringPool.COLON)) {
210 return true;
211 }
212 }
213
214 return false;
215 }
216
217 protected boolean isValidImage(String[] paths, InputStream inputStream) {
218 if (_specialMediaWikiDirs.contains(paths[0])) {
219 return false;
220 }
221
222 if ((paths.length > 1) && _specialMediaWikiDirs.contains(paths[1])) {
223 return false;
224 }
225
226 String fileName = paths[paths.length - 1];
227
228 try {
229 DLStoreUtil.validate(fileName, true, inputStream);
230 }
231 catch (PortalException pe) {
232 return false;
233 }
234 catch (SystemException se) {
235 return false;
236 }
237
238 return true;
239 }
240
241 protected void moveFrontPage(
242 long userId, WikiNode node, Map<String, String[]> options) {
243
244 String frontPageTitle = MapUtil.getString(
245 options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
246
247 if (Validator.isNotNull(frontPageTitle)) {
248 frontPageTitle = normalizeTitle(frontPageTitle);
249
250 try {
251 if (WikiPageLocalServiceUtil.getPagesCount(
252 node.getNodeId(), frontPageTitle, true) > 0) {
253
254 ServiceContext serviceContext = new ServiceContext();
255
256 serviceContext.setAddGroupPermissions(true);
257 serviceContext.setAddGuestPermissions(true);
258
259 WikiPageLocalServiceUtil.movePage(
260 userId, node.getNodeId(), frontPageTitle,
261 WikiPageConstants.FRONT_PAGE, false, serviceContext);
262
263 }
264 }
265 catch (Exception e) {
266 if (_log.isWarnEnabled()) {
267 StringBundler sb = new StringBundler(4);
268
269 sb.append("Could not move ");
270 sb.append(WikiPageConstants.FRONT_PAGE);
271 sb.append(" to the title provided: ");
272 sb.append(frontPageTitle);
273
274 _log.warn(sb.toString(), e);
275 }
276 }
277
278 }
279
280 }
281
282 protected String normalize(String categoryName, int length) {
283 categoryName = AssetUtil.toWord(categoryName.trim());
284
285 return StringUtil.shorten(categoryName, length);
286 }
287
288 protected String normalizeDescription(String description) {
289 description = description.replaceAll(
290 _categoriesPattern.pattern(), StringPool.BLANK);
291
292 return normalize(description, 300);
293 }
294
295 protected String normalizeTitle(String title) {
296 title = title.replaceAll(
297 PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
298
299 return StringUtil.shorten(title, 75);
300 }
301
302 protected void processImages(
303 long userId, WikiNode node, InputStream imagesInputStream)
304 throws Exception {
305
306 if (imagesInputStream == null) {
307 return;
308 }
309
310 ProgressTracker progressTracker =
311 ProgressTrackerThreadLocal.getProgressTracker();
312
313 int count = 0;
314
315 ZipReader zipReader = ZipReaderFactoryUtil.getZipReader(
316 imagesInputStream);
317
318 List<String> entries = zipReader.getEntries();
319
320 int total = entries.size();
321
322 if (total > 0) {
323 try {
324 WikiPageLocalServiceUtil.getPage(
325 node.getNodeId(), SHARED_IMAGES_TITLE);
326 }
327 catch (NoSuchPageException nspe) {
328 ServiceContext serviceContext = new ServiceContext();
329
330 serviceContext.setAddGroupPermissions(true);
331 serviceContext.setAddGuestPermissions(true);
332
333 WikiPageLocalServiceUtil.addPage(
334 userId, node.getNodeId(), SHARED_IMAGES_TITLE,
335 SHARED_IMAGES_CONTENT, null, true, serviceContext);
336 }
337 }
338
339 List<ObjectValuePair<String, InputStream>> inputStreamOVPs =
340 new ArrayList<ObjectValuePair<String, InputStream>>();
341
342 try {
343 int percentage = 50;
344
345 for (int i = 0; i < entries.size(); i++) {
346 String entry = entries.get(i);
347
348 String key = entry;
349
350 InputStream inputStream = zipReader.getEntryAsInputStream(
351 entry);
352
353 String[] paths = StringUtil.split(key, CharPool.SLASH);
354
355 if (!isValidImage(paths, inputStream)) {
356 if (_log.isInfoEnabled()) {
357 _log.info("Ignoring " + key);
358 }
359
360 continue;
361 }
362
363 String fileName = paths[paths.length - 1].toLowerCase();
364
365 ObjectValuePair<String, InputStream> inputStreamOVP =
366 new ObjectValuePair<String, InputStream>(
367 fileName, inputStream);
368
369 inputStreamOVPs.add(inputStreamOVP);
370
371 count++;
372
373 if ((i % 5) == 0) {
374 WikiPageLocalServiceUtil.addPageAttachments(
375 userId, node.getNodeId(), SHARED_IMAGES_TITLE,
376 inputStreamOVPs);
377
378 inputStreamOVPs.clear();
379
380 percentage = Math.min(50 + (i * 50) / total, 99);
381
382 progressTracker.setPercent(percentage);
383 }
384 }
385
386 if (!inputStreamOVPs.isEmpty()) {
387 WikiPageLocalServiceUtil.addPageAttachments(
388 userId, node.getNodeId(), SHARED_IMAGES_TITLE,
389 inputStreamOVPs);
390 }
391 }
392 finally {
393 for (ObjectValuePair<String, InputStream> inputStreamOVP :
394 inputStreamOVPs) {
395
396 InputStream inputStream = inputStreamOVP.getValue();
397
398 StreamUtil.cleanUp(inputStream);
399 }
400 }
401
402 zipReader.close();
403
404 if (_log.isInfoEnabled()) {
405 _log.info("Imported " + count + " images into " + node.getName());
406 }
407 }
408
409 protected void processRegularPages(
410 long userId, WikiNode node, Element rootElement,
411 List<String> specialNamespaces, Map<String, String> usersMap,
412 InputStream imagesInputStream, Map<String, String[]> options) {
413
414 boolean importLatestVersion = MapUtil.getBoolean(
415 options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
416 boolean strictImportMode = MapUtil.getBoolean(
417 options, WikiImporterKeys.OPTIONS_STRICT_IMPORT_MODE);
418
419 ProgressTracker progressTracker =
420 ProgressTrackerThreadLocal.getProgressTracker();
421
422 int count = 0;
423
424 int percentage = 10;
425
426 int maxPercentage = 50;
427
428 if (imagesInputStream == null) {
429 maxPercentage = 99;
430 }
431
432 List<Element> pageElements = rootElement.elements("page");
433
434 for (int i = 0; i < pageElements.size(); i++) {
435 Element pageElement = pageElements.get(i);
436
437 String title = pageElement.elementText("title");
438
439 title = normalizeTitle(title);
440
441 percentage = Math.min(
442 10 + (i * (maxPercentage - percentage)) / pageElements.size(),
443 maxPercentage);
444
445 progressTracker.setPercent(percentage);
446
447 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
448 continue;
449 }
450
451 List<Element> revisionElements = pageElement.elements("revision");
452
453 if (importLatestVersion) {
454 Element lastRevisionElement = revisionElements.get(
455 revisionElements.size() - 1);
456
457 revisionElements = new ArrayList<Element>();
458
459 revisionElements.add(lastRevisionElement);
460 }
461
462 for (Element revisionElement : revisionElements) {
463 Element contributorElement = revisionElement.element(
464 "contributor");
465
466 String author = contributorElement.elementText("username");
467
468 String content = revisionElement.elementText("text");
469 String summary = revisionElement.elementText("comment");
470
471 try {
472 importPage(
473 userId, author, node, title, content, summary, usersMap,
474 strictImportMode);
475 }
476 catch (Exception e) {
477 if (_log.isWarnEnabled()) {
478 _log.warn(
479 "Page with title " + title +
480 " could not be imported",
481 e);
482 }
483 }
484 }
485
486 count++;
487 }
488
489 if (_log.isInfoEnabled()) {
490 _log.info("Imported " + count + " pages into " + node.getName());
491 }
492 }
493
494 protected void processSpecialPages(
495 long userId, WikiNode node, Element rootElement,
496 List<String> specialNamespaces)
497 throws PortalException {
498
499 ProgressTracker progressTracker =
500 ProgressTrackerThreadLocal.getProgressTracker();
501
502 List<Element> pageElements = rootElement.elements("page");
503
504 for (int i = 0; i < pageElements.size(); i++) {
505 Element pageElement = pageElements.get(i);
506
507 String title = pageElement.elementText("title");
508
509 if (!title.startsWith("Category:")) {
510 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
511 rootElement.remove(pageElement);
512 }
513
514 continue;
515 }
516
517 String categoryName = title.substring("Category:".length());
518
519 categoryName = normalize(categoryName, 75);
520
521 Element revisionElement = pageElement.element("revision");
522
523 String description = revisionElement.elementText("text");
524
525 description = normalizeDescription(description);
526
527 try {
528 AssetTag assetTag = null;
529
530 try {
531 assetTag = AssetTagLocalServiceUtil.getTag(
532 node.getCompanyId(), categoryName);
533 }
534 catch (NoSuchTagException nste) {
535 ServiceContext serviceContext = new ServiceContext();
536
537 serviceContext.setAddGroupPermissions(true);
538 serviceContext.setAddGuestPermissions(true);
539 serviceContext.setScopeGroupId(node.getGroupId());
540
541 assetTag = AssetTagLocalServiceUtil.addTag(
542 userId, categoryName, null, serviceContext);
543 }
544
545 if (Validator.isNotNull(description)) {
546 AssetTagPropertyLocalServiceUtil.addTagProperty(
547 userId, assetTag.getTagId(), "description",
548 description);
549 }
550 }
551 catch (SystemException se) {
552 _log.error(se, se);
553 }
554
555 if ((i % 5) == 0) {
556 progressTracker.setPercent((i * 10) / pageElements.size());
557 }
558 }
559 }
560
561 protected String[] readAssetTagNames(
562 long userId, WikiNode node, String content)
563 throws PortalException, SystemException {
564
565 Matcher matcher = _categoriesPattern.matcher(content);
566
567 List<String> assetTagNames = new ArrayList<String>();
568
569 while (matcher.find()) {
570 String categoryName = matcher.group(1);
571
572 categoryName = normalize(categoryName, 75);
573
574 AssetTag assetTag = null;
575
576 try {
577 assetTag = AssetTagLocalServiceUtil.getTag(
578 node.getGroupId(), categoryName);
579 }
580 catch (NoSuchTagException nste) {
581 ServiceContext serviceContext = new ServiceContext();
582
583 serviceContext.setAddGroupPermissions(true);
584 serviceContext.setAddGuestPermissions(true);
585 serviceContext.setScopeGroupId(node.getGroupId());
586
587 assetTag = AssetTagLocalServiceUtil.addTag(
588 userId, categoryName, null, serviceContext);
589 }
590
591 assetTagNames.add(assetTag.getName());
592 }
593
594 if (content.contains(_WORK_IN_PROGRESS)) {
595 assetTagNames.add(_WORK_IN_PROGRESS_TAG);
596 }
597
598 return assetTagNames.toArray(new String[assetTagNames.size()]);
599 }
600
601 protected String readParentTitle(String content) {
602 Matcher matcher = _parentPattern.matcher(content);
603
604 String redirectTitle = StringPool.BLANK;
605
606 if (matcher.find()) {
607 redirectTitle = matcher.group(1);
608
609 redirectTitle = normalizeTitle(redirectTitle);
610
611 redirectTitle += " (disambiguation)";
612 }
613
614 return redirectTitle;
615 }
616
617 protected String readRedirectTitle(String content) {
618 Matcher matcher = _redirectPattern.matcher(content);
619
620 String redirectTitle = StringPool.BLANK;
621
622 if (matcher.find()) {
623 redirectTitle = matcher.group(1);
624
625 redirectTitle = normalizeTitle(redirectTitle);
626 }
627
628 return redirectTitle;
629 }
630
631 protected List<String> readSpecialNamespaces(Element root)
632 throws ImportFilesException {
633
634 List<String> namespaces = new ArrayList<String>();
635
636 Element siteinfoElement = root.element("siteinfo");
637
638 if (siteinfoElement == null) {
639 throw new ImportFilesException("Invalid pages XML file");
640 }
641
642 Element namespacesElement = siteinfoElement.element("namespaces");
643
644 List<Element> namespaceElements = namespacesElement.elements(
645 "namespace");
646
647 for (Element namespaceElement : namespaceElements) {
648 Attribute attribute = namespaceElement.attribute("key");
649
650 String value = attribute.getValue();
651
652 if (!value.equals("0")) {
653 namespaces.add(namespaceElement.getText());
654 }
655 }
656
657 return namespaces;
658 }
659
660 protected Map<String, String> readUsersFile(InputStream usersInputStream)
661 throws IOException {
662
663 if (usersInputStream == null) {
664 return Collections.emptyMap();
665 }
666
667 Map<String, String> usersMap = new HashMap<String, String>();
668
669 UnsyncBufferedReader unsyncBufferedReader = new UnsyncBufferedReader(
670 new InputStreamReader(usersInputStream));
671
672 String line = unsyncBufferedReader.readLine();
673
674 while (line != null) {
675 String[] array = StringUtil.split(line);
676
677 if ((array.length == 2) && Validator.isNotNull(array[0]) &&
678 Validator.isNotNull(array[1])) {
679
680 usersMap.put(array[0], array[1]);
681 }
682 else {
683 if (_log.isInfoEnabled()) {
684 _log.info(
685 "Ignoring line " + line +
686 " because it does not contain exactly 2 columns");
687 }
688 }
689
690 line = unsyncBufferedReader.readLine();
691 }
692
693 return usersMap;
694 }
695
696 private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
697
698 private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
699
700 private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);
701
702 private static Pattern _categoriesPattern = Pattern.compile(
703 "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
704 private static Pattern _parentPattern = Pattern.compile(
705 "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
706 private static Pattern _redirectPattern = Pattern.compile(
707 "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
708 private static Set<String> _specialMediaWikiDirs = SetUtil.fromArray(
709 new String[] {"archive", "temp", "thumb"});
710
711 private MediaWikiToCreoleTranslator _translator =
712 new MediaWikiToCreoleTranslator();
713
714 }