001
014
015 package com.liferay.portlet.wiki.importers.mediawiki;
016
017 import com.liferay.portal.NoSuchUserException;
018 import com.liferay.portal.kernel.exception.PortalException;
019 import com.liferay.portal.kernel.exception.SystemException;
020 import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
021 import com.liferay.portal.kernel.log.Log;
022 import com.liferay.portal.kernel.log.LogFactoryUtil;
023 import com.liferay.portal.kernel.util.ArrayUtil;
024 import com.liferay.portal.kernel.util.CharPool;
025 import com.liferay.portal.kernel.util.MapUtil;
026 import com.liferay.portal.kernel.util.ObjectValuePair;
027 import com.liferay.portal.kernel.util.ProgressTracker;
028 import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
029 import com.liferay.portal.kernel.util.StreamUtil;
030 import com.liferay.portal.kernel.util.StringBundler;
031 import com.liferay.portal.kernel.util.StringPool;
032 import com.liferay.portal.kernel.util.StringUtil;
033 import com.liferay.portal.kernel.util.Validator;
034 import com.liferay.portal.kernel.xml.Document;
035 import com.liferay.portal.kernel.xml.DocumentException;
036 import com.liferay.portal.kernel.xml.Element;
037 import com.liferay.portal.kernel.xml.SAXReaderUtil;
038 import com.liferay.portal.kernel.zip.ZipReader;
039 import com.liferay.portal.kernel.zip.ZipReaderFactoryUtil;
040 import com.liferay.portal.model.User;
041 import com.liferay.portal.service.ServiceContext;
042 import com.liferay.portal.service.UserLocalServiceUtil;
043 import com.liferay.portal.util.PropsValues;
044 import com.liferay.portlet.asset.NoSuchTagException;
045 import com.liferay.portlet.asset.model.AssetTag;
046 import com.liferay.portlet.asset.service.AssetTagLocalServiceUtil;
047 import com.liferay.portlet.asset.service.AssetTagPropertyLocalServiceUtil;
048 import com.liferay.portlet.asset.util.AssetUtil;
049 import com.liferay.portlet.documentlibrary.store.DLStoreUtil;
050 import com.liferay.portlet.wiki.ImportFilesException;
051 import com.liferay.portlet.wiki.NoSuchPageException;
052 import com.liferay.portlet.wiki.importers.WikiImporter;
053 import com.liferay.portlet.wiki.importers.WikiImporterKeys;
054 import com.liferay.portlet.wiki.model.WikiNode;
055 import com.liferay.portlet.wiki.model.WikiPage;
056 import com.liferay.portlet.wiki.model.WikiPageConstants;
057 import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
058 import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
059
060 import java.io.IOException;
061 import java.io.InputStream;
062 import java.io.InputStreamReader;
063
064 import java.util.ArrayList;
065 import java.util.Collections;
066 import java.util.HashMap;
067 import java.util.Iterator;
068 import java.util.List;
069 import java.util.Map;
070 import java.util.regex.Matcher;
071 import java.util.regex.Pattern;
072
073
077 public class MediaWikiImporter implements WikiImporter {
078
/**
 * Text passed to <code>WikiPageLocalServiceUtil.addPage</code> when
 * <code>processImages</code> has to create the shared images page.
 */
079 public static final String SHARED_IMAGES_CONTENT = "See attachments";
080
/**
 * Title of the wiki page that <code>processImages</code> attaches the
 * imported images to.
 */
081 public static final String SHARED_IMAGES_TITLE = "SharedImages";
082
083 public void importPages(
084 long userId, WikiNode node, InputStream[] inputStreams,
085 Map<String, String[]> options)
086 throws PortalException {
087
088 if ((inputStreams.length < 1) || (inputStreams[0] == null)) {
089 throw new PortalException("The pages file is mandatory");
090 }
091
092 InputStream pagesInputStream = inputStreams[0];
093 InputStream usersInputStream = inputStreams[1];
094 InputStream imagesInputStream = inputStreams[2];
095
096 try {
097 Document doc = SAXReaderUtil.read(pagesInputStream);
098
099 Map<String, String> usersMap = readUsersFile(usersInputStream);
100
101 Element root = doc.getRootElement();
102
103 List<String> specialNamespaces = readSpecialNamespaces(root);
104
105 processSpecialPages(userId, node, root, specialNamespaces);
106 processRegularPages(
107 userId, node, root, specialNamespaces, usersMap,
108 imagesInputStream, options);
109 processImages(userId, node, imagesInputStream);
110
111 moveFrontPage(userId, node, options);
112 }
113 catch (DocumentException de) {
114 throw new ImportFilesException("Invalid XML file provided");
115 }
116 catch (IOException de) {
117 throw new ImportFilesException("Error reading the files provided");
118 }
119 catch (PortalException e) {
120 throw e;
121 }
122 catch (Exception e) {
123 throw new PortalException(e);
124 }
125 }
126
127 protected long getUserId(
128 long userId, WikiNode node, String author,
129 Map<String, String> usersMap)
130 throws PortalException, SystemException {
131
132 User user = null;
133
134 String emailAddress = usersMap.get(author);
135
136 try {
137 if (Validator.isNull(emailAddress)) {
138 user = UserLocalServiceUtil.getUserByScreenName(
139 node.getCompanyId(), author.toLowerCase());
140 }
141 else {
142 user = UserLocalServiceUtil.getUserByEmailAddress(
143 node.getCompanyId(), emailAddress);
144 }
145 }
146 catch (NoSuchUserException nsue) {
147 user = UserLocalServiceUtil.getUserById(userId);
148 }
149
150 return user.getUserId();
151 }
152
153 protected void importPage(
154 long userId, String author, WikiNode node, String title,
155 String content, String summary, Map<String, String> usersMap)
156 throws PortalException {
157
158 try {
159 long authorUserId = getUserId(userId, node, author, usersMap);
160 String parentTitle = readParentTitle(content);
161 String redirectTitle = readRedirectTitle(content);
162
163 ServiceContext serviceContext = new ServiceContext();
164
165 serviceContext.setAddGroupPermissions(true);
166 serviceContext.setAddGuestPermissions(true);
167 serviceContext.setAssetTagNames(
168 readAssetTagNames(userId, node, content));
169
170 if (Validator.isNull(redirectTitle)) {
171 content = _translator.translate(content);
172 }
173 else {
174 content =
175 StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
176 StringPool.DOUBLE_CLOSE_BRACKET;
177 }
178
179 WikiPage page = null;
180
181 try {
182 page = WikiPageLocalServiceUtil.getPage(
183 node.getNodeId(), title);
184 }
185 catch (NoSuchPageException nspe) {
186 page = WikiPageLocalServiceUtil.addPage(
187 authorUserId, node.getNodeId(), title,
188 WikiPageConstants.NEW, null, true, serviceContext);
189 }
190
191 WikiPageLocalServiceUtil.updatePage(
192 authorUserId, node.getNodeId(), title, page.getVersion(),
193 content, summary, true, "creole", parentTitle, redirectTitle,
194 serviceContext);
195 }
196 catch (Exception e) {
197 throw new PortalException("Error importing page " + title, e);
198 }
199 }
200
201 protected boolean isSpecialMediaWikiPage(
202 String title, List<String> specialNamespaces) {
203
204 for (String namespace: specialNamespaces) {
205 if (title.startsWith(namespace + StringPool.COLON)) {
206 return true;
207 }
208 }
209
210 return false;
211 }
212
213 protected boolean isValidImage(String[] paths, InputStream inputStream) {
214 if (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[0])) {
215 return false;
216 }
217
218 if ((paths.length > 1) &&
219 (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[1]))) {
220
221 return false;
222 }
223
224 String fileName = paths[paths.length - 1];
225
226 try {
227 DLStoreUtil.validate(fileName, true, inputStream);
228 }
229 catch (PortalException pe) {
230 return false;
231 }
232 catch (SystemException se) {
233 return false;
234 }
235
236 return true;
237 }
238
239 protected void moveFrontPage(
240 long userId, WikiNode node, Map<String, String[]> options) {
241
242 String frontPageTitle = MapUtil.getString(
243 options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
244
245 if (Validator.isNotNull(frontPageTitle)) {
246 frontPageTitle = normalizeTitle(frontPageTitle);
247
248 try {
249 if (WikiPageLocalServiceUtil.getPagesCount(
250 node.getNodeId(), frontPageTitle, true) > 0) {
251
252 ServiceContext serviceContext = new ServiceContext();
253
254 serviceContext.setAddGroupPermissions(true);
255 serviceContext.setAddGuestPermissions(true);
256
257 WikiPageLocalServiceUtil.movePage(
258 userId, node.getNodeId(), frontPageTitle,
259 WikiPageConstants.FRONT_PAGE, false, serviceContext);
260
261 }
262 }
263 catch (Exception e) {
264 if (_log.isWarnEnabled()) {
265 StringBundler sb = new StringBundler(4);
266
267 sb.append("Could not move ");
268 sb.append(WikiPageConstants.FRONT_PAGE);
269 sb.append(" to the title provided: ");
270 sb.append(frontPageTitle);
271
272 _log.warn(sb.toString(), e);
273 }
274 }
275
276 }
277
278 }
279
280 protected String normalize(String categoryName, int length) {
281 categoryName = AssetUtil.toWord(categoryName.trim());
282
283 return StringUtil.shorten(categoryName, length);
284 }
285
286 protected String normalizeDescription(String description) {
287 description = description.replaceAll(
288 _categoriesPattern.pattern(), StringPool.BLANK);
289
290 return normalize(description, 300);
291 }
292
293 protected String normalizeTitle(String title) {
294 title = title.replaceAll(
295 PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
296
297 return StringUtil.shorten(title, 75);
298 }
299
300 protected void processImages(
301 long userId, WikiNode node, InputStream imagesInputStream)
302 throws Exception {
303
304 if (imagesInputStream == null) {
305 return;
306 }
307
308 ProgressTracker progressTracker =
309 ProgressTrackerThreadLocal.getProgressTracker();
310
311 int count = 0;
312
313 ZipReader zipReader = ZipReaderFactoryUtil.getZipReader(
314 imagesInputStream);
315
316 List<String> entries = zipReader.getEntries();
317
318 int total = entries.size();
319
320 if (total > 0) {
321 try {
322 WikiPageLocalServiceUtil.getPage(
323 node.getNodeId(), SHARED_IMAGES_TITLE);
324 }
325 catch (NoSuchPageException nspe) {
326 ServiceContext serviceContext = new ServiceContext();
327
328 serviceContext.setAddGroupPermissions(true);
329 serviceContext.setAddGuestPermissions(true);
330
331 WikiPageLocalServiceUtil.addPage(
332 userId, node.getNodeId(), SHARED_IMAGES_TITLE,
333 SHARED_IMAGES_CONTENT, null, true, serviceContext);
334 }
335 }
336
337 List<ObjectValuePair<String, InputStream>> inputStreamOVPs =
338 new ArrayList<ObjectValuePair<String, InputStream>>();
339
340 try {
341 int percentage = 50;
342
343 for (int i = 0; i < entries.size(); i++) {
344 String entry = entries.get(i);
345
346 String key = entry;
347
348 InputStream inputStream = zipReader.getEntryAsInputStream(
349 entry);
350
351 String[] paths = StringUtil.split(key, CharPool.SLASH);
352
353 if (!isValidImage(paths, inputStream)) {
354 if (_log.isInfoEnabled()) {
355 _log.info("Ignoring " + key);
356 }
357
358 continue;
359 }
360
361 String fileName = paths[paths.length - 1].toLowerCase();
362
363 ObjectValuePair<String, InputStream> inputStreamOVP =
364 new ObjectValuePair<String, InputStream>(
365 fileName, inputStream);
366
367 inputStreamOVPs.add(inputStreamOVP);
368
369 count++;
370
371 if ((i % 5) == 0) {
372 WikiPageLocalServiceUtil.addPageAttachments(
373 userId, node.getNodeId(), SHARED_IMAGES_TITLE,
374 inputStreamOVPs);
375
376 inputStreamOVPs.clear();
377
378 percentage = Math.min(50 + (i * 50) / total, 99);
379
380 progressTracker.updateProgress(percentage);
381 }
382 }
383
384 if (!inputStreamOVPs.isEmpty()) {
385 WikiPageLocalServiceUtil.addPageAttachments(
386 userId, node.getNodeId(), SHARED_IMAGES_TITLE,
387 inputStreamOVPs);
388 }
389 }
390 finally {
391 for (ObjectValuePair<String, InputStream> inputStreamOVP :
392 inputStreamOVPs) {
393
394 InputStream inputStream = inputStreamOVP.getValue();
395
396 StreamUtil.cleanUp(inputStream);
397 }
398 }
399
400 zipReader.close();
401
402 if (_log.isInfoEnabled()) {
403 _log.info("Imported " + count + " images into " + node.getName());
404 }
405 }
406
407 protected void processRegularPages(
408 long userId, WikiNode node, Element root,
409 List<String> specialNamespaces, Map<String, String> usersMap,
410 InputStream imagesInputStream, Map<String, String[]> options) {
411
412 boolean importLatestVersion = MapUtil.getBoolean(
413 options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
414
415 ProgressTracker progressTracker =
416 ProgressTrackerThreadLocal.getProgressTracker();
417
418 int count = 0;
419
420 List<Element> pages = root.elements("page");
421
422 int total = pages.size();
423
424 Iterator<Element> itr = root.elements("page").iterator();
425
426 int percentage = 10;
427 int maxPercentage = 50;
428
429 if (imagesInputStream == null) {
430 maxPercentage = 99;
431 }
432
433 int percentageRange = maxPercentage - percentage;
434
435 for (int i = 0; itr.hasNext(); i++) {
436 Element pageEl = itr.next();
437
438 String title = pageEl.elementText("title");
439
440 title = normalizeTitle(title);
441
442 percentage = Math.min(
443 10 + (i * percentageRange) / total, maxPercentage);
444
445 progressTracker.updateProgress(percentage);
446
447 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
448 continue;
449 }
450
451 List<Element> revisionEls = pageEl.elements("revision");
452
453 if (importLatestVersion) {
454 Element lastRevisionEl = revisionEls.get(
455 revisionEls.size() - 1);
456
457 revisionEls = new ArrayList<Element>();
458
459 revisionEls.add(lastRevisionEl);
460 }
461
462 for (Element curRevisionEl : revisionEls) {
463 String author = curRevisionEl.element(
464 "contributor").elementText("username");
465 String content = curRevisionEl.elementText("text");
466 String summary = curRevisionEl.elementText("comment");
467
468 try {
469 importPage(
470 userId, author, node, title, content, summary,
471 usersMap);
472 }
473 catch (Exception e) {
474 if (_log.isWarnEnabled()) {
475 StringBundler sb = new StringBundler(3);
476
477 sb.append("Page with title ");
478 sb.append(title);
479 sb.append(" could not be imported");
480
481 _log.warn(sb.toString(), e);
482 }
483 }
484 }
485
486 count++;
487 }
488
489 if (_log.isInfoEnabled()) {
490 _log.info("Imported " + count + " pages into " + node.getName());
491 }
492 }
493
494 protected void processSpecialPages(
495 long userId, WikiNode node, Element root,
496 List<String> specialNamespaces)
497 throws PortalException {
498
499 ProgressTracker progressTracker =
500 ProgressTrackerThreadLocal.getProgressTracker();
501
502 List<Element> pages = root.elements("page");
503
504 int total = pages.size();
505
506 Iterator<Element> itr = pages.iterator();
507
508 for (int i = 0; itr.hasNext(); i++) {
509 Element page = itr.next();
510
511 String title = page.elementText("title");
512
513 if (!title.startsWith("Category:")) {
514 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
515 root.remove(page);
516 }
517
518 continue;
519 }
520
521 String categoryName = title.substring("Category:".length());
522
523 categoryName = normalize(categoryName, 75);
524
525 String description = page.element("revision").elementText("text");
526
527 description = normalizeDescription(description);
528
529 try {
530 AssetTag assetTag = null;
531
532 try {
533 assetTag = AssetTagLocalServiceUtil.getTag(
534 node.getCompanyId(), categoryName);
535 }
536 catch (NoSuchTagException nste) {
537 ServiceContext serviceContext = new ServiceContext();
538
539 serviceContext.setAddGroupPermissions(true);
540 serviceContext.setAddGuestPermissions(true);
541 serviceContext.setScopeGroupId(node.getGroupId());
542
543 assetTag = AssetTagLocalServiceUtil.addTag(
544 userId, categoryName, null, serviceContext);
545 }
546
547 if (Validator.isNotNull(description)) {
548 AssetTagPropertyLocalServiceUtil.addTagProperty(
549 userId, assetTag.getTagId(), "description",
550 description);
551 }
552 }
553 catch (SystemException se) {
554 _log.error(se, se);
555 }
556
557 if ((i % 5) == 0) {
558 progressTracker.updateProgress((i * 10) / total);
559 }
560 }
561 }
562
563 protected String[] readAssetTagNames(
564 long userId, WikiNode node, String content)
565 throws PortalException, SystemException {
566
567 Matcher matcher = _categoriesPattern.matcher(content);
568
569 List<String> assetTagNames = new ArrayList<String>();
570
571 while (matcher.find()) {
572 String categoryName = matcher.group(1);
573
574 categoryName = normalize(categoryName, 75);
575
576 AssetTag assetTag = null;
577
578 try {
579 assetTag = AssetTagLocalServiceUtil.getTag(
580 node.getGroupId(), categoryName);
581 }
582 catch (NoSuchTagException nste) {
583 ServiceContext serviceContext = new ServiceContext();
584
585 serviceContext.setAddGroupPermissions(true);
586 serviceContext.setAddGuestPermissions(true);
587 serviceContext.setScopeGroupId(node.getGroupId());
588
589 assetTag = AssetTagLocalServiceUtil.addTag(
590 userId, categoryName, null, serviceContext);
591 }
592
593 assetTagNames.add(assetTag.getName());
594 }
595
596 if (content.indexOf(_WORK_IN_PROGRESS) != -1) {
597 assetTagNames.add(_WORK_IN_PROGRESS_TAG);
598 }
599
600 return assetTagNames.toArray(new String[assetTagNames.size()]);
601 }
602
603 protected String readParentTitle(String content) {
604 Matcher matcher = _parentPattern.matcher(content);
605
606 String redirectTitle = StringPool.BLANK;
607
608 if (matcher.find()) {
609 redirectTitle = matcher.group(1);
610
611 redirectTitle = normalizeTitle(redirectTitle);
612
613 redirectTitle += " (disambiguation)";
614 }
615
616 return redirectTitle;
617 }
618 protected String readRedirectTitle(String content) {
619 Matcher matcher = _redirectPattern.matcher(content);
620
621 String redirectTitle = StringPool.BLANK;
622
623 if (matcher.find()) {
624 redirectTitle = matcher.group(1);
625
626 redirectTitle = normalizeTitle(redirectTitle);
627 }
628
629 return redirectTitle;
630 }
631 protected List<String> readSpecialNamespaces(Element root)
632 throws ImportFilesException {
633
634 List<String> namespaces = new ArrayList<String>();
635
636 Element siteinfoEl = root.element("siteinfo");
637
638 if (siteinfoEl == null) {
639 throw new ImportFilesException("Invalid pages XML file");
640 }
641
642 Iterator<Element> itr = siteinfoEl.element(
643 "namespaces").elements("namespace").iterator();
644
645 while (itr.hasNext()) {
646 Element namespace = itr.next();
647
648 if (!namespace.attribute("key").getData().equals("0")) {
649 namespaces.add(namespace.getText());
650 }
651 }
652
653 return namespaces;
654 }
655
656 protected Map<String, String> readUsersFile(InputStream usersInputStream)
657 throws IOException {
658
659 if (usersInputStream == null) {
660 return Collections.emptyMap();
661 }
662
663 Map<String, String> usersMap = new HashMap<String, String>();
664
665 UnsyncBufferedReader unsyncBufferedReader = new UnsyncBufferedReader(
666 new InputStreamReader(usersInputStream));
667
668 String line = unsyncBufferedReader.readLine();
669
670 while (line != null) {
671 String[] array = StringUtil.split(line);
672
673 if ((array.length == 2) && (Validator.isNotNull(array[0])) &&
674 (Validator.isNotNull(array[1]))) {
675
676 usersMap.put(array[0], array[1]);
677 }
678 else {
679 if (_log.isInfoEnabled()) {
680 _log.info(
681 "Ignoring line " + line +
682 " because it does not contain exactly 2 columns");
683 }
684 }
685
686 line = unsyncBufferedReader.readLine();
687 }
688
689 return usersMap;
690 }
691
692 private static final String[] _SPECIAL_MEDIA_WIKI_DIRS = {
693 "thumb", "temp", "archive"
694 };
695
696 private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
697
698 private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
699
700 private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);
701
702 private static Pattern _categoriesPattern = Pattern.compile(
703 "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
704 private static Pattern _parentPattern = Pattern.compile(
705 "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
706 private static Pattern _redirectPattern = Pattern.compile(
707 "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
708
709 private MediaWikiToCreoleTranslator _translator =
710 new MediaWikiToCreoleTranslator();
711
712 }