1
22
23 package com.liferay.portlet.wiki.importers.mediawiki;
24
25 import com.liferay.documentlibrary.service.DLLocalServiceUtil;
26 import com.liferay.portal.NoSuchUserException;
27 import com.liferay.portal.PortalException;
28 import com.liferay.portal.SystemException;
29 import com.liferay.portal.kernel.log.Log;
30 import com.liferay.portal.kernel.log.LogFactoryUtil;
31 import com.liferay.portal.kernel.util.ArrayUtil;
32 import com.liferay.portal.kernel.util.MapUtil;
33 import com.liferay.portal.kernel.util.ObjectValuePair;
34 import com.liferay.portal.kernel.util.ProgressTracker;
35 import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
36 import com.liferay.portal.kernel.util.StringPool;
37 import com.liferay.portal.kernel.util.StringUtil;
38 import com.liferay.portal.kernel.util.Validator;
39 import com.liferay.portal.kernel.xml.Document;
40 import com.liferay.portal.kernel.xml.DocumentException;
41 import com.liferay.portal.kernel.xml.Element;
42 import com.liferay.portal.kernel.xml.SAXReaderUtil;
43 import com.liferay.portal.kernel.zip.ZipReader;
44 import com.liferay.portal.model.User;
45 import com.liferay.portal.service.ServiceContext;
46 import com.liferay.portal.service.UserLocalServiceUtil;
47 import com.liferay.portal.util.PropsValues;
48 import com.liferay.portlet.tags.NoSuchEntryException;
49 import com.liferay.portlet.tags.model.TagsEntry;
50 import com.liferay.portlet.tags.service.TagsEntryLocalServiceUtil;
51 import com.liferay.portlet.tags.service.TagsPropertyLocalServiceUtil;
52 import com.liferay.portlet.tags.util.TagsUtil;
53 import com.liferay.portlet.wiki.ImportFilesException;
54 import com.liferay.portlet.wiki.NoSuchPageException;
55 import com.liferay.portlet.wiki.importers.WikiImporter;
56 import com.liferay.portlet.wiki.importers.WikiImporterKeys;
57 import com.liferay.portlet.wiki.model.WikiNode;
58 import com.liferay.portlet.wiki.model.WikiPage;
59 import com.liferay.portlet.wiki.model.impl.WikiPageImpl;
60 import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
61 import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
62
63 import java.io.BufferedReader;
64 import java.io.File;
65 import java.io.FileReader;
66 import java.io.IOException;
67
68 import java.util.ArrayList;
69 import java.util.Collections;
70 import java.util.HashMap;
71 import java.util.Iterator;
72 import java.util.List;
73 import java.util.Map;
74 import java.util.regex.Matcher;
75 import java.util.regex.Pattern;
76
77
83 public class MediaWikiImporter implements WikiImporter {
84
85 public static final String SHARED_IMAGES_CONTENT = "See attachments";
86
87 public static final String SHARED_IMAGES_TITLE = "SharedImages";
88
89 public void importPages(
90 long userId, WikiNode node, File[] files,
91 Map<String, String[]> options)
92 throws PortalException {
93
94 if ((files.length < 1) || (files[0] == null) || (!files[0].exists())) {
95 throw new PortalException("The pages file is mandatory");
96 }
97
98 File pagesFile = files[0];
99 File usersFile = files[1];
100 File imagesFile = files[2];
101
102 try {
103 Document doc = SAXReaderUtil.read(pagesFile);
104
105 Map<String, String> usersMap = readUsersFile(usersFile);
106
107 Element root = doc.getRootElement();
108
109 List<String> specialNamespaces = readSpecialNamespaces(root);
110
111 processSpecialPages(userId, node, root, specialNamespaces);
112 processRegularPages(
113 userId, node, root, specialNamespaces, usersMap, imagesFile,
114 options);
115 processImages(userId, node, imagesFile);
116
117 moveFrontPage(userId, node, options);
118 }
119 catch (DocumentException de) {
120 throw new ImportFilesException("Invalid XML file provided");
121 }
122 catch (IOException de) {
123 throw new ImportFilesException("Error reading the files provided");
124 }
125 catch (PortalException e) {
126 throw e;
127 }
128 catch (Exception e) {
129 throw new PortalException(e);
130 }
131 }
132
133 protected long getUserId(
134 long userId, WikiNode node, String author,
135 Map<String, String> usersMap)
136 throws PortalException, SystemException {
137
138 User user = null;
139
140 String emailAddress = usersMap.get(author);
141
142 try {
143 if (Validator.isNull(emailAddress)) {
144 user = UserLocalServiceUtil.getUserByScreenName(
145 node.getCompanyId(), author.toLowerCase());
146 }
147 else {
148 user = UserLocalServiceUtil.getUserByEmailAddress(
149 node.getCompanyId(), emailAddress);
150 }
151 }
152 catch (NoSuchUserException nsue) {
153 user = UserLocalServiceUtil.getUserById(userId);
154 }
155
156 return user.getUserId();
157 }
158
159 protected void importPage(
160 long userId, String author, WikiNode node, String title,
161 String content, String summary, Map<String, String> usersMap)
162 throws PortalException {
163
164 try {
165 long authorUserId = getUserId(userId, node, author, usersMap);
166 String parentTitle = readParentTitle(content);
167 String redirectTitle = readRedirectTitle(content);
168
169 ServiceContext serviceContext = new ServiceContext();
170
171 serviceContext.setAddCommunityPermissions(true);
172 serviceContext.setAddGuestPermissions(true);
173 serviceContext.setTagsEntries(
174 readTagsEntries(userId, node, content));
175
176 if (Validator.isNull(redirectTitle)) {
177 content = _translator.translate(content);
178 }
179 else {
180 content =
181 StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
182 StringPool.DOUBLE_CLOSE_BRACKET;
183 }
184
185 WikiPage page = null;
186
187 try {
188 page = WikiPageLocalServiceUtil.getPage(
189 node.getNodeId(), title);
190 }
191 catch (NoSuchPageException nspe) {
192 page = WikiPageLocalServiceUtil.addPage(
193 authorUserId, node.getNodeId(), title, WikiPageImpl.NEW,
194 null, true, serviceContext);
195 }
196
197 WikiPageLocalServiceUtil.updatePage(
198 authorUserId, node.getNodeId(), title, page.getVersion(),
199 content, summary, true, "creole", parentTitle, redirectTitle,
200 serviceContext);
201 }
202 catch (Exception e) {
203 throw new PortalException("Error importing page " + title, e);
204 }
205 }
206
207 protected boolean isSpecialMediaWikiPage(
208 String title, List<String> specialNamespaces) {
209
210 for (String namespace: specialNamespaces) {
211 if (title.startsWith(namespace + StringPool.COLON)) {
212 return true;
213 }
214 }
215
216 return false;
217 }
218
219 protected boolean isValidImage(String[] paths, byte[] bytes) {
220 if (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[0])) {
221 return false;
222 }
223
224 if ((paths.length > 1) &&
225 (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[1]))) {
226
227 return false;
228 }
229
230 String fileName = paths[paths.length - 1];
231
232 try {
233 DLLocalServiceUtil.validate(fileName, bytes);
234 }
235 catch (PortalException pe) {
236 return false;
237 }
238 catch (SystemException se) {
239 return false;
240 }
241
242 return true;
243 }
244
245 protected void moveFrontPage(
246 long userId, WikiNode node, Map<String, String[]> options) {
247
248 String frontPageTitle = MapUtil.getString(
249 options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
250
251 if (Validator.isNotNull(frontPageTitle)) {
252 frontPageTitle = normalizeTitle(frontPageTitle);
253
254 try {
255 if (WikiPageLocalServiceUtil.getPagesCount(
256 node.getNodeId(), frontPageTitle, true) > 0) {
257
258 ServiceContext serviceContext = new ServiceContext();
259
260 serviceContext.setAddCommunityPermissions(true);
261 serviceContext.setAddGuestPermissions(true);
262
263 WikiPageLocalServiceUtil.movePage(
264 userId, node.getNodeId(), frontPageTitle,
265 WikiPageImpl.FRONT_PAGE, false, serviceContext);
266
267 }
268 }
269 catch (Exception e) {
270 if (_log.isWarnEnabled()) {
271 StringBuilder sb = new StringBuilder();
272
273 sb.append("Could not move ");
274 sb.append(WikiPageImpl.FRONT_PAGE);
275 sb.append(" to the title provided: ");
276 sb.append(frontPageTitle);
277
278 _log.warn(sb.toString(), e);
279 }
280 }
281
282 }
283
284 }
285
286 protected String normalize(String categoryName, int length) {
287 categoryName = TagsUtil.toWord(categoryName.trim());
288
289 return StringUtil.shorten(categoryName, length);
290 }
291
292 protected String normalizeDescription(String description) {
293 description = description.replaceAll(
294 _categoriesPattern.pattern(), StringPool.BLANK);
295
296 return normalize(description, 300);
297 }
298
299 protected String normalizeTitle(String title) {
300 title = title.replaceAll(
301 PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
302
303 return StringUtil.shorten(title, 75);
304 }
305
306 private void processImages(long userId, WikiNode node, File imagesFile)
307 throws Exception {
308
309 if ((imagesFile == null) || (!imagesFile.exists())) {
310 return;
311 }
312
313 ProgressTracker progressTracker =
314 ProgressTrackerThreadLocal.getProgressTracker();
315
316 int count = 0;
317
318 ZipReader zipReader = new ZipReader(imagesFile);
319
320 Map<String, byte[]> entries = zipReader.getEntries();
321
322 int total = entries.size();
323
324 if (total > 0) {
325 try {
326 WikiPageLocalServiceUtil.getPage(
327 node.getNodeId(), SHARED_IMAGES_TITLE);
328 }
329 catch (NoSuchPageException nspe) {
330 ServiceContext serviceContext = new ServiceContext();
331
332 serviceContext.setAddCommunityPermissions(true);
333 serviceContext.setAddGuestPermissions(true);
334
335 WikiPageLocalServiceUtil.addPage(
336 userId, node.getNodeId(), SHARED_IMAGES_TITLE,
337 SHARED_IMAGES_CONTENT, null, true, serviceContext);
338 }
339 }
340
341 List<ObjectValuePair<String, byte[]>> attachments =
342 new ArrayList<ObjectValuePair<String, byte[]>>();
343
344 Iterator<Map.Entry<String, byte[]>> itr = entries.entrySet().iterator();
345
346 int percentage = 50;
347
348 for (int i = 0; itr.hasNext(); i++) {
349 Map.Entry<String, byte[]> entry = itr.next();
350
351 String key = entry.getKey();
352 byte[] value = entry.getValue();
353
354 if (key.endsWith(StringPool.SLASH)) {
355 if (_log.isInfoEnabled()) {
356 _log.info("Ignoring " + key);
357 }
358
359 continue;
360 }
361
362 String[] paths = StringUtil.split(key, StringPool.SLASH);
363
364 if (!isValidImage(paths, value)) {
365 if (_log.isInfoEnabled()) {
366 _log.info("Ignoring " + key);
367 }
368
369 continue;
370 }
371
372 String fileName = paths[paths.length - 1].toLowerCase();
373
374 attachments.add(
375 new ObjectValuePair<String, byte[]>(fileName, value));
376
377 count++;
378
379 if ((i % 5) == 0) {
380 WikiPageLocalServiceUtil.addPageAttachments(
381 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
382
383 attachments.clear();
384
385 percentage = Math.min(50 + (i * 50) / total, 99);
386
387 progressTracker.updateProgress(percentage);
388 }
389 }
390
391 if (!attachments.isEmpty()) {
392 WikiPageLocalServiceUtil.addPageAttachments(
393 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
394 }
395
396 if (_log.isInfoEnabled()) {
397 _log.info("Imported " + count + " images into " + node.getName());
398 }
399 }
400
401 protected void processRegularPages(
402 long userId, WikiNode node, Element root,
403 List<String> specialNamespaces, Map<String, String> usersMap,
404 File imagesFile, Map<String, String[]> options) {
405
406 boolean importLatestVersion = MapUtil.getBoolean(
407 options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
408
409 ProgressTracker progressTracker =
410 ProgressTrackerThreadLocal.getProgressTracker();
411
412 int count = 0;
413
414 List<Element> pages = root.elements("page");
415
416 int total = pages.size();
417
418 Iterator<Element> itr = root.elements("page").iterator();
419
420 int percentage = 10;
421 int maxPercentage = 50;
422
423 if ((imagesFile == null) || (!imagesFile.exists())) {
424 maxPercentage = 99;
425 }
426
427 int percentageRange = maxPercentage - percentage;
428
429 for (int i = 0; itr.hasNext(); i++) {
430 Element pageEl = itr.next();
431
432 String title = pageEl.elementText("title");
433
434 title = normalizeTitle(title);
435
436 percentage = Math.min(
437 10 + (i * percentageRange) / total, maxPercentage);
438
439 progressTracker.updateProgress(percentage);
440
441 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
442 continue;
443 }
444
445 List<Element> revisionEls = pageEl.elements("revision");
446
447 if (importLatestVersion) {
448 Element lastRevisionEl = revisionEls.get(
449 revisionEls.size() - 1);
450
451 revisionEls = new ArrayList<Element>();
452
453 revisionEls.add(lastRevisionEl);
454 }
455
456 for (Element curRevisionEl : revisionEls) {
457 String author = curRevisionEl.element(
458 "contributor").elementText("username");
459 String content = curRevisionEl.elementText("text");
460 String summary = curRevisionEl.elementText("comment");
461
462 try {
463 importPage(
464 userId, author, node, title, content, summary,
465 usersMap);
466 }
467 catch (Exception e) {
468 if (_log.isWarnEnabled()) {
469 StringBuilder sb = new StringBuilder();
470
471 sb.append("Page with title ");
472 sb.append(title);
473 sb.append(" could not be imported");
474
475 _log.warn(sb.toString(), e);
476 }
477 }
478 }
479
480 count++;
481 }
482
483 if (_log.isInfoEnabled()) {
484 _log.info("Imported " + count + " pages into " + node.getName());
485 }
486 }
487
488 protected void processSpecialPages(
489 long userId, WikiNode node, Element root,
490 List<String> specialNamespaces)
491 throws PortalException {
492
493 ProgressTracker progressTracker =
494 ProgressTrackerThreadLocal.getProgressTracker();
495
496 List<Element> pages = root.elements("page");
497
498 int total = pages.size();
499
500 Iterator<Element> itr = pages.iterator();
501
502 for (int i = 0; itr.hasNext(); i++) {
503 Element page = itr.next();
504
505 String title = page.elementText("title");
506
507 if (!title.startsWith("Category:")) {
508 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
509 root.remove(page);
510 }
511
512 continue;
513 }
514
515 String categoryName = title.substring("Category:".length());
516
517 categoryName = normalize(categoryName, 75);
518
519 String description = page.element("revision").elementText("text");
520
521 description = normalizeDescription(description);
522
523 try {
524 TagsEntry tagsEntry = null;
525
526 try {
527 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
528 node.getCompanyId(), categoryName);
529 }
530 catch (NoSuchEntryException nsee) {
531 ServiceContext serviceContext = new ServiceContext();
532
533 serviceContext.setAddCommunityPermissions(true);
534 serviceContext.setAddGuestPermissions(true);
535 serviceContext.setScopeGroupId(node.getGroupId());
536
537 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
538 userId, null, categoryName, null, null, serviceContext);
539 }
540
541 if (Validator.isNotNull(description)) {
542 TagsPropertyLocalServiceUtil.addProperty(
543 userId, tagsEntry.getEntryId(), "description",
544 description);
545 }
546 }
547 catch (SystemException se) {
548 _log.error(se, se);
549 }
550
551 if ((i % 5) == 0) {
552 progressTracker.updateProgress((i * 10) / total);
553 }
554 }
555 }
556
557 protected String readParentTitle(String content) {
558 Matcher matcher = _parentPattern.matcher(content);
559
560 String redirectTitle = StringPool.BLANK;
561
562 if (matcher.find()) {
563 redirectTitle = matcher.group(1);
564
565 redirectTitle = normalizeTitle(redirectTitle);
566
567 redirectTitle += " (disambiguation)";
568 }
569
570 return redirectTitle;
571 }
572
573 protected String readRedirectTitle(String content) {
574 Matcher matcher = _redirectPattern.matcher(content);
575
576 String redirectTitle = StringPool.BLANK;
577
578 if (matcher.find()) {
579 redirectTitle = matcher.group(1);
580
581 redirectTitle = normalizeTitle(redirectTitle);
582 }
583
584 return redirectTitle;
585 }
586
587 protected List<String> readSpecialNamespaces(Element root)
588 throws ImportFilesException {
589
590 List<String> namespaces = new ArrayList<String>();
591
592 Element siteinfoEl = root.element("siteinfo");
593
594 if (siteinfoEl == null) {
595 throw new ImportFilesException("Invalid pages XML file");
596 }
597
598 Iterator<Element> itr = siteinfoEl.element(
599 "namespaces").elements("namespace").iterator();
600
601 while (itr.hasNext()) {
602 Element namespace = itr.next();
603
604 if (!namespace.attribute("key").getData().equals("0")) {
605 namespaces.add(namespace.getText());
606 }
607 }
608
609 return namespaces;
610 }
611
612 protected String[] readTagsEntries(
613 long userId, WikiNode node, String content)
614 throws PortalException, SystemException {
615
616 Matcher matcher = _categoriesPattern.matcher(content);
617
618 List<String> tagsEntries = new ArrayList<String>();
619
620 while (matcher.find()) {
621 String categoryName = matcher.group(1);
622
623 categoryName = normalize(categoryName, 75);
624
625 TagsEntry tagsEntry = null;
626
627 try {
628 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
629 node.getCompanyId(), categoryName);
630 }
631 catch (NoSuchEntryException nsee) {
632 ServiceContext serviceContext = new ServiceContext();
633
634 serviceContext.setAddCommunityPermissions(true);
635 serviceContext.setAddGuestPermissions(true);
636 serviceContext.setScopeGroupId(node.getGroupId());
637
638 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
639 userId, null, categoryName, null, null, serviceContext);
640 }
641
642 tagsEntries.add(tagsEntry.getName());
643 }
644
645 if (content.indexOf(_WORK_IN_PROGRESS) != -1) {
646 tagsEntries.add(_WORK_IN_PROGRESS_TAG);
647 }
648
649 return tagsEntries.toArray(new String[tagsEntries.size()]);
650 }
651
652 protected Map<String, String> readUsersFile(File usersFile)
653 throws IOException {
654
655 if ((usersFile == null) || (!usersFile.exists())) {
656 return Collections.EMPTY_MAP;
657 }
658
659 Map<String, String> usersMap = new HashMap<String, String>();
660
661 BufferedReader reader = new BufferedReader(new FileReader(usersFile));
662
663 String line = reader.readLine();
664
665 while (line != null) {
666 String[] array = StringUtil.split(line);
667
668 if ((array.length == 2) && (Validator.isNotNull(array[0])) &&
669 (Validator.isNotNull(array[1]))) {
670
671 usersMap.put(array[0], array[1]);
672 }
673 else {
674 if (_log.isInfoEnabled()) {
675 _log.info(
676 "Ignoring line " + line +
677 " because it does not contain exactly 2 columns");
678 }
679 }
680
681 line = reader.readLine();
682 }
683
684 return usersMap;
685 }
686
687 private static final String[] _SPECIAL_MEDIA_WIKI_DIRS = {
688 "thumb", "temp", "archive"
689 };
690
691 private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
692
693 private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
694
695 private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);
696
697 private static Pattern _categoriesPattern = Pattern.compile(
698 "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
699 private static Pattern _parentPattern = Pattern.compile(
700 "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
701 private static Pattern _redirectPattern = Pattern.compile(
702 "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
703
704 private MediaWikiToCreoleTranslator _translator =
705 new MediaWikiToCreoleTranslator();
706
707 }