Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
D
django-wiki
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
OpenEdx
django-wiki
Commits
9c5e6b00
Commit
9c5e6b00
authored
Jun 21, 2014
by
Maximilien Cuony
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Better import: Expand templates, better URL handling and internal links
parent
c4fce27b
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
55 additions
and
5 deletions
+55
-5
wiki/plugins/mediawikiimport/management/commands/mediawiki_import.py
+55
-5
No files found.
wiki/plugins/mediawikiimport/management/commands/mediawiki_import.py
View file @
9c5e6b00
...
...
@@ -8,12 +8,24 @@ from django.contrib.sites.models import Site
from
django.contrib.auth
import
get_user_model
from
optparse
import
make_option
import
string
from
django.template.defaultfilters
import
slugify
from
django.template.defaultfilters
import
striptags
import
urllib
def only_printable(s):
    """Return ``s`` with every character not in ``string.printable`` removed.

    The result is always a string, so callers can slice it or concatenate
    to it. The built-in ``filter`` only returns a string on Python 2; on
    Python 3 it returns a lazy iterator, which is why the characters are
    joined explicitly here.

    :param s: text to clean (may be empty).
    :returns: the characters of ``s`` that are in ``string.printable``,
        in their original order.
    """
    printable = string.printable
    return "".join(ch for ch in s if ch in printable)
class
Command
(
BaseCommand
):
help
=
'Import everything from a MediaWiki'
args
=
'ApiUrl Username [Password]'
articles_worked_on
=
[]
articles_imported
=
[]
matching_old_link_new_link
=
{}
option_list
=
BaseCommand
.
option_list
+
(
make_option
(
'--user-matching'
,
action
=
'append'
,
...
...
@@ -62,11 +74,26 @@ class Command(BaseCommand):
import
pypandoc
print
"Working on
%
s (
%
s)"
%
(
page
.
title
,
page
.
urltitle
)
# Filter titles to avoid strange characters.
title
=
only_printable
(
page
.
title
)
urltitle
=
slugify
(
only_printable
(
urllib
.
unquote
(
page
.
urltitle
))[:
50
])
added
=
1
while
urltitle
in
self
.
articles_worked_on
:
title
=
only_printable
(
page
.
title
)
+
" "
+
str
(
added
)
urltitle
=
only_printable
(
slugify
((
urllib
.
unquote
(
page
.
urltitle
))[:
47
]
+
" "
+
str
(
added
)))
added
+=
1
self
.
articles_worked_on
.
append
(
urltitle
)
print
"Working on
%
s (
%
s)"
%
(
title
,
urltitle
)
# Check if the URL path already exists
try
:
urlp
=
URLPath
.
objects
.
get
(
slug
=
page
.
urltitle
[:
50
])
urlp
=
URLPath
.
objects
.
get
(
slug
=
urltitle
)
self
.
matching_old_link_new_link
[
page
.
title
]
=
urlp
.
article
.
get_absolute_url
()
if
not
replace_existing
:
print
"
\t
Already existing, skipping..."
...
...
@@ -81,7 +108,7 @@ class Command(BaseCommand):
# Create article
article
=
Article
()
for
history_page
in
page
.
getHistory
()[::
-
1
]:
for
history_page
in
page
.
getHistory
()[
-
2
:][
::
-
1
]:
try
:
if
history_page
[
'user'
]
in
user_matching
:
...
...
@@ -94,7 +121,7 @@ class Command(BaseCommand):
article_revision
=
ArticleRevision
()
article_revision
.
content
=
pypandoc
.
convert
(
history_page
[
'*'
],
'md'
,
'mediawiki'
)
article_revision
.
title
=
page
.
title
article_revision
.
title
=
title
article_revision
.
user
=
user
article_revision
.
owner
=
user
...
...
@@ -103,11 +130,32 @@ class Command(BaseCommand):
article_revision
.
created
=
history_page
[
'timestamp'
]
article_revision
.
save
()
# Update the latest content WITH expanded templates
# TODO: Do that for the history as well?
article_revision
.
content
=
pypandoc
.
convert
(
striptags
(
page
.
getWikiText
(
True
,
True
)
.
decode
(
'utf-8'
))
.
replace
(
'__NOEDITSECTION__'
,
''
)
.
replace
(
'__NOTOC__'
,
''
),
'md'
,
'mediawiki'
)
article_revision
.
save
()
article
.
save
()
upath
=
URLPath
.
objects
.
create
(
site
=
current_site
,
parent
=
url_root
,
slug
=
page
.
urltitle
[:
50
]
,
article
=
article
)
upath
=
URLPath
.
objects
.
create
(
site
=
current_site
,
parent
=
url_root
,
slug
=
urltitle
,
article
=
article
)
article
.
add_object_relation
(
upath
)
self
.
matching_old_link_new_link
[
page
.
title
]
=
upath
.
article
.
get_absolute_url
()
self
.
articles_imported
.
append
((
article
,
article_revision
))
def update_links(self):
    """Update links in the imported articles.

    For every ``(article, article_revision)`` pair stored in
    ``self.articles_imported``, each link target of the form
    ``(<old title> "wikilink")`` in the revision content is replaced by
    ``(<new url>)``. The mapping from old title to new URL comes from
    ``self.matching_old_link_new_link``. Each revision is saved once, after
    all replacements have been applied to it.

    Progress is printed for every article and every mapping entry, whether
    or not the entry occurs in the article.
    """
    # The search/replace strings do not depend on the article, so build
    # them once instead of once per article.
    # TODO: still O(articles * links) overall.
    replacements = [
        (id_from, id_to, "(%s \"wikilink\")" % (id_from,), "(%s)" % (id_to,))
        for id_from, id_to in self.matching_old_link_new_link.items()
    ]

    for _article, article_revision in self.articles_imported:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Updating links of %s" % (article_revision.title,))

        content = article_revision.content
        for id_from, id_to, needle, replacement in replacements:
            print("Replacing (%s \"wikilink\") with (%s)" % (id_from, id_to))
            content = content.replace(needle, replacement)

        article_revision.content = content
        article_revision.save()
def
handle
(
self
,
*
args
,
**
options
):
try
:
...
...
@@ -143,3 +191,5 @@ class Command(BaseCommand):
for
page
in
pages
:
self
.
import_page
(
wikitools
.
api
,
site
,
page
,
current_site
,
url_root
,
user_matching
,
options
[
'replace_existing'
])
self
.
update_links
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment