Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
edx-platform
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
edx-platform
Commits
37a4d3ab
Commit
37a4d3ab
authored
Aug 16, 2012
by
Victor Shnayder
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
hackish cleanup script
parent
395b33dd
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
112 additions
and
0 deletions
+112
-0
common/xml_cleanup.py
+112
-0
No files found.
common/xml_cleanup.py
0 → 100755
View file @
37a4d3ab
#!/usr/bin/env python
"""
Victor's xml cleanup script. A big pile of useful hacks. Do not use
without carefully reading the code and deciding that this is what you want.
In particular, the remove-meta option is only intended to be used after pulling out a policy
using the metadata_to_json management command.
"""
import
os
,
fnmatch
,
re
,
sys
from
lxml
import
etree
from
collections
import
defaultdict
# Matches any single character that is not a word character, '.' or '-'.
INVALID_CHARS = re.compile(r"[^\w.-]")


def clean(value):
    """
    Turn value into a string that is legal inside a location.

    Every character matched by INVALID_CHARS becomes an underscore, after
    which each run of consecutive underscores is squeezed down to one.
    """
    underscored = INVALID_CHARS.sub('_', value)
    collapsed = re.sub('_+', '_', underscored)
    return collapsed
# Registry of url_names that have already been seen or handed out:
# category -> set of url_names for that category.
used_names = defaultdict(set)


def clean_unique(category, name):
    """
    Return a cleaned form of name that is not yet used within category.

    If the cleaned name is free it is returned as-is.  Otherwise the
    smallest positive integer that makes it free is appended.  Either way
    the returned name is recorded in used_names before returning.
    """
    candidate = clean(name)
    taken = used_names[category]

    if candidate in taken:
        # Name clash: probe base1, base2, ... until a free one turns up.
        base = candidate
        suffix = 1
        candidate = base + str(suffix)
        while candidate in taken:
            suffix += 1
            candidate = base + str(suffix)

    taken.add(candidate)
    return candidate
def cleanup(filepath, remove_meta):
    """
    Clean up the XML file at filepath, rewriting it in place.

    For every element in the file, in this order:

    * an existing url_name is recorded in used_names under the element's tag;
    * a name attribute is replaced by an identical display_name plus a
      url_name produced by clean_unique;
    * a warning is printed if both slug and url_name are present;
    * if url_name and filename are the only two attributes and hold the
      same value, filename is dropped (the element is really a pointer tag);
    * if remove_meta is true, every attribute listed in to_remove is deleted.

    If the file cannot be read or parsed, an error message is printed and
    the file is left untouched.

    Args:
        filepath: path of the XML file to clean.
        remove_meta: whether to strip the policy metadata attributes.
    """
    # Keys that are exported to the policy file, and so
    # can be removed from the xml afterward
    to_remove = ('format', 'display_name', 'graceperiod', 'showanswer',
                 'rerandomize', 'start', 'due', 'graded', 'hide_from_toc',
                 'ispublic', 'xqa_key')

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Cleaning {}".format(filepath))
    try:
        # etree.parse reads the file by path itself, so no separate open()
        # is needed.  Comments are kept so they survive the rewrite.
        parser = etree.XMLParser(remove_comments=False)
        xml = etree.parse(filepath, parser=parser)
    except Exception:
        # Best effort: skip files that cannot be read or parsed.  Unlike a
        # bare except, this still lets KeyboardInterrupt/SystemExit through.
        print("Error parsing file {}".format(filepath))
        return

    # tag=etree.Element restricts the iteration to real elements.
    for node in xml.iter(tag=etree.Element):
        attrs = node.attrib

        if 'url_name' in attrs:
            used_names[node.tag].add(attrs['url_name'])

        if 'name' in attrs:
            # Replace name with an identical display_name, and a unique url_name
            name = attrs['name']
            attrs['display_name'] = name
            attrs['url_name'] = clean_unique(node.tag, name)
            del attrs['name']

        if 'url_name' in attrs and 'slug' in attrs:
            # Identify the offending element as tag.url_name, the same way
            # the pointer-tag message below does.
            print("WARNING: {} has both slug and url_name".format(
                "{}.{}".format(node.tag, attrs['url_name'])))

        if ('url_name' in attrs and 'filename' in attrs and
                len(attrs) == 2 and attrs['url_name'] == attrs['filename']):
            # This is a pointer tag in disguise.  Get rid of the filename.
            print('turning {}.{} into a pointer tag'.format(
                node.tag, attrs['url_name']))
            del attrs['filename']

        if remove_meta:
            for attr in to_remove:
                if attr in attrs:
                    del attrs[attr]

    # etree.tostring returns an encoded byte string, so write it verbatim in
    # binary mode.
    with open(filepath, "wb") as f:
        f.write(etree.tostring(xml))
def find_replace(directory, filePattern, remove_meta):
    """
    Run cleanup on every file under directory whose name matches filePattern.

    The tree is walked from the absolute form of directory.  filePattern is
    an fnmatch-style pattern applied to file names only, and remove_meta is
    passed through to cleanup unchanged.
    """
    top = os.path.abspath(directory)
    for root, _subdirs, names in os.walk(top):
        matching = fnmatch.filter(names, filePattern)
        for name in matching:
            cleanup(os.path.join(root, name), remove_meta)
def main(args):
    """
    Command-line entry point: validate args, then clean a directory tree.

    Args:
        args: command-line arguments without the program name.  Accepted
            forms are one argument (the directory to clean) or two (the
            directory followed by the literal 'remove-meta').  Anything
            else prints the usage string and returns without doing work.
    """
    usage = "xml_cleanup [dir] [remove-meta]"

    n = len(args)
    if n < 1 or n > 2 or (n == 2 and args[1] != 'remove-meta'):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(usage)
        return

    # A second argument, if present, was verified above to be 'remove-meta'.
    remove_meta = (n == 2)

    find_replace(args[0], '*.xml', remove_meta)
# Script entry point: pass everything after the program name to main().
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    main(cli_args)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment