Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Ronny Eichler
dataman
Commits
f5541bbb
Commit
f5541bbb
authored
Aug 21, 2015
by
Ronny Eichler
Browse files
Basic directory statistics
parent
e295406e
Changes
3
Hide whitespace changes
Inline
Side-by-side
dataman/dataman.py
0 → 100755
View file @
f5541bbb
#!/usr/bin/env python
# ? is short for builtin help
# ! allows shelling out
import
sys
import
cmd
import
tools
class DataMan(cmd.Cmd):
    """Command line tool for quick data documentation.

    Built on ``cmd.Cmd``: each ``do_<name>`` method below is exposed as the
    shell command ``<name>`` and receives the rest of the input line as a
    single string argument.
    """

    prompt = "dm> "
    intro = "Data Manager\n--Ronny's way of avoiding having to stare at spreadsheets."

    def preloop(self):
        """Hook executed once before the command loop starts (no-op for now)."""
        # process command line arguments etc.
        pass

    def do_greet(self, user):
        """greet [user name]
        Simple user greeting. When used in combination with a parameter, will
        respond with personalized greeting. Yay."""
        # print() with exactly one argument behaves the same on Python 2
        # (parenthesised expression) and Python 3 (function call).
        if user:
            # The original print statement emitted "hello " plus a separator
            # space, i.e. two spaces before the name; use a single one.
            print("hello %s" % user)
        else:
            print("hi there!")

    def do_stats(self, path):
        """stats [path]
        Print basic statistics for path (defaults to the current directory),
        followed by the table header line."""
        if not path:
            path = "."
        tools.stats(path)
        table_hdr = "{0:^25}{sep}{1}{sep}{2}{sep}{3}{sep}{4}{sep}{5}{sep}{6}{sep}".format(
            "Folder name", "size", "#files", "#vid", "#img", "#snd", "format",
            sep="|")
        print(table_hdr)

    def do_EOF(self, line):
        "Exit"
        # A true return value tells cmd.Cmd to leave the command loop.
        return True

    def postloop(self):
        """Hook executed once when the command loop ends."""
        print("Done.")
if __name__ == "__main__":
    # Arguments on the command line are joined and run as one command;
    # without arguments the interactive shell is started.
    cli_words = sys.argv[1:]
    shell = DataMan()
    if cli_words:
        shell.onecmd(' '.join(cli_words))
    else:
        shell.cmdloop()
dataman/folder_stats.py
0 → 100755
View file @
f5541bbb
#!/usr/bin/env python
from
__future__
import
print_function
import
tools
import
os
from
termcolor
import
colored
# File extensions (with leading dot) grouped by the media category they are
# counted under in the directory statistics.
EXT_VIDEO = [
    '.avi',
    '.mp4',
    '.mkv',
    '.wmv',
]
EXT_SOUND = [
    '.wav',
    '.mp3',
    '.snd',
    '.wma',
]
EXT_IMAGE = [
    '.png',
    '.bmp',
    '.jpg',
    '.jpeg',
    '.pgm',
]
EXT_DOC = [
    '.md',
    '.toml',
    '.xml',
    '.tsv',
    '.csv',
    '.txt',
    '.doc',
    '.rst',
]

# Column titles of the statistics table, in display order.
_HEADER_TITLES = ("Folder name", "size", "#fil", "#vid", "#img", "#snd", "format")

# Pre-rendered header line; every column is followed by a "|" separator.
table_hdr = (
    "{0:^28}{sep}{1:^6}{sep}{2:>3}{sep}{3:>3}{sep}{4:>3}{sep}{5:>3}{sep}{6:^10}{sep}"
).format(*_HEADER_TITLES, sep="|")

# Format template for a plain (separator-less) table row.
_row = "{0:<28}{1}{2:>4}{3:>4}{4:>4}{5:>4}{6:>10}"
def check_format(*targets):
    """Check if directory or list of files contains a dataset of known format (OE, Kwik, etc.)

    ``targets`` is either a single directory path, in which case the files
    directly inside it are inspected, or any number of file names.

    Returns ``"OpenEphys"`` or ``"Kwik"`` for the first file whose extension
    identifies that format, and ``None`` when no file matches.
    """
    if len(targets) == 1 and os.path.isdir(targets[0]):
        # Only the top level of the directory is examined.
        _root, _dirs, files = next(os.walk(targets[0]))
    else:
        # for t in targets:
        # TODO assert(os.path.exists(t))
        files = targets

    for f in files:
        extension = fext(f)
        if extension in ('.continuous',):
            return "OpenEphys"
        if extension in ('.kwx', '.kwd', '.kwik'):
            return "Kwik"

    # Report "unknown" only after every file has been looked at; giving up
    # inside the loop would let an unrelated first file hide the data files.
    return None
def fext(fname):
    """Return the extension of ``fname`` including the leading dot.

    The result is the empty string when ``os.path.splitext`` finds no
    extension.
    """
    _stem, extension = os.path.splitext(fname)
    return extension
def dir_details(path):
    """Collect summary statistics for the directory ``path``.

    Returns a dict with the keys ``fname`` (the path as given), ``size``
    (result of ``tools.dir_size``), ``num_files`` (files directly inside
    ``path``), ``num_vid``/``num_img``/``num_snd``/``num_doc`` (how many of
    those files carry an extension from the matching ``EXT_*`` list) and
    ``data_fmt`` (result of ``check_format`` on those files).
    """
    name = path
    size = tools.dir_size(path)
    _root, _dirs, files = next(os.walk(path))
    extensions = [fext(f) for f in files]

    def count(known_extensions):
        # Number of top-level files whose extension is in the given list.
        return sum(1 for ext in extensions if ext in known_extensions)

    return dict(
        fname=name,
        size=size,
        num_files=len(files),
        num_vid=count(EXT_VIDEO),
        # The image and sound counters previously used each other's
        # extension list.
        num_img=count(EXT_IMAGE),
        num_snd=count(EXT_SOUND),
        num_doc=count(EXT_DOC),
        data_fmt=check_format(*files),
    )
def gather(path):
    """Return directory details for ``path`` and its immediate sub-directories.

    The first list entry describes ``path`` itself. When ``path`` already
    contains a recognised dataset (``check_format`` is truthy) the
    sub-directories are not inspected; otherwise one entry per direct
    sub-directory follows.
    """
    root, dirs, _files = next(os.walk(path))
    details = [dir_details(root)]
    if check_format(root):
        return details

    for d in dirs:
        # os.walk yields bare directory names; they must be joined with the
        # walked root, otherwise they are resolved against the current
        # working directory and break for any path other than ".".
        entry = dir_details(os.path.join(root, d))
        # Keep showing the short folder name, as before.
        entry['fname'] = d
        details.append(entry)
    return details
def prettify(element, color=None, align='>', width=0, sepl='', sepr=''):
    """Format ``element`` as an aligned, optionally colored table cell.

    ``align`` and ``width`` are combined into a format specification
    (``width`` may be an int or a string, including the empty string).
    ``color`` is handed to ``colored`` when truthy. ``sepl`` and ``sepr`` are
    placed to the left and right of the cell and are never colored.
    """
    spec = "{0}{1}".format(align, width)
    try:
        text = format(element, spec)
    except TypeError:
        # Objects without their own __format__ (e.g. None) reject a non-empty
        # format spec on Python 3; fall back to formatting their str() form.
        text = format(str(element), spec)

    if color:
        text = colored(text, color)
    return sepl + text + sepr
def fit_str(string, max_len=10, weight=0.7):
    """Shorten ``string`` to at most ``max_len`` characters.

    The middle of an over-long string is replaced by the marker ``[..]``.
    ``weight`` is the share of the remaining characters taken from the end of
    the string; the rest is taken from its start. Strings that already fit,
    and any string when ``max_len`` is too small to hold the marker, are
    returned unchanged.
    """
    indicator = '[..]'
    if len(string) <= max_len or max_len < len(indicator):
        return string

    budget = max_len - len(indicator)
    tail = int(budget * weight)
    # Give the rounding remainder to the head so the result is exactly
    # max_len characters long.
    head = budget - tail
    # string[-0:] would be the whole string, so an empty tail needs its own
    # case.
    ending = string[-tail:] if tail else ''
    return string[:head] + indicator + ending
def mk_row(row, colorized=True,
           cols=('fname', 'size', 'num_files', 'num_vid', 'num_img', 'num_snd',
                 'data_fmt'),
           sepr='|'):
    """Render one directory-details dict as a table row string.

    ``row`` is indexed with every name in ``cols``. Known columns get a fixed
    width followed by ``sepr``; any other column is appended unformatted and
    without separator. With ``colorized`` false no color is requested for any
    cell.
    """
    cells = []
    for c in cols:
        value = row[c]
        if c == 'fname':
            cell = prettify(fit_str(value, 28), sepr=sepr, align='<', width='28')
        elif c == 'size':
            size_txt = tools.fmt_size(value, unit='', sep='', col=colorized, pad=7)
            if colorized:
                cell = prettify(size_txt, sepr=sepr, align='>', width='')
            else:
                # Without color fmt_size omits the blank it substitutes for an
                # empty prefix; pad on the right so the column keeps the same
                # visible width of six characters.
                cell = prettify(size_txt, sepr=sepr, align='<', width=6)
        elif c == 'num_files':
            # An empty folder is highlighted.
            color = 'red' if colorized and value == 0 else None
            cell = prettify(value, color=color, sepr=sepr, align='>', width=4)
        elif c in ('num_vid', 'num_img', 'num_snd'):
            color = 'green' if colorized and value > 0 else None
            cell = prettify(value, color=color, sepr=sepr, align='>', width=4)
        elif c == 'data_fmt':
            if not colorized:
                color = None
            elif value == 'OpenEphys':
                color = 'yellow'
            elif value == 'Kwik':
                color = 'green'
            else:
                color = 'red'
            cell = prettify(value, color=color, sepr=sepr, align='>', width=10)
        else:
            cell = prettify(value)
        cells.append(cell)
    return ''.join(cells)
if __name__ == "__main__":
    # Script mode: print the statistics table for the current directory.
    color = True
    print(table_hdr)
    # NOTE(review): the slice drops the last nine entries, so nothing is
    # listed for fewer than ten entries - looks like a debugging leftover;
    # confirm before changing.
    for entry in gather(".")[:-9]:
        print(mk_row(entry))
dataman/tools.py
0 → 100755
View file @
f5541bbb
#!/usr/bin/env python
import
os
from
os.path
import
join
,
getsize
from
termcolor
import
colored
def fmt_size(num, unit='B', si=True, sep=' ', col=False, pad=0):
    """Format the number ``num`` as a human readable size string.

    num  -- value to format, e.g. a byte count
    unit -- unit text appended after the prefix
    si   -- True: decimal prefixes (k, M, ...) with steps of 1000;
            False: binary prefixes (Ki, Mi, ...) with steps of 1024
    sep  -- text placed between the number and the prefix
    col  -- color the prefix via ``colored``; an empty prefix is replaced by
            a single blank in that case
    pad  -- accepted for backward compatibility; it has no effect on the
            output (the original passed it to ``format`` without using it)

    Returns the number with one decimal in a field at least five characters
    wide, followed by ``sep``, the prefix and ``unit``. Values beyond the
    largest prefix are expressed in that largest prefix.
    """
    colors = {"k": "blue", "M": "green", "G": "red", "T": "cyan",
              "Ki": "blue", "Mi": "green", "Gi": "red", "Ti": "cyan"}
    if si:
        prefixes = ['', 'k', 'M', 'G', 'T', 'P', 'E']
        divisor = 1000.0
    else:
        prefixes = ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei']
        divisor = 1024.0

    # Work in floating point so that Python 2 does not truncate the value
    # with integer division while scaling.
    num = float(num)
    largest = prefixes[-1]
    for prefix in prefixes:
        # Stop at the first prefix that fits, or at the largest available one.
        if abs(num) < divisor or prefix == largest:
            break
        num /= divisor

    if col:
        if prefix in colors:
            prefix = colored(prefix, colors[prefix])
        elif not prefix:
            prefix = ' '
        # Prefixes without a configured color (P, E, ...) stay uncolored
        # instead of raising KeyError.

    return "{:5.1f}{}{}{}".format(num, sep, prefix, unit)
def directory_content(path):
    """Return the first ``(root, dirs, files)`` triple that os.walk yields for ``path``."""
    walker = os.walk(path)
    return next(walker)
def dir_size(path):
    """Return the summed size in bytes of all files below ``path``, recursively."""
    return sum(
        os.path.getsize(os.path.join(folder, filename))
        for folder, _subdirs, filenames in os.walk(path)
        for filename in filenames
    )
def stats(path):
    """Print a short summary of the directory ``path`` to standard output.

    Shows the size and number of the files directly inside ``path``, then the
    recursive size of every direct sub-directory, then the list of file names.
    """
    # Every print() call receives exactly one pre-formatted string, which
    # produces the same output on Python 2 and Python 3.
    print("Got path: {0}".format(path))
    root, dirs, files = directory_content(path)

    own_bytes = sum(getsize(join(root, name)) for name in files)
    # The original called an undefined name (format_filesize) here; fmt_size
    # is the size formatter this module provides.
    print("{0} consumes {1} in {2} non-directory files".format(
        root, fmt_size(own_bytes), len(files)))

    print("Directories:\n")
    for d in dirs:
        # Entries in dirs are bare names relative to root, so join them
        # before measuring; otherwise only path == "." works.
        print("{0} {1}".format(d, fmt_size(dir_size(join(root, d)))))

    print("Files:\n{0}".format(files))
if __name__ == "__main__":
    # Script mode: summarise the current directory, then print its total
    # recursive size.
    stats('.')
    # Single-argument print() is valid on both Python 2 and Python 3.
    print(fmt_size(dir_size(".")))
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment