Commit f5541bbb authored by Ronny Eichler
Browse files

Basic directory statistics

parent e295406e
#!/usr/bin/env python
# ? is short for builtin help
# ! allows shelling out
import sys
import cmd
import tools
class DataMan(cmd.Cmd):
    """Command line tool for quick data documentation."""

    prompt = "dm> "
    intro = "Data Manager\n --Ronny's way of avoiding having to stare at spreadsheets."

    def preloop(self):
        """Hook called once before the command loop starts; currently a no-op."""
        # process command line arguments etc.
        pass

    def do_greet(self, user):
        """greet [user name]
        Simple user greeting. When used in combination with a parameter, will
        respond with personalized greeting. Yay."""
        if user:
            # Spacing matches what `print "hello ", user` used to emit.
            print("hello  %s" % user)
        else:
            print("hi there!")

    def do_stats(self, path):
        """stats [path]
        Print the header of the directory statistics table. The path defaults
        to the current directory but is not used for anything else yet."""
        if not path:
            path = "."
        table_hdr = "{0:^25}{sep}{1}{sep}{2}{sep}{3}{sep}{4}{sep}{5}{sep}{6}{sep}".format(
            "Folder name", "size", "#files", "#vid", "#img", "#snd", "format", sep="|")
        print(table_hdr)

    def do_EOF(self, line):
        """Leave the command loop on end-of-file."""
        # A true return value tells cmd.Cmd to stop processing commands.
        return True

    def postloop(self):
        """Hook called once when the command loop ends; prints a farewell."""
        print("Done.")
if __name__ == "__main__":
    if len(sys.argv) > 1:
        # One-shot mode: treat the command line arguments as a single command.
        DataMan().onecmd(' '.join(sys.argv[1:]))
    else:
        # No arguments: start the interactive prompt instead of exiting silently.
        DataMan().cmdloop()
#!/usr/bin/env python
from __future__ import print_function
import tools
import os
from termcolor import colored
EXT_VIDEO = ['.avi', '.mp4', '.mkv', '.wmv']
EXT_SOUND = ['.wav', '.mp3', '.snd', '.wma']
EXT_IMAGE = ['.png', '.bmp', '.jpg', '.jpeg', '.pgm']
EXT_DOC = ['.md', '.toml', '.xml', '.tsv', '.csv', '.txt', '.doc', '.rst']
table_hdr = "{0:^28}{sep}{1:^6}{sep}{2:>3}{sep}{3:>3}{sep}{4:>3}{sep}{5:>3}{sep}{6:^10}{sep}".format(
"Folder name", "size", "#fil", "#vid", "#img", "#snd", "format", sep="|")
_row = "{0:<28}{1}{2:>4}{3:>4}{4:>4}{5:>4}{6:>10}"
def check_format(*targets):
    """Check if directory or list of files contains a dataset of known format (OE, Kwik, etc.)

    Args:
        *targets: Either a single directory path, whose immediate files are
            inspected, or any number of file names.

    Returns:
        "OpenEphys" or "Kwik" for the first file whose extension identifies
        one of those formats, otherwise None.
    """
    if len(targets) == 1 and os.path.isdir(targets[0]):
        # Single directory: look at the files directly inside it.
        root, dirs, files = next(os.walk(targets[0]))
    else:
        # Otherwise the arguments are taken to be the file names themselves.
        # for t in targets:
        # TODO assert(os.path.exists(t))
        files = targets

    for f in files:
        extension = fext(f)
        if extension in ('.continuous',):
            return "OpenEphys"
        if extension in ('.kwx', '.kwd', '.kwik'):
            return "Kwik"
    return None
def fext(fname):
    """Return the extension of *fname* including its leading dot ('' if none)."""
    _stem, extension = os.path.splitext(fname)
    return extension
def dir_details(path):
    """Collect summary statistics for the directory *path*.

    File counts cover only the files directly inside *path*; the size is
    whatever tools.dir_size reports for *path*.

    Returns:
        dict with the keys fname, size, num_files, num_vid, num_img, num_snd,
        num_doc and data_fmt (the result of check_format on the file names).
    """
    name = path
    size = tools.dir_size(path)
    root, dirs, files = next(os.walk(path))

    def count(extensions):
        # Number of immediate files whose extension is in *extensions*.
        return len([f for f in files if fext(f) in extensions])

    # Images are counted against EXT_IMAGE and sounds against EXT_SOUND
    # (the two lists used to be swapped).
    return dict(fname=name,
                size=size,
                num_files=len(files),
                num_vid=count(EXT_VIDEO),
                num_img=count(EXT_IMAGE),
                num_snd=count(EXT_SOUND),
                num_doc=count(EXT_DOC),
                data_fmt=check_format(*files))
def gather(path):
    """Return a list of dir_details dicts for *path* and its subdirectories.

    The entry for *path* itself comes first. A directory that check_format
    recognises as a dataset is reported but not descended into.
    """
    root, dirs, files = next(os.walk(path))
    details = [dir_details(root)]
    if check_format(root):
        # Known dataset: treat it as a leaf.
        return details
    for d in dirs:
        # NOTE(review): the loop had no body in the reviewed text; recursing
        # into each subdirectory is the assumed intent -- confirm.
        details.extend(gather(os.path.join(root, d)))
    return details
def prettify(element, color=None, align='>', width=0, sepl='', sepr=''):
    """Render *element* as one aligned, optionally coloured table cell.

    Args:
        element: Value to render.
        color: Colour name handed to termcolor's colored(); falsy for plain text.
        align: Alignment character of the format spec.
        width: Field width of the format spec.
        sepl: Text placed before the cell.
        sepr: Text placed after the cell.

    Returns:
        The formatted cell wrapped in *sepl* and *sepr*.
    """
    cell = "{0:{a}{w}}".format(element, a=align, w=width)
    # Colour only the cell text, never the separators.
    body = colored(cell, color) if color else cell
    return sepl + body + sepr
def fit_str(string, max_len=10, weight=0.7):
    """Shorten *string* to at most *max_len* characters by eliding its middle.

    The removed part is replaced by '[..]'. *weight* is the share of the
    remaining characters taken from the end of the string; the rest comes
    from its start.

    Args:
        string: Text to shorten.
        max_len: Maximum length of the result. Values below 4 leave no room
            for the indicator, so the string is returned unchanged.
        weight: Fraction of the kept characters taken from the tail.

    Returns:
        *string* unchanged when it already fits, otherwise head + '[..]' + tail.
    """
    # "<=": a string that exactly fits must not be shortened.
    if len(string) <= max_len or max_len < 4:
        return string
    indicator = '[..]'
    head = int((max_len-len(indicator))*(1-weight))
    tail = int((max_len-len(indicator))*weight)
    # Slice from an explicit start index: string[-0:] would be the whole
    # string when the tail is empty.
    return string[:head] + indicator + string[len(string)-tail:]
def mk_row(row, colorized=True, cols=('fname', 'size', 'num_files',
                                      'num_vid', 'num_img', 'num_snd',
                                      'data_fmt'), sepr='|'):
    """Render one directory-statistics mapping as a table row string.

    Args:
        row: Mapping from column name to value.
        colorized: When true, highlight the count and format columns.
        cols: Column names to render, in order. Names without a dedicated
            layout are rendered with prettify's defaults.
        sepr: Separator appended after each known column.

    Returns:
        The concatenated cells as a single string.
    """
    cells = []
    for c in cols:
        value = row[c]
        if c == 'fname':
            cell = prettify(fit_str(value, 28), sepr=sepr, align='<', width=28)
        elif c == 'size':
            # NOTE(review): size colouring is decided inside tools.fmt_size
            # (col=True) and is not gated by `colorized` here -- confirm.
            cell = prettify(tools.fmt_size(value, unit='', sep='', col=True, pad=7),
                            sepr=sepr, align='>', width='')
        elif c == 'num_files':
            # An empty directory is flagged in red.
            cell = prettify(value,
                            color='red' if value == 0 and colorized else None,
                            sepr=sepr, align='>', width=4)
        elif c in ('num_vid', 'num_img', 'num_snd'):
            # Any media present is flagged in green.
            cell = prettify(value,
                            color='green' if value > 0 and colorized else None,
                            sepr=sepr, align='>', width=4)
        elif c == 'data_fmt':
            if value == 'OpenEphys':
                color = 'yellow'
            elif value == 'Kwik':
                color = 'green'
            else:
                color = 'red'
            # str(): None does not accept an alignment format spec on Python 3.
            cell = prettify(str(value),
                            color=color if colorized else None,
                            sepr=sepr, align='>', width=10)
        else:
            cell = prettify(value)
        cells.append(cell)
    return ''.join(cells)
if __name__ == "__main__":
    # Script entry point: print one table row per gathered directory.
    color = True
    # NOTE(review): the [:-9] slice drops the last nine entries and looks like
    # a debugging leftover; the loop body was absent in the reviewed text and
    # printing each row is the assumed intent -- confirm both.
    for row in gather(".")[:-9]:
        print(mk_row(row, colorized=color))
#!/usr/bin/env python
import os
from os.path import join, getsize
from termcolor import colored
def fmt_size(num, unit='B', si=True, sep=' ', col=False, pad=0):
    """Format a quantity with a magnitude prefix, e.g. 1500 -> '  1.5 kB'.

    Args:
        num: The number to format.
        unit: Unit text appended after the prefix.
        si: Use decimal prefixes (k, M, ...; steps of 1000) when true,
            binary prefixes (Ki, Mi, ...; steps of 1024) otherwise.
        sep: Text placed between the number and the prefix.
        col: Colour the prefix with termcolor; an empty prefix becomes a
            single space so columns keep their width.
        pad: Accepted for call compatibility; it has no effect on the output.

    Returns:
        The number with one decimal in a five-character field, followed by
        *sep*, the prefix and *unit*. Values beyond the largest prefix are
        expressed in that largest prefix.
    """
    colors = {"k": "blue", "M": "green", "G": "red", "T": "cyan",
              "Ki": "blue", "Mi": "green", "Gi": "red", "Ti": "cyan"}
    if si:
        prefixes = ['', 'k', 'M', 'G', 'T', 'P', 'E']
    else:
        prefixes = ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei']
    divisor = 1000 if si else 1024

    # Convert once so that integer input is not floor-divided on Python 2.
    num = float(num)
    for prefix in prefixes[:-1]:
        if abs(num) < divisor:
            break
        num /= divisor
    else:
        # Ran out of smaller prefixes: report in the largest one.
        prefix = prefixes[-1]

    if col:
        if not prefix:
            prefix = ' '
        elif prefix in colors:
            # Prefixes without a colour entry (P, E, ...) stay uncoloured.
            prefix = colored(prefix, colors[prefix])
    return "{:5.1f}{}{}{}".format(num, sep, prefix, unit)
def directory_content(path):
    """Return the first (root, dirs, files) triple that os.walk yields for *path*."""
    walker = os.walk(path)
    return next(walker)
def dir_size(path):
    """Return the combined size in bytes of all files below *path*.

    The tree is traversed with os.walk. Entries whose size cannot be read
    (for example a dangling symlink, or a file removed during the walk) are
    skipped instead of aborting the whole traversal.
    """
    total_size = 0
    for root, dirs, files in os.walk(path):
        for f in files:
            fp = os.path.join(root, f)
            try:
                total_size += os.path.getsize(fp)
            except OSError:
                # Unreadable entry: leave it out of the total.
                continue
    return total_size
def stats(path):
    """Print a short usage report for the directory *path*.

    Reports the combined size of the files directly inside *path*, then the
    size of each immediate subdirectory, then the list of file names.
    """
    print("Got path: %s" % (path,))
    root, dirs, files = directory_content(path)
    top_level_bytes = sum(getsize(join(root, name)) for name in files)
    print("%s consumes %s in %d non-directory files"
          % (root, fmt_size(top_level_bytes), len(files)))
    print("Directories:\n")
    for d in dirs:
        # Resolve against root; a bare name would be looked up in the cwd.
        print("%s %s" % (d, fmt_size(dir_size(join(root, d)))))
    print("Files:\n%s" % (files,))
if __name__ == "__main__":
    # Script entry point: report the size of the current working directory.
    cwd_bytes = dir_size(".")
    print(fmt_size(cwd_bytes))
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment