Commit bdac441f authored by Paul Fiterau Brostean's avatar Paul Fiterau Brostean
Browse files

Added stdev/median calculation in benchmarks

parent 25eb76d3
......@@ -14,9 +14,11 @@ from model.ra import RegisterMachine
from sut import SUTType, ScalableSUTClass
from sut.fifoset import FIFOSetClass
from sut.login import LoginClass
from sut.stack import StackClass
from test import TestGenerator
from learn.algorithm import learn_mbt, Statistics
from test.rwalk import DFARWalkFromState, MealyRWalkFromState, RARWalkFromState, IORARWalkFromState
from statistics import stdev, median
class SutDesc(collections.namedtuple("SutDesc", 'sut_class type size')):
......@@ -24,10 +26,12 @@ class SutDesc(collections.namedtuple("SutDesc", 'sut_class type size')):
return str(self.type).replace("SUTType.","") + "_" + str(self.sut_class.__class__.__name__).replace("Class", "") + "(" + str(self.size) + ")"
TestDesc = collections.namedtuple("TestDesc", 'max_tests rand_length prop_reset')
class CollectedStats(collections.namedtuple("CollectedStats", "states registers learn_tests "
class ExperimentStats(collections.namedtuple("CollectedStats", "states registers learn_tests "
"learn_inputs total_tests learn_time")):
pass
class CollatedStats(collections.namedtuple("CollatedStats", "exp_succ states registers consistent avg_ltests std_ltests avg_inputs std_inputs avg_ltime std_ltime")):
pass
def get_learner_setup(sut_type:SUTType):
if sut_type is SUTType.DFA:
......@@ -61,7 +65,7 @@ class Benchmark:
return self
def _run_benchmark(self, sut_class:ScalableSUTClass, sut_type:SUTType, test_desc:TestDesc, tout:int) \
-> List[Tuple[SutDesc, CollectedStats]]:
-> List[Tuple[SutDesc, ExperimentStats]]:
results = []
size = 1
while True:
......@@ -82,32 +86,66 @@ class Benchmark:
size += 1
return results
def _collect_stats(self, model:Automaton, statistics:Statistics) -> CollectedStats:
def _collect_stats(self, model:Automaton, statistics:Statistics) -> ExperimentStats:
states = len(model.states())
registers = len(model.registers()) if isinstance(model, RegisterMachine) else None
learn_tests = statistics.num_learner_tests
learn_inputs = statistics.num_learner_inputs
total_tests = statistics.suite_size
learn_time = sum(statistics.learning_times)
return CollectedStats(states=states, registers=registers, learn_tests=learn_tests, learn_inputs=learn_inputs,
total_tests=total_tests, learn_time=learn_time)
return ExperimentStats(states=states, registers=registers, learn_tests=learn_tests, learn_inputs=learn_inputs,
total_tests=total_tests, learn_time=learn_time)
def run_benchmarks(self, test_desc:TestDesc, timeout:int) -> List[Tuple[SutDesc, CollectedStats]]:
def run_benchmarks(self, test_desc:TestDesc, timeout:int) -> List[Tuple[SutDesc, ExperimentStats]]:
results = []
for sut_class, sut_type in self.suts:
res = self._run_benchmark(sut_class, sut_type, test_desc, timeout)
results.extend(res)
return results
def collate_stats(sut_desc: SutDesc, exp_stats:List[ExperimentStats]):
if exp_stats is None:
return None
else:
def print_results(results : List[Tuple[SutDesc, CollectedStats]]):
states = [e.states for e in exp_stats]
avg_states = median(states)
regs = [e.registers for e in exp_stats]
if sut_desc.type.has_registers():
avg_reg = median(regs)
else:
avg_reg = None
consistent = len(set(states)) == 1 and \
(not sut_desc.type.has_registers() or len(set(regs)) == 1)
exp_succ = len(exp_stats)
ltests = [e.learn_tests for e in exp_stats]
linputs = [e.learn_inputs for e in exp_stats]
ltime = [e.learn_time for e in exp_stats]
return CollatedStats(
exp_succ=exp_succ,
states=avg_states,
registers=avg_reg,
consistent=consistent,
avg_ltests=median(ltests),
std_ltests=0 if len(ltests) == 1 else stdev(ltests),
avg_inputs=median(linputs),
std_inputs=0 if len(linputs) == 1 else stdev(linputs),
avg_ltime=median(ltime),
std_ltime=0 if len(ltime) == 1 else stdev(ltime),
)
#"exp_succ states registers consistent "
# "avg_ltests std_ltests avg_inputs std_inputs "
# "avg_ltime std_ltime"
def print_results(results : List[Tuple[SutDesc, ExperimentStats]]):
if len(results) == 0:
print ("No statistics to report on")
else:
for sut_desc,stats in results:
print(sut_desc, " ", stats)
b = Benchmark()
# adding learning setups for each type of machine
......@@ -118,20 +156,39 @@ b = Benchmark()
#b.add_setup(SUTType.IORA, RALearner(IORAEncoder()), IORARWalkFromState)
# add the sut classes we want to benchmark
#b.add_sut(LoginClass(), SUTType.DFA)
#b.add_sut(LoginClass())
b.add_sut(FIFOSetClass())
b.add_sut(LoginClass())
b.add_sut(StackClass())
# create a test description
t_desc = TestDesc(max_tests=10000, prop_reset=0.1, rand_length=5)
t_desc = TestDesc(max_tests=10000, prop_reset=0.2, rand_length=4)
# give an smt timeout value (in ms)
timeout = 100
timeout = 10000
# how many times each experiment should be run
num_exp = 4
# run the benchmark and collect results
results = b.run_benchmarks(t_desc, timeout)
results = []
for i in range(0, num_exp):
results += b.run_benchmarks(t_desc, timeout)
# sort according to the sut_desc (the first element)
results.sort()
# print results
print_results(results)
sut_dict = dict()
for sut_desc,exp in results:
if sut_desc not in sut_dict:
sut_dict[sut_desc] = list()
sut_dict[sut_desc].append(exp)
collated_stats = [(sut_desc, collate_stats(sut_desc, experiments)) for sut_desc, experiments in sut_dict.items()]
for (sut_desc, c_stat) in collated_stats:
print(sut_desc, " ", c_stat)
......@@ -106,6 +106,9 @@ class SUTType(Enum):
def is_acceptor(self):
return self == SUTType.RA or self.DFA
def has_registers(self):
return self == SUTType.RA or self == SUTType.IORA
def is_transducer(self):
return not self.is_acceptor()
......
......@@ -28,7 +28,7 @@ class Stack():
class StackClass(ScalableSUTClass):
def __init__(self):
super({
super().__init__({
SUTType.IORA: Stack,
SUTType.RA: Stack,
})
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment