Commit 43f3e7ae authored by Gijs van Cuyck's avatar Gijs van Cuyck

created methods for creating a test suite using the Wp method and the

separating family as state identifiers. No testing has been done as of
yet.
parent 6a291bcc
......@@ -4,6 +4,6 @@ project(complete_ads)
set(CMAKE_CXX_STANDARD 17)
#set(CMAKE_CXX_FLAGS "-static-libgcc -static-libstdc++")
#set(CMAKE_CXX_FLAGS "-static-libgcc")
set(SOURCE_FILES main.cpp src/main_test.cpp)
set(SOURCE_FILES main.cpp main_test.cpp)
add_subdirectory("lib")
add_subdirectory("src")
\ No newline at end of file
No preview for this file type
......@@ -9,7 +9,7 @@
readable_splitting_tree translate_splitting_tree(splitting_tree & tree, std::vector<std::string> & inputs, std::vector<std::string> & states) {
readable_splitting_tree translate_splitting_tree(const splitting_tree & tree, const std::vector<std::string> & inputs, const std::vector<std::string> & states) {
readable_splitting_tree translated_tree(tree.states.size(),tree.depth);
transform(tree.states.begin(),tree.states.end(),translated_tree.states.begin(),[&states](state s){return states[s];});
for (int i = 0; i<tree.separators.size(); i++)
......
......@@ -26,7 +26,7 @@ struct readable_splitting_tree {
};
//uses the translations described in inputs and states to translate a splitting tree into a readable splitting tree.
readable_splitting_tree translate_splitting_tree(splitting_tree & tree, std::vector<std::string> & inputs, std::vector<std::string> & states);
readable_splitting_tree translate_splitting_tree(const splitting_tree & tree, const std::vector<std::string> & inputs, const std::vector<std::string> & states);
//outputs a readable splitting tree into an ostream. used for turning it into a string or saving it to a file.
std::ostream& rst_to_stream(readable_splitting_tree & tree, std::ostream & output);
......
//
// Created by Gijs van Cuyck on 20/11/2018.
//
#include "test_suite.hpp"
#include "trie.hpp"
#include "vector_printing.hpp"
#include <numeric>
#include <queue>
#include <cassert>
#include <iostream>
//creates a minimal state cover for the given mealy machine using breadth-first search.
//the result holds one access sequence per state: entry v is the input word that leads from starting_state to v.
//states that are never reached by the search keep an empty word, just like starting_state itself.
state_cover create_state_cover(const mealy &machine, const state starting_state)
{
    state_cover access_sequences(machine.graph_size);
    std::vector<bool> visited(machine.graph_size, false);
    std::queue<state> frontier;

    visited[starting_state] = true;
    frontier.push(starting_state);

    while (!frontier.empty())
    {
        const state current = frontier.front();
        frontier.pop();

        //inputs are tried in increasing order, so the first sequence found for a state is the one that is kept.
        for (input symbol = 0; symbol < machine.input_size; symbol++)
        {
            const auto successor = apply(machine, current, symbol).to;
            if (!visited[successor])
            {
                visited[successor] = true;
                frontier.push(successor);
                //the access sequence of the successor is the one of the current state extended with this input.
                access_sequences[successor] = access_sequences[current];
                access_sequences[successor].push_back(symbol);
            }
        }
    }

    assert(machine.graph_size == access_sequences.size());
    return access_sequences;
}
//calls output_buffer once for every word formed by concatenating an element of prefixes with an element of suffixes.
//the words are produced prefix by prefix, and for each prefix in the order of suffixes.
template<typename Fun>
void set_concatination(const std::vector<word> &prefixes, const std::vector<word> &suffixes, Fun &&output_buffer)
{
    for (const word &head : prefixes)
    {
        for (const word &tail : suffixes)
        {
            word combined;
            combined.reserve(head.size() + tail.size());
            combined.insert(combined.end(), head.begin(), head.end());
            combined.insert(combined.end(), tail.begin(), tail.end());
            output_buffer(combined);
        }
    }
}
//builds a test suite for machine m, using the sets in family as state identifiers.
//the state cover that the tests are built on is computed from starting_state.
test_suite create_test_suite(const separating_family &family, const mealy &m, const state starting_state)
{
    //tests are collected in a trie: flattening a trie only yields words that are not a prefix of another word,
    //so tests that are already covered by a longer test are dropped and nothing is tested twice.
    trie<input> collected_tests;
    //sink that stores every word it is called with in the trie.
    auto remember = [&collected_tests](const word &test)
    {
        collected_tests.insert(test);
    };

    const state_cover access_sequences = create_state_cover(m, starting_state);

    //first part of the Wp method: check that all states exist by going to every state
    //and applying every state identifier there.
    for (const separating_set &identifiers : family)
    {
        set_concatination(access_sequences, identifiers, remember);
    }

    //second part of the Wp method: check that all transitions exist by going to every state,
    //executing every possible input, and then applying the state identifier of the state that is reached.
    for (state source = 0; source < m.graph_size; source++)
    {
        const word &access = access_sequences[source];
        for (input symbol = 0; symbol < m.input_size; symbol++)
        {
            const state target = apply(m, source, symbol).to;

            //access sequence followed by the input under test; shared by all identifiers of the target state.
            word transition;
            transition.reserve(access.size() + 1);
            transition.insert(transition.end(), access.begin(), access.end());
            transition.push_back(symbol);

            for (const word &identifier : family[target])
            {
                word test;
                test.reserve(transition.size() + identifier.size());
                test.insert(test.end(), transition.begin(), transition.end());
                test.insert(test.end(), identifier.begin(), identifier.end());
                remember(test);
            }
        }
    }

    return flatten(collected_tests);
}
//writes the test suite TS to output.
//every symbol of a test is replaced by inputs[symbol], and each translated test is handed to join()
//together with "," and "\n" before being streamed.
void test_suite_to_stream(std::ostream &output, const test_suite &TS, const std::vector<std::string> &inputs)
{
    for (const word &test : TS)
    {
        std::vector<std::string> names;
        names.reserve(test.size());
        for (const size_t symbol_index : test)
        {
            names.push_back(inputs[symbol_index]);
        }
        output << join(names.begin(), names.end(), ",", "\n");
    }
}
//
// Created by Gijs van Cuyck on 20/11/2018.
//
#pragma once
#include "types.hpp"
#include "separating_family.hpp"
#include <vector>
//some new names to make the code more readable.
using state_cover = std::vector<word>;
using test_suite = std::vector<word>;
//creates a test_suite using the Wp method, using family as state identifiers.
//the state cover the tests are built on is computed for machine m from starting_state.
//tests that are a prefix of another test are not part of the result.
test_suite create_test_suite(const separating_family& family, const mealy & m,const state starting_state);
//writes the tests in TS to output. every symbol of a test is used as an index into inputs to get its name,
//and each translated test is passed to join() with "," and "\n" as its string arguments.
void test_suite_to_stream(std::ostream & output, const test_suite& TS, const std::vector<std::string> & inputs);
#pragma once
#include <algorithm>
#include <memory>
#include <utility>
#include <vector>
///
/// \brief A Trie datastructure used to remove prefixes in a set of words.
/// Insert-only. Iteration over the structure only uses longest matches.
///
/// Tests : 1M words, avg words length 4 (geometric dist.), alphabet 50 symbols
/// trie reduction 58% in 0.4s
/// set reduction 49% in 1.1s
///
/// I did not implement any iterators, as those are quite hard to get right.
/// There are, however, "internal iterators" exposed as a for_each() member
/// function (if only we had coroutines already...)
///
/// TODO: implement `bool member(...)`
///
template <typename T> struct trie {
    /// \brief Inserts a word (given by iterators \p first and \p last)
    ///
    /// The iterators are taken by value: a call may mix lvalue and rvalue
    /// iterators, and an iterator variable of the caller is never advanced.
    /// \returns true if the element was inserted, false if already there
    /// (a word that is a prefix of a stored word counts as already there)
    template <typename Iterator> bool insert(Iterator first, Iterator last) {
        if (!node) {
            node = std::make_unique<trie_node>();
            if (first == last) {
                // the empty word is the first thing put into an empty trie
                return true;
            }
        }
        return node->insert(first, last);
    }

    /// \brief Inserts a word given as range \p r
    /// \returns true if the element was inserted, false if already there
    template <typename Range> bool insert(Range const & r) {
        // std::begin/std::end as fallback, so built-in arrays work as well as
        // containers that provide their own begin()/end() through ADL.
        using std::begin;
        using std::end;
        return insert(begin(r), end(r));
    }

    /// \brief Applies \p function to all word (not to the prefixes)
    template <typename Fun> void for_each(Fun && function) const {
        if (node) {
            node->for_each(std::forward<Fun>(function));
        }
        // else: empty set, so we don't call the function
    }

    /// \brief Empties the complete set
    void clear() { node.reset(nullptr); }

private:
    struct trie_node;
    std::unique_ptr<trie_node> node = nullptr;

    // A node always contains the empty word
    struct trie_node {
        /// \brief Walks down from this node along the word, creating the
        /// missing children.
        /// \returns true if at least one child had to be created
        template <typename Iterator> bool insert(Iterator first, Iterator last) {
            trie_node * current = this;
            bool created = false;
            for (; first != last; ++first) {
                T symbol = *first;
                auto it = current->find(symbol);
                if (it == current->data.end() || !(it->first == symbol)) {
                    // does not yet exist; emplacing at the lower bound keeps
                    // the children sorted by symbol
                    it = current->data.emplace(it, symbol, trie_node());
                    created = true;
                }
                current = &it->second;
            }
            return created;
        }

        /// \brief Calls \p function with every word that ends in a leaf
        /// below this node.
        template <typename Fun> void for_each(Fun && function) const {
            std::vector<T> word;
            for_each_impl(std::forward<Fun>(function), word);
        }

    private:
        template <typename Fun> void for_each_impl(Fun && function, std::vector<T> & word) const {
            if (data.empty()) {
                // we don't want function to modify word
                const auto & cword = word;
                function(cword);
            }
            for (auto const & kv : data) {
                // for each letter, we extend the word, recurse and remove extension.
                word.push_back(kv.first);
                kv.second.for_each_impl(function, word);
                word.pop_back();
            }
        }

        /// \returns iterator to the first child whose symbol is not less than
        /// \p key (data.end() if there is none)
        typename std::vector<std::pair<T, trie_node>>::iterator find(T const & key) {
            return std::lower_bound(
                data.begin(), data.end(), key,
                [](std::pair<T, trie_node> const & kv, T const & k) { return kv.first < k; });
        }

        // children, kept sorted by their symbol
        std::vector<std::pair<T, trie_node>> data;
    };
};
/// \brief Flattens a trie \p t
/// \returns an array with a copy of every word that for_each() visits
/// (so without the prefixes)
template <typename T> std::vector<std::vector<T>> flatten(trie<T> const & t) {
    std::vector<std::vector<T>> words;
    auto collect = [&words](std::vector<T> const & w) { words.push_back(w); };
    t.for_each(collect);
    return words;
}
/// \brief Returns size and total sum of symbols
/// \returns a pair: first is the number of words visited by for_each(),
/// second is the sum of the lengths of those words
template <typename T> std::pair<size_t, size_t> total_size(trie<T> const & t) {
    std::pair<size_t, size_t> totals(0, 0);
    t.for_each([&totals](std::vector<T> const & w) {
        totals.first += 1;
        totals.second += w.size();
    });
    return totals;
}
......@@ -5,6 +5,7 @@
#include "readable_splitting_tree.hpp"
#include "vector_printing.hpp"
#include "separating_family.hpp"
#include "test_suite.hpp"
#include <algorithm>
......@@ -69,7 +70,7 @@ struct main_options
//"esm-manual-controller.dot";
//"ABP_Sender.flat_0_1.dot";
//"ex5_with_loops_with_hidden_states_minimized.dot";
string input_filename = input_directory + "no_semi-valid_transitions.dot";
string input_filename = input_directory + "ex5_with_loops_with_hidden_states_minimized.dot";
string output_filename = "";
};
......@@ -185,12 +186,10 @@ int main(int argc, char *argv[])
return read_mealy_from_dot(filename);
}();
//leftover old code. main purpose now is to check if the machine is fully defined.
const auto &reachable_machine = reachable_submachine(machine_and_translation.first, 0U);
const auto &machine = machine_and_translation.first;
//todo: check whether this is needed
//if(machine.graph_size!=reachable_machine.graph_size)
//throw runtime_error("machine is not fully reachable");
const auto &translation = machine_and_translation.second;
......@@ -211,8 +210,6 @@ int main(int argc, char *argv[])
cout << "creating splitting tree\n";
splitting_tree complete_splitting_tree = create_splitting_tree(machine);
......@@ -233,8 +230,13 @@ int main(int argc, char *argv[])
out_file << "the separating family:\n\n";
separating_family_to_stream(out_file, family, input_translation, state_translation);
out_file.close();
//todo: figure out how the starting state can be defined.
test_suite TS = create_test_suite(family,machine,state(0));
out_file << "the test suite:\n\n";
test_suite_to_stream(out_file, TS, input_translation);
out_file.close();
cout << "finished!\n";
......
//
// Created by gijsc on 13-10-2018.
//
#include <trie.hpp>
#include <algorithm>
#include <chrono>
#include <iostream>
#include <numeric>
#include <random>
#include <set>
#include <stdexcept>
#include <vector>
using namespace std;
int main(int argc, char * argv[])
{
cout << "testing";
using word = vector<size_t>;
static void check(bool r) {
if (!r) throw runtime_error("error in trie");
}
static void test() {
word w1 = {1, 2, 3};
word w2 = {2, 3};
word w3 = {1, 2};
word w4 = {5, 5, 5};
word w5 = {5, 5, 3};
word w6 = {5, 5, 3, 1};
trie<unsigned> t;
check(t.insert(w1));
check(!t.insert(w1));
check(t.insert(w2));
check(!t.insert(w3));
check(t.insert(w4));
check(t.insert(w5));
check(t.insert(w6));
check(flatten(t).size() == 4);
t.for_each([](auto&& w) {
for (auto&& i : w) cout << i << ' ';
cout << '\n';
});
cout << endl;
}
static void performance() {
vector<word> corpus(1000000);
std::random_device rd;
std::mt19937 generator(rd());
uniform_int_distribution<int> unfair_coin(0, 3);
uniform_int_distribution<size_t> symbol(0, 50 - 1);
generate(begin(corpus),
end(corpus),
[&] {
word w;
while (unfair_coin(generator) || w.empty()) {
w.push_back(symbol(generator));
}
return w;
});
size_t size = corpus.size();
size_t total_size
= accumulate(begin(corpus),
end(corpus),
0ul,
[](auto l, auto&& r) { return l + r.size(); });
cout << size << " words\n";
cout << total_size << " symbols\n";
cout << total_size / double(size) << " average word length\n";
cout << endl;
using clock = std::chrono::high_resolution_clock;
using time = std::chrono::time_point<clock>;
using seconds = std::chrono::duration<double>;
auto t_start = clock::now();
trie<unsigned> t;
for (auto&& w : corpus) t.insert(w);
auto t_end = clock::now();
auto s_start = clock::now();
set<word> s;
for (auto&& w : corpus) s.insert(w);
auto s_end = clock::now();
size_t trie_size = flatten(t).size();
size_t set_size = s.size();
cout << trie_size << " words in the trie\n";
cout << trie_size / double(size) << " ratio\n";
cout << seconds(t_end - t_start).count() << " seconds\n";
cout << endl;
cout << set_size << " words in the set\n";
cout << set_size / double(size) << " ratio\n";
cout << seconds(s_end - s_start).count() << " seconds\n";
cout << endl;
}
int main() {
test();
performance();
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment