diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000000000000000000000000000000000000..248aedc483a85c032a5aacb5fd62e06577c9266f --- /dev/null +++ b/.clang-format @@ -0,0 +1,11 @@ +BasedOnStyle: LLVM + +AllowShortFunctionsOnASingleLine: true +AllowShortIfStatementsOnASingleLine: true +AllowShortLoopsOnASingleLine: true + +ColumnLimit: 100 +IndentWidth: 4 +TabWidth: 4 +PointerAlignment: Middle +UseTab: ForIndentation diff --git a/.gitmodules b/.gitmodules index b953c7cb3fd83d218301edfabb913106d0cda528..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +0,0 @@ -[submodule "contrib/docopt.cpp"] - path = contrib/docopt.cpp - url = https://github.com/Jaxan/docopt.cpp.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 0827d7613ac7f1e4376e43f34c7aa44ffe01d00e..c5a4c6441f1ceb9ed8c27bee3d305e7da4037e0f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,12 +3,12 @@ cmake_minimum_required(VERSION 2.8) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++1y") +find_package (Threads) + find_package(Boost REQUIRED COMPONENTS) include_directories(SYSTEM ${Boost_INCLUDE_DIRS}) -include_directories(SYSTEM "${PROJECT_SOURCE_DIR}/contrib/docopt.cpp") -set(libs ${libs} ${Boost_LIBRARIES} docopt_s) +set(libs ${libs} ${Boost_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}) -add_subdirectory("contrib/docopt.cpp") add_subdirectory("lib") add_subdirectory("src") diff --git a/README.md b/README.md index 942d3c5bc7f10b3f8314e43e8cdc242b035f663b..1b2a79ff5d70b76ed6616e205ab1c4de5fde5260 100644 --- a/README.md +++ b/README.md @@ -1,34 +1,53 @@ -Yannakakis -========== - -An algorithm to construct an adaptive distinguishing sequence for a mealy -machine. If it does not exist, a partial sequence will be generated, which is -still useful for generating a seperating set (in the sense of Lee and -Yannakakis). The partial leaves will be augmented via ordinary seperating -sequences. 
In effect, the resulting test method is an instantiation of the HSI- -method, which tends towards the DS-method. +Hybrid adaptive distinguishing sequences +======================================== + +In FSM-based test generation, a key feature of smart tests are efficient state +identifiers. This tool generates a test suite based on the adaptive +distinguishing sequences as described by Lee and Yannakakis (1994). Many Mealy +machines do not admit such sequences, but luckily the sequences can be extended +in order to obtain a complete test suite. + +*NOTE*: This repository was originally located at +[here](https://gitlab.science.ru.nl/moerman/Yannakakis). +But I realised that installing the software was not as easy as it should be, +and so I cleaned up dependencies, while keeping the original repository fixed. +(Also some people might still use the old link.) + + +## Introduction + +This tool will generate a complete test suite for a given specification. The +specification should be given as a completely-specified, deterministic Mealy +machine (or finite state machine, FSM). Also the implementation is assumed to +be complete and deterministic. If the implementation passes the test suite, +then it is guaranteed to be equivalent or to have at least k more states. +The parameter k can be chosen. The size of the test suite is polynomial in the +number of states of the specification, but exponential in k. + +There are many ways to generate such a complete test suite. Many variations, +W, Wp, HSI, ADS, UIOv, ..., exist in literature. Implemented here (as of +writing) are HSI, ADS and the hybrid-ADS method. Since the ADS method is not +valid for all Mealy machines, this tool extends the method to be complete, +hence the name "hybrid-ADS". This is a new method (although very similar to the +HSI and ADS methods). + +In addition to choosing the state identifier, one can also choose how the +prefixes for the tests are generated. 
Typically, one will use shortest paths, +but longer ones can be used too. + +All algorithms implemented here can be randomised, which can greatly reduce +the size of the test suite. Most of the algorithms are found in the directory `lib/` and their usage is best -illustrated in `src/main.cpp` or `src/methods.cpp`. - -Currently states and inputs are encoded internally as integer values (because -this enables fast indexing). Only for I/O, maps are used to translate between -integers and strings. To reduce the memory footprint `uint16_t`s are used, as -the range is big enough for our use cases (`uint8_t` is clearly too small for -the number of states, but could be used for alphabets). +illustrated in `src/main.cpp`. The latter can be used as a stand-alone tool. +The input to the executable are `.dot` files (of a specific type). Please look +at the provided example to get started. ## Building -There are two dependencies: docopt.cpp (for handling program options) and boost -(for an optional type and string manipulations). The first dependency is a git -submodule and can be obtained with: - -``` -git submodule update --init -``` - -Assuming boost is installed on your system, we can build the tool with cmake: +Currently there is still one dependency: Boost. Assuming boost is installed on +your system, we can build the tool with cmake: ``` mkdir build @@ -37,26 +56,9 @@ cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo .. make ``` -Note that you'll need c++14, but clang in Mac -OSX will understand that (and if not, you'll have to update Xcode). The main -sourcefile (`src/main.cpp`) can also be built with c++11 (this is tested on some -commits on both Windows and linux). - - -### Notes for linux - -There seems to be a problem with docopt.cpp with gcc-4.9 as well... (Everything -compiles, but the program options are not parsed well.) If you want to build -with `clang` on linux, you should also use `libc++`. 
Try the following: - -``` -sudo apt-get install libc++-dev -mkdir build -cd build -CXX=clang++ CC=clang CXXFLAGS=-stdlib=libc++ LDFLAGS=-pthread cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo .. -make +I hope most of the code is portable c++11. But I may have used some c++14 +features. (If this is a problem for you, please let me know.) -``` ## Java @@ -65,6 +67,29 @@ tool, is included here (can be out-dated). But it should earn its own repo at some point. Also, my javanese is a bit rusty... +## Implementation details + +Currently states and inputs are encoded internally as integer values (because +this enables fast indexing). Only for I/O, maps are used to translate between +integers and strings. To reduce the memory footprint `uint16_t`s are used, as +the range is big enough for our use cases (`uint8_t` is clearly too small for +the number of states, but could be used for alphabets). + +A prefix tree (or trie) is used to reduce the test suite, by removing common +prefixes. However, this can quickly grow in size. Be warned! + + +## TODO + +* Implement a proper radix tree (or Patricia tree) to reduce memory usage. +* Remove the dependency on boost. +* Implement the SPY method for finding smarter prefixes. +* Compute independent structures in parallel (this was done in the first + version of the tool). +* Implement the O(n log n) algorithm to find state identifiers, instead of the + current (roughly) O(n^2) algorithm. 
+ + ## License See `LICENSE` diff --git a/contrib/docopt.cpp b/contrib/docopt.cpp deleted file mode 160000 index 6c0c8e756487b4ac486306b906e9701e9d256bed..0000000000000000000000000000000000000000 --- a/contrib/docopt.cpp +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 6c0c8e756487b4ac486306b906e9701e9d256bed diff --git a/examples/lee_yannakakis_distinguishable.dot b/examples/lee_yannakakis_distinguishable.dot new file mode 100644 index 0000000000000000000000000000000000000000..3ccae49cab8dbe20035546b054088e7898ed460c --- /dev/null +++ b/examples/lee_yannakakis_distinguishable.dot @@ -0,0 +1,15 @@ +digraph distinguishable { + s1 -> s2 [label="a / 0"]; + s2 -> s3 [label="a / 1"]; + s3 -> s4 [label="a / 0"]; + s4 -> s5 [label="a / 1"]; + s5 -> s6 [label="a / 0"]; + s6 -> s1 [label="a / 1"]; + + s1 -> s1 [label="b / 0"]; + s2 -> s3 [label="b / 0"]; + s3 -> s4 [label="b / 0"]; + s4 -> s5 [label="b / 0"]; + s5 -> s6 [label="b / 0"]; + s6 -> s1 [label="b / 0"]; +} diff --git a/lib/test_suite.cpp b/lib/test_suite.cpp index b1cc87d063f3081ac9007a3808666f1375bdccea..148901549b4c639fca750c5684a6ee440552731e 100644 --- a/lib/test_suite.cpp +++ b/lib/test_suite.cpp @@ -132,13 +132,13 @@ void randomized_test_suffix(const mealy & specification, const transfer_sequence } } -writer default_writer(std::vector<std::string> const & inputs) { +writer default_writer(std::vector<std::string> const & inputs, std::ostream & os) { static const auto print_word = [&](word w) { - for (auto && x : w) cout << inputs[x] << ' '; + for (auto && x : w) os << inputs[x] << ' '; }; static const auto reset = [&] { - cout << endl; - return bool(cout); + os << endl; + return bool(os); }; return {print_word, reset}; } diff --git a/lib/test_suite.hpp b/lib/test_suite.hpp index 768d4c63f96030341c9bc6b582e7300d1ca781c4..23ddda03b2a1177a5479cfaaf6004bf1b1cd0304 100644 --- a/lib/test_suite.hpp +++ b/lib/test_suite.hpp @@ -26,4 +26,4 @@ void randomized_test_suffix(mealy const & specification, transfer_sequences 
cons size_t rnd_length, writer const & output, uint_fast32_t random_seed); /// \brief returns a writer which simply writes everything to cout (via inputs) -writer default_writer(const std::vector<std::string> & inputs); +writer default_writer(const std::vector<std::string> & inputs, std::ostream & os); diff --git a/src/complete.cpp b/src/complete.cpp deleted file mode 100644 index b483c2775b1b5a91b95311e50fe1ae8d3ae90590..0000000000000000000000000000000000000000 --- a/src/complete.cpp +++ /dev/null @@ -1,100 +0,0 @@ -#include <adaptive_distinguishing_sequence.hpp> -#include <read_mealy.hpp> -#include <separating_family.hpp> -#include <splitting_tree.hpp> -#include <test_suite.hpp> -#include <transfer_sequences.hpp> -#include <trie.hpp> - -#include <docopt.h> - -#include <future> -#include <iostream> -#include <random> -#include <string> - -using namespace std; - -static const char USAGE[] = - R"(FSM-completer (only dot), also renames the state names - - Usage: - methods [options] <file> - - Options: - -h, --help Show current help - --version Show version - --sink Completion with sink - --loop Completion with self loops (default) - --output <out> Output for new transitions (leave empty for fresh output) -)"; - -void write_mealy_to_dot(const mealy & m, const translation & translation, std::ostream & out) { - const auto inputs = create_reverse_map(translation.input_indices); - const auto output = create_reverse_map(translation.output_indices); - - out << "digraph {\n"; - - for (state s = 0; s < m.graph_size; ++s) { - for (input i = 0; i < m.input_size; ++i) { - if (!defined(m, s, i)) continue; - const auto ret = apply(m, s, i); - out << s << " -> " << ret.to << " [label=\"" << inputs[i] << " / " << output[ret.out] - << "\"]\n"; - } - } - - out << "}\n"; -} - -int main(int argc, char * argv[]) { - const auto args = docopt::docopt(USAGE, {argv + 1, argv + argc}, true, __DATE__ __TIME__); - - const string filename = args.at("<file>").asString(); - - auto mt = 
read_mealy_from_dot(filename, false); - auto & machine = mt.first; - auto & translation = mt.second; - - if (!is_complete(machine)) { - const auto out = [&]() -> output { - if (args.at("--output")) { - const string out_str = args.at("--output").asString(); - if (translation.output_indices.count(out_str)) { - // reuse old output - return translation.output_indices[out_str]; - } - } - // else: grab a new one - string newo = "SILENT"; - while(translation.output_indices.count(newo)) newo += '0'; - return translation.output_indices[newo] = machine.output_size++; - }(); - - - if (args.at("--sink").asBool()) { - // add sink - const auto new_state = machine.graph_size++; - machine.graph.resize(machine.graph_size); - - for (state s = 0; s < machine.graph_size; ++s) { - machine.graph[s].resize(machine.input_size); - for (input i = 0; i < machine.input_size; ++i) { - if (defined(machine, s, i)) continue; - machine.graph[s][i] = mealy::edge(new_state, out); - } - } - } else { - // add self loops - for (state s = 0; s < machine.graph_size; ++s) { - machine.graph[s].resize(machine.input_size); - for (input i = 0; i < machine.input_size; ++i) { - if (defined(machine, s, i)) continue; - machine.graph[s][i] = mealy::edge(s, out); - } - } - } - } - - write_mealy_to_dot(machine, translation, cout); -} diff --git a/src/distance.cpp b/src/distance.cpp deleted file mode 100644 index 1faccf391abda1d9a1ba56ffed9b2045f7e93892..0000000000000000000000000000000000000000 --- a/src/distance.cpp +++ /dev/null @@ -1,109 +0,0 @@ -#include <mealy.hpp> -#include <read_mealy.hpp> - -#include <iostream> -#include <fstream> -#include <sstream> -#include <queue> -#include <set> - -using namespace std; - -int main(int argc, char * argv[]) { - if (argc != 4) { - cerr << "usage: distance <file1> <file2> <log>" << endl; - return 1; - } - - translation t; - const auto m1 = read_mealy_from_dot(argv[1], t); - const auto m2 = read_mealy_from_dot(argv[2], t); - - if (m1.input_size != m2.input_size) throw 
runtime_error("different alphabets!"); - - const auto log = [&] { - vector<word> log_; - string line; - ifstream log_file(argv[3]); - while (std::getline(log_file, line)) { - log_.emplace_back(); - word & w = log_.back(); - stringstream ss(line); - string symbol; - while (ss >> symbol) { - w.push_back(t.input_indices.at(symbol)); - } - } - return log_; - }(); - - // can be vector of bool, but meh - set<pair<state, state>> visited; - queue<pair<state, state>> work; - - size_t current_length_work = 0; - size_t next_length_work = 0; - size_t current_length = 0; - size_t current_counterexamples = 0; - - const auto push = [&](auto s1, auto s2) { - next_length_work++; - work.push({s1, s2}); - }; - - const auto pop = [&]() { - const auto p = work.front(); - work.pop(); - current_length_work--; - return p; - }; - - const auto check_length = [&]() { - if (current_length_work == 0) { - if (current_counterexamples != 0) { - cout << "(-" << current_length << ", " << current_counterexamples << ")" << endl; - exit(0); - } - - current_length_work = next_length_work; - current_length++; - } - }; - - push(0, 0); - for(auto const & l : log){ - state s1 = 0; - state s2 = 0; - for(auto const & i : l){ - s1 = apply(m1, s1, i).to; - s2 = apply(m2, s2, i).to; - push(s1, s2); - } - } - - while (!work.empty()) { - check_length(); - - const auto p = pop(); - const auto s1 = p.first; - const auto s2 = p.second; - - if (visited.count(p)) continue; - visited.insert(p); - - for (input i = 0; i < m1.input_size; ++i) { - auto q1 = apply(m1, s1, i); - auto q2 = apply(m2, s2, i); - - if (q1.out != q2.out) { - current_counterexamples++; - } - - auto new_p = make_pair(q1.to, q2.to); - if (visited.count(new_p)) continue; - push(q1.to, q2.to); - } - } - - cout << "distance = 0" << endl; -} diff --git a/src/generator.cpp b/src/generator.cpp deleted file mode 100644 index f7ec291c755b95768e36416974828d7c1c8e6c7a..0000000000000000000000000000000000000000 --- a/src/generator.cpp +++ /dev/null @@ -1,208 
+0,0 @@ -#include <mealy.hpp> -#include <reachability.hpp> -#include <splitting_tree.hpp> - -#include <docopt.h> -#include <boost/lexical_cast.hpp> - -#include <iostream> -#include <random> -#include <fstream> - -using namespace std; - -static const char USAGE[] = -R"(Random Mealy machine generator - - Usage: - generator random [options] <states> <inputs> <outputs> <machines> [<seed>] - generator hopcroft a <states> - generator hopcroft b <states> - - Options: - -h, --help Show this screen - --version Show version - -m, --minimal Only generate minimal machines - -c, --connected Only generate reachable machines - --output-cluster <factor> How clustered should the outputs be - --state-cluster <factor> And what about states - --single-output-boost <fctr> Boost for a single output (e.g. quiescence) - --permute-alphabets <n> Makes n copies with permuted input/output -)"; - -static size_t number_of_leaves(splitting_tree const & root) { - if (root.children.empty()) return 1; - - return accumulate(root.children.begin(), root.children.end(), 0ul, - [](auto const & l, auto const & r) { return l + number_of_leaves(r); }); -} - -struct random_options { - double output_spread = 0; - double state_spread = 0; - double single_output_boost = 1; -}; - -static mealy permute_alphabets(mealy const & m){ - mt19937 gen(random_device{}()); - const auto create_permutation = [&gen](size_t n){ - vector<size_t> p(n); - iota(p.begin(), p.end(), 0); - shuffle(p.begin(), p.end(), gen); - return p; - }; - - const auto ip = create_permutation(m.input_size); - const auto op = create_permutation(m.output_size); - - mealy ret = m; - for(state s = 0; s < m.graph_size; ++s){ - for(input i = 0; i < m.input_size; ++i) { - ret.graph[s][i] = m.graph[s][ip[i]]; - ret.graph[s][i].out = op[ret.graph[s][i].out]; - } - } - return ret; -} - -static mealy generate_random_machine(size_t N, size_t P, size_t Q, random_options opts, mt19937 & gen) { - mealy m; - - m.graph_size = N; - m.input_size = P; - m.output_size 
= Q; - - m.graph.assign(m.graph_size, vector<mealy::edge>(m.input_size)); - - auto o_dist = [&] { - const auto factor = opts.output_spread; - vector<double> probs(m.output_size); - for (output o = 0; o < m.output_size; ++o) - probs[o] = exp(factor * o / double(m.output_size - 1)); - probs[0] *= opts.single_output_boost; - discrete_distribution<output> dist(probs.begin(), probs.end()); - return dist; - }(); - - auto s_dist = [&] { - const auto factor = opts.state_spread; - vector<double> probs(m.graph_size); - for (output o = 0; o < m.graph_size; ++o) - probs[o] = exp(factor * o / double(m.graph_size - 1)); - discrete_distribution<state> dist(probs.begin(), probs.end()); - return dist; - }(); - - vector<state> states(m.graph_size); - iota(states.begin(), states.end(), 0); - - for (state s = 0; s < m.graph_size; ++s) { - shuffle(states.begin(), states.end(), gen); - for (input i = 0; i < m.input_size; ++i) { - m.graph[s][i] = {states[s_dist(gen)], o_dist(gen)}; - } - } - - return m; -} - -static void print_machine(string const & prefix, mealy const & m, size_t count) { - ofstream file(prefix + "_" + to_string(m.graph_size) + "_" + to_string(m.input_size) + "_" - + to_string(m.output_size) + "_" + to_string(count) + ".txt"); - for (state s = 0; s < m.graph_size; ++s) { - for (input i = 0; i < m.input_size; ++i) { - auto e = m.graph[s][i]; - file << s << " -- " << i << " / " << e.out << " -> " << e.to << endl; - } - } -} - -int main(int argc, char * argv[]) { - const auto args = docopt::docopt(USAGE, {argv + 1, argv + argc}, true, __DATE__ __TIME__); - - if (args.at("random").asBool()) { - random_options opts; - if (args.at("--output-cluster")) - opts.output_spread = -boost::lexical_cast<double>(args.at("--output-cluster").asString()); - if (args.at("--state-cluster")) - opts.state_spread = -boost::lexical_cast<double>(args.at("--state-cluster").asString()); - if (args.at("--single-output-boost")) - opts.single_output_boost = 
boost::lexical_cast<double>(args.at("--single-output-boost").asString()); - - auto gen = [&] { - if (args.at("<seed>")) { - auto seed = args.at("<seed>").asLong(); - return mt19937(seed); - } - random_device rd; - return mt19937(rd()); - }(); - - size_t number_of_machines = args.at("<machines>").asLong(); - size_t constructed = 0; - - while (constructed < number_of_machines) { - auto const m = generate_random_machine(args.at("<states>").asLong(), - args.at("<inputs>").asLong(), - args.at("<outputs>").asLong(), opts, gen); - - if (args.at("--connected").asBool()) { - auto const m2 = reachable_submachine(m, 0); - if (m2.graph_size != m.graph_size) continue; - } - - if (args.at("--minimal").asBool()) { - auto const tree = create_splitting_tree(m, min_hopcroft_style, 0).root; - if (number_of_leaves(tree) != m.graph_size) continue; - } - - if (args.at("--permute-alphabets")) { - auto permuted_copies = args.at("--permute-alphabets").asLong(); - while (permuted_copies--) { - constructed++; - auto copy = permute_alphabets(m); - print_machine("machine", copy, constructed); - } - } else { - constructed++; - print_machine("machine", m, constructed); - } - } - } else if (args.at("hopcroft").asBool() && args.at("a").asBool()) { - mealy m; - - m.graph_size = args.at("<states>").asLong(); - m.input_size = m.output_size = 2; - m.graph.assign(m.graph_size, vector<mealy::edge>(m.input_size)); - - for (state s = 0; s < m.graph_size; ++s) { - m.graph[s][0] = mealy::edge(s + 1, 0); - m.graph[s][1] = mealy::edge(s, 0); - } - - // "accepting state" - m.graph[m.graph_size - 1][0] = mealy::edge(m.graph_size - 1, 1); - m.graph[m.graph_size - 1][1] = mealy::edge(m.graph_size - 1, 1); - - print_machine("hopcroft_a", m, 1); - } else if (args.at("hopcroft").asBool() && args.at("b").asBool()) { - // In the original paper, the machine is not well defined... - // So I don't know what Hopcroft had in mind exactly... 
- mealy m; - - auto n = m.graph_size = args.at("<states>").asLong(); - m.input_size = m.output_size = 2; - m.graph.assign(m.graph_size, vector<mealy::edge>(m.input_size)); - - for (state s = 0; s < n; ++s) { - m.graph[s][0] = mealy::edge(s ? s - 1 : 0, s < n / 2 ? 0 : 1); - if (s < n / 2) { - m.graph[s][1] = mealy::edge(2 * s + 1, 0); - } else { - m.graph[s][1] = mealy::edge(s + (n - s) / 2, 0); - } - } - - print_machine("hopcroft_b", m, 1); - } -} diff --git a/src/learning_graph.cpp b/src/learning_graph.cpp deleted file mode 100644 index 3893089d8395a26901d7d015bafcbae6baf68c0a..0000000000000000000000000000000000000000 --- a/src/learning_graph.cpp +++ /dev/null @@ -1,185 +0,0 @@ -#include <docopt.h> - -#include <boost/range/algorithm/sort.hpp> -#include <boost/range/algorithm/unique.hpp> - -#include <cstdint> -#include <cmath> -#include <fstream> -#include <future> -#include <iostream> -#include <stdexcept> -#include <vector> - -using namespace std; - -static const char USAGE[] = - R"(Generate a statistical learning graph from multiple runs - - Usage: - learning_graph [options] <file> ... 
- - Options: - --testing_only Only count the figures for testing - --learning_only Only count the figures for learning - --accumulate Accumulates the data - -h, --help Show this screen - --version Show version -)"; - -struct datapoint { - uint64_t states; - uint64_t learning_queries; - uint64_t learning_inputs; - uint64_t testing_queries; - uint64_t testing_inputs; -}; - -using dataset = vector<datapoint>; - -static void accumulate_dataset(dataset & ds) { - for (size_t i = 0; i < ds.size() - 1; ++i) { - ds[i + 1].learning_queries += ds[i].learning_queries; - ds[i + 1].learning_inputs += ds[i].learning_inputs; - ds[i + 1].testing_queries += ds[i].testing_queries; - ds[i + 1].testing_inputs += ds[i].testing_inputs; - } -} - -template <typename C, typename S> -void print_quantiles(C const & container, S && selector, ostream & out) { - const auto index_weight = [&](double p) -> pair<size_t, double> { - auto index = (p * (container.size() - 1)); - return {floor(index), 1 - fmod(index, 1)}; - }; - - auto sorted_container = container; - sort(sorted_container.begin(), sorted_container.end(), - [&](auto const & l, auto const & r) { return selector(l) < selector(r); }); - out << selector(sorted_container.front()) << '\t'; - - const auto i25 = index_weight(0.25); - out << i25.second * selector(sorted_container[i25.first]) - + (1 - i25.second) * selector(sorted_container[i25.first + 1]) - << '\t'; - - const auto i50 = index_weight(0.50); - out << i50.second * selector(sorted_container[i50.first]) - + (1 - i50.second) * selector(sorted_container[i50.first + 1]) - << '\t'; - - const auto i75 = index_weight(0.75); - out << i75.second * selector(sorted_container[i75.first]) - + (1 - i75.second) * selector(sorted_container[i75.first + 1]) - << '\t'; - - out << selector(sorted_container.back()); -} - -auto all(datapoint const & p) { - return p.learning_queries + p.learning_inputs + p.testing_queries + p.testing_inputs; -} -auto testing(datapoint const & p) { - return 
p.testing_queries + p.testing_inputs; -} -auto learning(datapoint const & p) { - return p.learning_queries + p.learning_inputs; -} - -int main(int argc, char * argv[]) { - const auto args = docopt::docopt(USAGE, {argv + 1, argv + argc}, true, __DATE__ __TIME__); - - const auto field = args.at("--testing_only").asBool() ? &testing : args.at("--learning_only").asBool() ? &learning : &all; - - vector<future<dataset>> dataset_futures; - for (auto const & filename : args.at("<file>").asStringList()) { - dataset_futures.emplace_back(async([filename, &args] { - fstream file(filename); - if (!file) throw runtime_error("Could not open file " + filename); - - dataset s; - datapoint p; - while (file >> p.states >> p.learning_queries >> p.learning_inputs >> p.testing_queries - >> p.testing_inputs) { - s.push_back(p); - } - - if (args.at("--accumulate").asBool()) - accumulate_dataset(s); - - return s; - })); - } - - vector<dataset> datasets; - clog << "datasets"; - for (auto & f : dataset_futures) { - datasets.emplace_back(f.get()); - clog << ' ' << datasets.back().size(); - if (datasets.back().size() == 0) throw runtime_error("empty dataset"); - } - clog << endl; - - vector<size_t> state_values; - state_values.reserve(datasets[0].size()); - - // lazy way of doing things - for (auto && ds : datasets) - for (auto && x : ds) state_values.push_back(x.states); - - sort(state_values.begin(), state_values.end()); - state_values.erase(unique(state_values.begin(), state_values.end()), state_values.end()); - - // id(state_value) -> [total query size] - vector<vector<double>> data; - data.reserve(state_values.size()); - - // we keep track of the current timestamp for the different datasets - struct it_pair { - dataset::const_iterator current, next, end; - }; - vector<it_pair> iterators(datasets.size()); - for (size_t i = 0; i < datasets.size(); ++i) - iterators[i] = {datasets[i].begin(), datasets[i].begin(), datasets[i].end()}; - - for (auto const & state : state_values) { - 
data.push_back({}); - for (auto & it : iterators) { - while (it.next != it.end && it.next->states < state) { - it.current = it.next; - it.next++; - } - - // one run stopped prior to the others, we can skip it - if (it.next == it.end) continue; - - // one run started earlier, we can skip it - if (it.current->states > state) continue; - - // if we're spot on, update current - if (it.next->states == state) it.current = it.next; - - const auto v2 = field(*it.next); - const auto v1 = field(*it.current); - const auto ratio - = it.next->states == state - ? 1.0 - : (state - it.current->states) / double(it.next->states - it.current->states); - const auto v = ratio * v2 + (1.0 - ratio) * v1; - data.back().push_back(v); - } - } - - for (auto & v : data) { - sort(v.begin(), v.end()); - } - - cout << "s\tmin\tQ1\t\tQ2\tQ3\tmax" << endl; - for (size_t i = 0; i < state_values.size(); ++i) { - auto v = data[i]; - if (v.empty()) continue; - cout << state_values[i] << '\t'; - print_quantiles(v, [](auto const & x) { return x; }, cout); - cout << endl; - } -} diff --git a/src/main.cpp b/src/main.cpp index e1238eecbfe15d0e42ecb76d1588660e1085a565..edbbd059003c7c44d7abda300f5265e487b8ae2b 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -7,70 +7,167 @@ #include <splitting_tree.hpp> #include <test_suite.hpp> #include <transfer_sequences.hpp> - -#include <docopt.h> +#include <trie.hpp> #include <algorithm> -#include <future> -#include <iomanip> -#include <numeric> +#include <cstdlib> +#include <iostream> +#include <map> #include <random> +#include <stdexcept> +#include <string> +#include <utility> + +/* + * The reason I use getopt, instead of some library (I've used + * docopts and boost in the past), is that I want no dependencies. + * I've installed this software several times, and it was never + * easy because of its dependencies. + */ +#include <unistd.h> using namespace std; static const char USAGE[] = -R"(Generate a test suite. Use '=' as filename for stdin. 
+ R"(Generate or stream a test suite for a given FSM. Usage: - main [options] <filename> (all|fixed|random) <max k> <rnd length> + main [options] Options: - -h, --help Show this screen - --version Show version - --seed <seed> 32 bits seeds for deterministic execution - --no-ds Only use the classical algorithm (hsi) - --random-ds Choose randomly between the ds method or hsi method - --no-suffix Dont calculate anything smart, just do the random stuff - --suffix-based Only applies in random mode. Chooses suffix first, and not prefix first - --prefix <type> Chooses the kind of prefix: canonical, minimal, buggy, longest + -h Show this screen + -v Show version + -m <arg> Operation mode: all, fixed, random + -p <arg> How to generate prefixes: minimal, lexmin, buggy, longest + -s <arg> How to generate suffixes: hsi, hads, none + -k <num> Number of extra states to check for (minus 1) + -r <num> Expected length of random infix word + -x <seed> 32 bits seeds for deterministic execution (0 is not valid) + -f <filename> Input filename ('-' or don't specify for stdin) + -o <filename> Output filename ('-' or don't specify for stdout) )"; -using time_logger = silent_timer; +enum Mode { ALL, FIXED, RANDOM }; +enum PrefixMode { MIN, LEXMIN, BUGGY, DFS }; +enum SuffixMode { HSI, HADS, NOSUFFIX }; + +struct main_options { + bool help = false; + bool version = false; + + Mode mode = ALL; + PrefixMode prefix_mode = MIN; + SuffixMode suffix_mode = HADS; + + unsigned long k_max = 3; // 3 means 2 extra states + unsigned long rnd_length = 8; // in addition to k_max + unsigned long seed = 0; // 0 for unset/noise + + string input_filename; // empty for stdin + string output_filename; // empty for stdout +}; + +main_options parse_options(int argc, char ** argv) { + main_options opts; + + static const map<string, Mode> mode_names = { + {"all", ALL}, {"fixed", FIXED}, {"random", RANDOM}}; + static const map<string, PrefixMode> prefix_names = { + {"minimal", MIN}, {"lexmin", LEXMIN}, {"buggy", 
BUGGY}, {"longest", DFS}}; + static const map<string, SuffixMode> suffix_names = { + {"hsi", HSI}, {"hads", HADS}, {"none", NOSUFFIX}}; + + try { + int c; + while ((c = getopt(argc, argv, "hvm:p:s:k:r:x:f:o:")) != -1) { + switch (c) { + case 'h': // show help message + opts.help = true; + break; + case 'v': // show version + opts.version = true; + break; + case 'm': // select operation mode + opts.mode = mode_names.at(optarg); + break; + case 'p': // select prefix mode + opts.prefix_mode = prefix_names.at(optarg); + break; + case 's': // select suffix mode + opts.suffix_mode = suffix_names.at(optarg); + break; + case 'k': // select extra states / k-value + opts.k_max = stoul(optarg); + break; + case 'r': // expected random length + opts.rnd_length = stoul(optarg); + break; + case 'x': // seed + opts.seed = stoul(optarg); + break; + case 'f': // input filename + opts.input_filename = optarg; + break; + case 'o': // output filename + opts.output_filename = optarg; + break; + case ':': // some option without argument + throw runtime_error(string("No argument given to option -") + char(optopt)); + case '?': // all unrecognised things + throw runtime_error(string("Unrecognised option -") + char(optopt)); + } + } + } catch (exception & e) { + cerr << e.what() << endl; + cerr << "Could not parse command line options." << endl; + cerr << "Please use -h to see the available options." << endl; + exit(2); + } -int main(int argc, char *argv[]) try { - const auto args = docopt::docopt(USAGE, {argv + 1, argv + argc}, true, "25 Nov 11:50"); + return opts; +} - const string filename = args.at("<filename>").asString(); - const bool use_stdio = filename == "="; +using time_logger = silent_timer; - const auto k_max = args.at("<max k>").asLong(); - const auto rnd_length = args.at("<rnd length>").asLong(); +int main(int argc, char * argv[]) try { + /* + * First we parse the command line options. 
+	 * We quit when asked for help or version
+	 */
+	const auto args = parse_options(argc, argv);
-	const bool streaming = args.at("all").asBool() || args.at("fixed").asBool();
-	const bool random_part = args.at("all").asBool() || args.at("random").asBool();
-	const bool no_suffix = args.at("--no-suffix").asBool();
-	const bool suffix_based = args.at("--suffix-based").asBool();
+	if (args.help) {
+		cout << USAGE << endl;
+		exit(0);
+	}
-	const bool seed_provided = bool(args.at("--seed"));
-	const uint_fast32_t seed = seed_provided ? args.at("--seed").asLong() : 0;
+	if (args.version) {
+		cout << "Version 2 (July 2017)" << endl;
+		exit(0);
+	}
+
+	const bool no_suffix = args.suffix_mode == NOSUFFIX;
+	const bool use_distinguishing_sequence = args.suffix_mode == HADS;
-	const bool use_distinguishing_sequence = [&]{
-		if(args.at("--random-ds").asBool()) {
-			random_device rd;
-			return rd() - rd.min() < (rd.max() - rd.min()) / 2;
-		}
-		return !args.at("--no-ds").asBool();
-	}();
-	const string prefix_type = args.at("--prefix") ? args.at("--prefix").asString() : "minimal";
 	const bool randomize_hopcroft = true;
 	const bool randomize_lee_yannakakis = true;
-	const auto machine_and_translation = [&]{
+	if (args.output_filename != "" && args.output_filename != "-") {
+		throw runtime_error("File output is currently not supported");
+	}
+
+	/*
+	 * Then all the setup is done. Parsing the automaton,
+	 * constructing all types of sequences needed for the
+	 * test suite.
+ */ + const auto machine_and_translation = [&] { + const auto & filename = args.input_filename; time_logger t_("reading file " + filename); - if(use_stdio){ + if (filename == "" || filename == "-") { return read_mealy_from_dot(cin); } - if(filename.find(".txt") != string::npos) { + if (filename.find(".txt") != string::npos) { const auto m = read_mealy_from_txt(filename); const auto t = create_translation_for_mealy(m); return make_pair(move(m), move(t)); @@ -88,8 +185,8 @@ int main(int argc, char *argv[]) try { // every thread gets its own seed const auto random_seeds = [&] { vector<uint_fast32_t> seeds(4); - if (seed_provided) { - seed_seq s{seed}; + if (args.seed != 0) { + seed_seq s{args.seed}; s.generate(seeds.begin(), seeds.end()); } else { random_device rd; @@ -98,29 +195,35 @@ int main(int argc, char *argv[]) try { return seeds; }(); - auto all_pair_separating_sequences = [&]{ - if(no_suffix) return splitting_tree(0, 0); + auto all_pair_separating_sequences = [&] { + if (no_suffix) return splitting_tree(0, 0); - const auto splitting_tree_hopcroft = [&]{ + const auto splitting_tree_hopcroft = [&] { time_logger t("creating hopcroft splitting tree"); - return create_splitting_tree(machine, randomize_hopcroft ? randomized_hopcroft_style : hopcroft_style, random_seeds[0]); + return create_splitting_tree( + machine, randomize_hopcroft ? randomized_hopcroft_style : hopcroft_style, + random_seeds[0]); }(); return splitting_tree_hopcroft.root; }(); - auto sequence = [&]{ - if(no_suffix) return adaptive_distinguishing_sequence(0, 0); + auto sequence = [&] { + if (no_suffix) return adaptive_distinguishing_sequence(0, 0); - const auto tree = [&]{ + const auto tree = [&] { time_logger t("Lee & Yannakakis I"); - if(use_distinguishing_sequence) - return create_splitting_tree(machine, randomize_lee_yannakakis ? 
randomized_lee_yannakakis_style : lee_yannakakis_style, random_seeds[1]); + if (use_distinguishing_sequence) + return create_splitting_tree(machine, + randomize_lee_yannakakis + ? randomized_lee_yannakakis_style + : lee_yannakakis_style, + random_seeds[1]); else return result(machine.graph_size); }(); - const auto sequence_ = [&]{ + const auto sequence_ = [&] { time_logger t("Lee & Yannakakis II"); return create_adaptive_distinguishing_sequence(tree); }(); @@ -130,19 +233,25 @@ int main(int argc, char *argv[]) try { auto transfer_sequences = [&] { time_logger t("determining transfer sequences"); - if(prefix_type == "canonical") return create_transfer_sequences(canonical_transfer_sequences, machine, 0, random_seeds[2]); - if(prefix_type == "minimal") return create_transfer_sequences(minimal_transfer_sequences, machine, 0, random_seeds[2]); - if(prefix_type == "buggy") return create_transfer_sequences(buggy_transfer_sequences, machine, 0, random_seeds[2]); - if(prefix_type == "longest") return create_transfer_sequences(longest_transfer_sequences, machine, 0, random_seeds[2]); - - cerr << "Warning: no valid prefix type specified. 
Assuming minimal.\n"; - return create_transfer_sequences(minimal_transfer_sequences, machine, 0, random_seeds[2]); + switch (args.prefix_mode) { + case LEXMIN: + return create_transfer_sequences(canonical_transfer_sequences, machine, 0, + random_seeds[2]); + case MIN: + return create_transfer_sequences(minimal_transfer_sequences, machine, 0, + random_seeds[2]); + case BUGGY: + return create_transfer_sequences(buggy_transfer_sequences, machine, 0, random_seeds[2]); + case DFS: + return create_transfer_sequences(longest_transfer_sequences, machine, 0, + random_seeds[2]); + } }(); - auto inputs = create_reverse_map(translation.input_indices); + auto const inputs = create_reverse_map(translation.input_indices); - const auto separating_family = [&]{ - if(no_suffix) { + const auto separating_family = [&] { + if (no_suffix) { separating_set s{{word{}}}; vector<separating_set> suffixes(machine.graph_size, s); return suffixes; @@ -152,22 +261,58 @@ int main(int argc, char *argv[]) try { return create_separating_family(sequence, all_pair_separating_sequences); }(); - if(streaming){ + /* + * From here on, we will be spamming the output with test cases. + * Depending on the operation mode, this will be either a finite + * or infinite test suite. + */ + const bool fixed_part = args.mode == ALL || args.mode == FIXED; + const bool random_part = args.mode == ALL || args.mode == RANDOM; + + // we will remove redundancies using a radix tree/prefix tree/trie + trie<input> test_suite; + word buffer; + const auto output_word = [&inputs](const auto & w) { + for (const auto & x : w) { + cout << inputs[x] << ' '; + } + cout << endl; + }; + + if (fixed_part) { + // For the exhaustive/preset part we first collect all words + // (while removing redundant ones) before outputting them. 
time_logger t("outputting all preset tests"); - test(machine, transfer_sequences, separating_family, k_max, default_writer(inputs)); + test(machine, transfer_sequences, separating_family, args.k_max, + {[&buffer](auto const & w) { buffer.insert(buffer.end(), w.begin(), w.end()); }, + [&buffer, &test_suite]() { + test_suite.insert(buffer); + buffer.clear(); + return true; + }}); + + test_suite.for_each(output_word); } - if(random_part){ + if (random_part) { + // For the random part we immediately output new words, since + // there is no way of collecting an infinite set first... + // Note that this part terminates when the stream is closed. time_logger t("outputting all random tests"); - const auto k_max_ = streaming ? k_max + 1 : 0; - - if (suffix_based) { - randomized_test_suffix(machine, transfer_sequences, separating_family, k_max_, - rnd_length, default_writer(inputs), random_seeds[3]); - } else { - randomized_test(machine, transfer_sequences, separating_family, k_max_, rnd_length, - default_writer(inputs), random_seeds[3]); - } + const auto k_max_ = fixed_part ? 
args.k_max + 1 : 0; + + randomized_test( + machine, transfer_sequences, separating_family, k_max_, args.rnd_length, + {[&buffer](auto const & w) { buffer.insert(buffer.end(), w.begin(), w.end()); }, + [&buffer, &test_suite, &output_word]() { + // TODO: probably we want to bound the size of the prefix tree + if (test_suite.insert(buffer)) { + output_word(buffer); + } + buffer.clear(); + return bool(cout); + }}, + random_seeds[3]); } } catch (exception const & e) { diff --git a/src/measure.cpp b/src/measure.cpp deleted file mode 100644 index 6bd6048afdaa3658dee370807d08f3feec3c04e2..0000000000000000000000000000000000000000 --- a/src/measure.cpp +++ /dev/null @@ -1,41 +0,0 @@ -#include <trie.hpp> - -#include <cstdint> -#include <iostream> -#include <sstream> -#include <string> -#include <unordered_map> - -using namespace std; - -template <typename T> -int func(std::istream & in, std::ostream & out) { - unordered_map<string, T> translation; - trie<T> unique_traces; - - string line; - vector<T> current_word; - while (getline(in, line)) { - current_word.clear(); - // TODO: this can be done more efficiently, I guess - stringstream ss(line); - string symbol; - while (ss >> symbol) { - if (symbol.empty()) continue; - const auto id = translation.insert(make_pair(symbol, translation.size())).first->second; - current_word.push_back(id); - } - unique_traces.insert(current_word); - } - - const auto p = total_size(unique_traces); - out << p.first << '\t' << p.second << '\t' << p.first + p.second << endl; - - return 0; -} - -int main(int argc, char * argv[]) { - // default is an alphabet is maximal 2^32 = 4'294'967'296 symbols - // this bound does not really matter for speed or space - return func<uint32_t>(cin, cout); -} diff --git a/src/methods.cpp b/src/methods.cpp deleted file mode 100644 index a26395af5f06bbf763366711ded18b66db0ef6ca..0000000000000000000000000000000000000000 --- a/src/methods.cpp +++ /dev/null @@ -1,133 +0,0 @@ -#include <adaptive_distinguishing_sequence.hpp> 
-#include <read_mealy.hpp> -#include <separating_family.hpp> -#include <splitting_tree.hpp> -#include <test_suite.hpp> -#include <transfer_sequences.hpp> -#include <trie.hpp> - -#include <docopt.h> - -#include <future> -#include <iostream> -#include <random> -#include <string> - -using namespace std; - -static const char USAGE[] = - R"(FSM-based test methods - - Usage: - methods (hsi|ads) [options] <file> - - Options: - -h, --help Show current help - --version Show version - -s <seed>, --seed <seed> Specify a seed - --non-random Iterate inputs in specified order (as occurring in input file) - -k <states> Testing extra states [default: 1] - --print-suite Prints the whole test suite -)"; - -int main(int argc, char * argv[]) { - const auto args = docopt::docopt(USAGE, {argv + 1, argv + argc}, true, __DATE__ __TIME__); - - const string filename = args.at("<file>").asString(); - const size_t k_max = args.at("-k").asLong(); - - const auto machine = [&] { - if (filename.find(".txt") != string::npos) { - return read_mealy_from_txt(filename); - } else if (filename.find(".dot") != string::npos) { - return read_mealy_from_dot(filename).first; - } - - clog << "warning: unrecognized file format, assuming dot"; - return read_mealy_from_dot(filename).first; - }(); - - const auto random_seeds = [&] { - vector<uint_fast32_t> seeds(3); - if (args.at("--seed")) { - seed_seq s{args.at("--seed").asLong()}; - s.generate(seeds.begin(), seeds.end()); - } else { - random_device rd; - generate(seeds.begin(), seeds.end(), ref(rd)); - } - return seeds; - }(); - - auto sequence_fut = async([&] { - if (args.at("hsi").asBool()) { - return create_adaptive_distinguishing_sequence(result(machine.graph_size)); - } - const auto tree = create_splitting_tree(machine, args.at("--non-random").asBool() - ? 
lee_yannakakis_style - : randomized_lee_yannakakis_style, - random_seeds[0]); - return create_adaptive_distinguishing_sequence(tree); - }); - - auto pairs_fut = async([&] { - const auto tree = create_splitting_tree(machine, args.at("--non-random").asBool() - ? min_hopcroft_style - : randomized_min_hopcroft_style, - random_seeds[1]); - return tree.root; - }); - - auto prefixes_fut = async([&] { - return create_transfer_sequences(args.at("--non-random").asBool() - ? canonical_transfer_sequences - : minimal_transfer_sequences, - machine, 0, random_seeds[2]); - }); - - auto suffixes_fut - = async([&] { return create_separating_family(sequence_fut.get(), pairs_fut.get()); }); - - trie<input> test_suite; - word buffer; - - const auto suffixes = suffixes_fut.get(); - for(state s = 0; s < suffixes.size(); ++s){ - clog << "suffixes for " << s << endl; - for(auto s2 : suffixes[s].local_suffixes) { - for(auto x : s2){ - clog << x; - } - clog << endl; - } - } - - const auto prefixes = prefixes_fut.get(); - for(state s = 0; s < prefixes.size(); ++s) { - clog << "prefix for " << s << endl; - for(auto x : prefixes[s]) { - clog << x; - } - clog << endl; - } - - test(machine, prefixes, suffixes, k_max, - {[&buffer](auto const & w) { buffer.insert(buffer.end(), w.begin(), w.end()); }, - [&buffer, &test_suite]() { - test_suite.insert(buffer); - buffer.clear(); - return true; - }}); - - const auto p = total_size(test_suite); - cout << p.first << '\t' << p.second << '\t' << p.first + p.second << endl; - - if(args.at("--print-suite").asBool()){ - test_suite.for_each([](const auto & w){ - for(const auto & x : w) { - cout << x << ' '; - } - cout << endl; - }); - } -} diff --git a/src/pre_gephi_tool.cpp b/src/pre_gephi_tool.cpp deleted file mode 100644 index d1e99e564a1c88979bde9e8c6fe93d67816ddb4e..0000000000000000000000000000000000000000 --- a/src/pre_gephi_tool.cpp +++ /dev/null @@ -1,92 +0,0 @@ -#include <mealy.hpp> -#include <read_mealy.hpp> -#include <transfer_sequences.hpp> - 
-#include <boost/optional.hpp> - -#include <fstream> -#include <iostream> -#include <queue> -#include <string> -#include <vector> - -using namespace std; -using namespace boost; - -int main(int argc, char *argv[]){ - if(argc != 5) return argc; - - // Program options - const string machince_filename = argv[1]; - const string machince_positions_filename = argv[2]; - const string hypo_filename_pattern = argv[3]; - const size_t maximal_hypothesis = stoul(argv[4]); - - // Read all the hypothesis - translation trans; - vector<mealy> hypotheses; - for(size_t i = 0; i <= maximal_hypothesis; ++i){ - clog << "Reading hypo " << i << endl; - string hypothesis_filename = hypo_filename_pattern; - auto it = hypothesis_filename.find('%'); - hypothesis_filename.replace(it, 1, to_string(i)); - hypotheses.push_back(read_mealy_from_dot(hypothesis_filename, trans)); - } - - const auto machine = read_mealy_from_dot(machince_filename, trans); - - // Read the positions by gephi, indexed by state - // (export to .net file and then `tail +2 Untitled.net | awk '{print $3, $4}' > positions.txt`) - vector<pair<double, double>> positions(machine.graph_size); - ifstream position_file(machince_positions_filename); - for(auto & p : positions){ - position_file >> p.first >> p.second; - } - - // Visit all states and record in which hypo it is reached - vector<optional<size_t>> visited(machine.graph_size); - for(size_t i = 0; i <= maximal_hypothesis; ++i){ - clog << "Visiting hypo " << i << endl; - const auto state_cover = create_transfer_sequences(canonical_transfer_sequences, hypotheses[i], 0, 0); - for(auto && p : state_cover){ - state s = 0; - for(auto && inp : p){ - if(!visited[s]) visited[s] = i; - s = apply(machine, s, inp).to; - } - if(!visited[s]) visited[s] = i; - } - } - - // Output a dot per hypo, making a movie - for(size_t h = 0; h < hypotheses.size(); ++h){ - clog << "Saving frame " << h << endl; - string hypothesis_filename = hypo_filename_pattern + ".movie"; - auto it = 
hypothesis_filename.find('%'); - hypothesis_filename.replace(it, 1, to_string(h)); - - ofstream out(hypothesis_filename); - out << "digraph {\n"; - - for(state s = 0; s < machine.graph_size; ++s){ - bool is_visited = visited[s] ? (visited[s].value() <= h) : false; - out << "\t" << "s" << s << " ["; - out << "color=\"" << (is_visited ? "green" : "red") << "\"" << ", "; - out << "pos=\"" << positions[s].first << "," << positions[s].second << "\""; - out << "]\n"; - } - - for(state s = 0; s < machine.graph_size; ++s){ - vector<bool> should_ignore(machine.graph_size, false); - should_ignore[s] = true; - for(input i = 0; i < machine.input_size; ++i){ - const auto t = apply(machine, s, i).to; - if(should_ignore[t]) continue; - out << "\t" << "s" << s << " -> " << "s" << t << "\n"; - should_ignore[t] = true; - } - } - - out << "}" << endl; - } -} diff --git a/src/reachability.cpp b/src/reachability.cpp deleted file mode 100644 index 0ff3a92fe89912ee029ac4b9dcaff004d3f8cfc8..0000000000000000000000000000000000000000 --- a/src/reachability.cpp +++ /dev/null @@ -1,79 +0,0 @@ -#include <mealy.hpp> -#include <read_mealy.hpp> - -#include <fstream> -#include <iostream> -#include <queue> -#include <vector> - -using namespace std; - -int main(int argc, char *argv[]){ - if(argc != 3) return 1; - - const string filename = argv[1]; - const string machince_positions_filename = argv[2]; - - // Read machine and its positions - const auto machine_translation = read_mealy_from_dot(filename); - const auto & machine = machine_translation.first; - const auto & translation = machine_translation.second; - - vector<pair<double, double>> positions(machine.graph_size); - ifstream position_file(machince_positions_filename); - for(auto & p : positions){ - position_file >> p.first >> p.second; - } - - // read subalphabet - cout << "Machine is read, please provide a subalphabet" << endl; - vector<input> subalphabet; - string in; - while(cin >> in){ - const input x = translation.input_indices.at(in); - 
subalphabet.push_back(x); - } - - // visit with subalphabet - vector<bool> visited(machine.graph_size, false); - queue<state> work; - work.push(0); - while(!work.empty()){ - const state s = work.front(); - work.pop(); - - if(visited[s]) continue; - visited[s] = true; - - for(auto x : subalphabet){ - const state t = apply(machine, s, x).to; - if(!visited[t]) work.push(t); - } - } - - // write to dot - ofstream out(filename + ".reachable.dot"); - out << "digraph {\n"; - - for(state s = 0; s < machine.graph_size; ++s){ - bool is_visited = visited[s]; - out << "\t" << "s" << s << " ["; - out << "color=\"" << (is_visited ? "green" : "red") << "\"" << ", "; - out << "pos=\"" << positions[s].first << "," << positions[s].second << "\""; - out << "]\n"; - } - - for(state s = 0; s < machine.graph_size; ++s){ - vector<bool> should_ignore(machine.graph_size, false); - should_ignore[s] = true; - for(input i = 0; i < machine.input_size; ++i){ - const auto t = apply(machine, s, i).to; - if(should_ignore[t]) continue; - out << "\t" << "s" << s << " -> " << "s" << t << "\n"; - should_ignore[t] = true; - } - } - - out << "}" << endl; -} - diff --git a/src/scatter_plot.cpp b/src/scatter_plot.cpp deleted file mode 100644 index 7732293882b20ab39b9009ea3fe84fb3ce811ff8..0000000000000000000000000000000000000000 --- a/src/scatter_plot.cpp +++ /dev/null @@ -1,81 +0,0 @@ -#include <docopt.h> - -#include <cstdint> -#include <cmath> -#include <fstream> -#include <future> -#include <iostream> -#include <stdexcept> -#include <vector> - -using namespace std; - -static const char USAGE[] = - R"(Generate a statistical learning graph from multiple runs - - Usage: - learning_graph <file> ... 
- - Options: - -h, --help Show this screen - --version Show version -)"; - -struct datapoint { - uint64_t states; - uint64_t learning_queries; - uint64_t learning_inputs; - uint64_t testing_queries; - uint64_t testing_inputs; -}; - -using dataset = vector<datapoint>; - -static void accumulate_dataset(dataset & ds) { - for (size_t i = 0; i < ds.size() - 1; ++i) { - ds[i + 1].learning_queries += ds[i].learning_queries; - ds[i + 1].learning_inputs += ds[i].learning_inputs; - ds[i + 1].testing_queries += ds[i].testing_queries; - ds[i + 1].testing_inputs += ds[i].testing_inputs; - } -} - -int main(int argc, char * argv[]) { - const auto args = docopt::docopt(USAGE, {argv + 1, argv + argc}, true, __DATE__ __TIME__); - - vector<future<dataset>> dataset_futures; - for (auto const & filename : args.at("<file>").asStringList()) { - dataset_futures.emplace_back(async([filename] { - fstream file(filename); - if (!file) throw runtime_error("Could not open file " + filename); - - dataset s; - datapoint p; - while (file >> p.states >> p.learning_queries >> p.learning_inputs >> p.testing_queries - >> p.testing_inputs) { - s.push_back(p); - } - - accumulate_dataset(s); - - return s; - })); - } - - vector<dataset> datasets; - clog << "datasets"; - for (auto & f : dataset_futures) { - datasets.emplace_back(f.get()); - clog << ' ' << datasets.back().size(); - if (datasets.back().size() == 0) throw runtime_error("empty dataset"); - } - clog << endl; - - for (auto const & set : datasets) { - for (auto const & p : set) { - const auto v - = p.learning_queries + p.learning_inputs + p.testing_queries + p.testing_inputs; - cout << p.states << '\t' << v << endl; - } - } -} diff --git a/src/stats.cpp b/src/stats.cpp deleted file mode 100644 index 3289334a61e95c311909716ddc44ea9927d9bae7..0000000000000000000000000000000000000000 --- a/src/stats.cpp +++ /dev/null @@ -1,175 +0,0 @@ -#include <adaptive_distinguishing_sequence.hpp> -#include <mealy.hpp> -#include <reachability.hpp> -#include 
<read_mealy.hpp> -#include <separating_family.hpp> -#include <splitting_tree.hpp> -#include <transfer_sequences.hpp> - -#include <algorithm> -#include <fstream> -#include <future> -#include <iostream> -#include <numeric> -#include <random> - -using namespace std; - -template <typename C, typename S> -void print_quantiles(C const & container, S && selector, ostream & out) { - const auto index_weight = [&](double p) -> pair<size_t, double> { - auto index = (p * (container.size() - 1)); - return {floor(index), 1 - fmod(index, 1)}; - }; - - auto sorted_container = container; - sort(sorted_container.begin(), sorted_container.end(), - [&](auto const & l, auto const & r) { return selector(l) < selector(r); }); - out << "min/Q1/Q2/Q3/max "; - out << selector(sorted_container.front()) << '/'; - - const auto i25 = index_weight(0.25); - out << i25.second * selector(sorted_container[i25.first]) - + (1 - i25.second) * selector(sorted_container[i25.first + 1]) - << '/'; - - const auto i50 = index_weight(0.50); - out << i50.second * selector(sorted_container[i50.first]) - + (1 - i50.second) * selector(sorted_container[i50.first + 1]) - << '/'; - - const auto i75 = index_weight(0.75); - out << i75.second * selector(sorted_container[i75.first]) - + (1 - i75.second) * selector(sorted_container[i75.first + 1]) - << '/'; - - out << selector(sorted_container.back()); -} - -static auto count_self_loops(mealy const & m) { - vector<long> ret(m.graph_size); - for (state s = 0; s < m.graph_size; ++s) { - ret[s] = count_if(m.graph[s].begin(), m.graph[s].end(), [=](auto e) { return e.to == s; }); - } - return ret; -} - -static void print_stats_for_machine(string filename) { - const auto machine = [&] { - if (filename.find(".txt") != string::npos) { - return read_mealy_from_txt(filename); - } else if (filename.find(".dot") != string::npos) { - return read_mealy_from_dot(filename).first; - } - - clog << "warning: unrecognized file format, assuming dot"; - return 
read_mealy_from_dot(filename).first; - }(); - - cout << "machine " << filename << " has\n"; - cout << '\t' << machine.graph_size << " states\n"; - cout << '\t' << machine.input_size << " inputs\n"; - cout << '\t' << machine.output_size << " outputs" << endl; - - const auto reachable_machine = reachable_submachine(machine, 0); - cout << '\t' << reachable_machine.graph_size << " reachable states" << endl; - - const auto prefixes = create_transfer_sequences(canonical_transfer_sequences, reachable_machine, 0, 0); - cout << "prefixes "; - print_quantiles(prefixes, [](auto const & l) { return l.size(); }, cout); - cout << endl; - - const auto self_loop_counts = count_self_loops(reachable_machine); - cout << "self loops "; - print_quantiles(self_loop_counts, [](auto const & l) { return l; }, cout); - cout << endl; - - const auto counted_outputs = [&] { - vector<unsigned> counts(reachable_machine.input_size, 0); - for (auto && r : reachable_machine.graph) - for (auto && e : r) counts[e.out]++; - return counts; - }(); - cout << "output usage "; - print_quantiles(counted_outputs, [](auto const & l) { return l; }, cout); - cout << endl; - { - ofstream extended_log("extended_log.txt"); - for(auto && x : counted_outputs) extended_log << x << '\n'; - } - - const auto counted_states = [&] { - vector<unsigned> counts(reachable_machine.graph_size, 0); - for (auto && r : reachable_machine.graph) - for (auto && e : r) counts[e.to]++; - return counts; - }(); - cout << "state usage "; - print_quantiles(counted_states, [](auto const & l) { return l; }, cout); - cout << endl; - { - ofstream extended_log("extended_log2.txt"); - for(auto && x : counted_states) extended_log << x << '\n'; - } - - const auto unique_transitions = [&] { - vector<unsigned> ret(reachable_machine.input_size+1, 0); - for (state s = 0; s < reachable_machine.graph_size; ++s) { - unsigned count = 0; - vector<bool> c(reachable_machine.graph_size, false); - for (auto && x : reachable_machine.graph[s]) { - if(!c[x.to]) { 
- c[x.to] = true; - count++; - } - } - ret[count]++; - } - return ret; - }(); - cout << "transition usage "; - print_quantiles(unique_transitions, [](auto const & l) { return l; }, cout); - cout << endl; - { - ofstream extended_log("extended_log3.txt"); - for (auto && x : unique_transitions) extended_log << x << '\n'; - } - - return; - - random_device rd; - uint_fast32_t seeds[] = {rd(), rd()}; - auto sequence_fut = async([&] { - const auto tree = create_splitting_tree(machine, randomized_lee_yannakakis_style, seeds[0]); - return create_adaptive_distinguishing_sequence(tree); - }); - - auto pairs_fut = async([&] { - const auto tree = create_splitting_tree(machine, randomized_min_hopcroft_style, seeds[1]); - return tree.root; - }); - - const auto suffixes = create_separating_family(sequence_fut.get(), pairs_fut.get()); - - cout << "number of suffixes (randomized) "; - print_quantiles(suffixes, [](auto const & l) { return l.local_suffixes.size(); }, cout); - cout << endl; - - vector<word> all_suffixes; - for (auto const & s : suffixes) - for (auto const & t : s.local_suffixes) all_suffixes.push_back(t); - - cout << "length of all suffixes (randomized) "; - print_quantiles(all_suffixes, [](auto const & l) { return l.size(); }, cout); - cout << endl; -} - -int main(int argc, char * argv[]) { - if (argc != 2) { - cerr << "usages: stats <filename>" << endl; - return 1; - } - - const string filename = argv[1]; - print_stats_for_machine(filename); -} diff --git a/src/trees.cpp b/src/trees.cpp deleted file mode 100644 index 54002c297034dd0cf24de213577235744be71547..0000000000000000000000000000000000000000 --- a/src/trees.cpp +++ /dev/null @@ -1,31 +0,0 @@ -#include <adaptive_distinguishing_sequence.hpp> -#include <read_mealy.hpp> -#include <splitting_tree.hpp> -#include <write_tree_to_dot.hpp> - -#include <iostream> -#include <random> -#include <string> - -using namespace std; - -int main(int argc, char * argv[]) { - if (argc != 3) return 1; - const string filename = 
argv[1]; - - const string randomized_str = argv[2]; - const bool randomized = randomized_str == "randomized"; - - const auto machine_and_translation = read_mealy_from_dot(filename); - const auto & machine = machine_and_translation.first; - const auto & translation = machine_and_translation.second; - - const auto options = randomized ? randomized_lee_yannakakis_style : lee_yannakakis_style; - - random_device rd; - const auto tree = create_splitting_tree(machine, options, rd()); - const auto sequence = create_adaptive_distinguishing_sequence(tree); - - write_splitting_tree_to_dot(tree.root, filename + ".tree"); - write_adaptive_distinguishing_sequence_to_dot(sequence, translation, filename + ".seq"); -}