Commit 42d3bb3a authored by Michele's avatar Michele

started working on learning nondeterministic tic tac toe

parent 7c2e3881
......@@ -63,10 +63,18 @@ class TicTacToeOutputPurpose(Purpose):
else:
# In this SUL we have many outputs (>4000)
# for this reason we use a placeholder
# It should not be a problem later because, before checking
# if a row is more specific than another, an observation query
# should be asked for all entries. (And determinism -> no problem)
# We also need a way to compare a set of outputs with the
# placeholder
return 'PLACEHOLDER'
def isIncluded(self, set1, set2):
    """Tell whether the outputs in ``set1`` are all contained in ``set2``.

    The string 'PLACEHOLDER' may be passed instead of a real set. As
    ``set2`` it contains everything (including another placeholder); as
    ``set1`` it is contained in nothing but a placeholder. Two real sets
    are compared with ``issubset``.
    """
    placeholder = 'PLACEHOLDER'
    if set2 == placeholder:
        return True
    if set1 == placeholder:
        return False
    return set1.issubset(set2)
def allOutputs(self):
    """Return the set of all 9-tuples over the symbols 'X', 'O' and '_'."""
    symbols = 'XO_'
    boards = itertools.product(symbols, repeat=9)
    return set(boards)
# Copyright (c) 2015 Michele Volpato
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
import random
random.seed(100)
import os, inspect, sys
# Include project dir in path
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.append(parentdir)
import socket
import logging
import itertools
from tictactoe.tictacteacher import TicTacToeTeacher
from nd_tictacoracle import TicTacToeOracle
from tictactoe.tictacpurpose import TicTacToeInputPurpose, TicTacToeOutputPurpose
from learning.learning import LearningAlgorithm
#from testing.randomtesting import RandomTester
from tictactoe.completetesting import CompleteTicTacToeTester
from systems.implementations import SuspensionAutomaton
import helpers.bisimulation as bi
import csv
# Driver script: configure logging, build the teacher, oracle, purposes and
# tester for the Tic Tac Toe SUL, run the learning algorithm and report how
# many inputs were sent.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

HOST = 'localhost'
PORT = 29000 # Arbitrary non-privileged port

outputExpert = TicTacToeOutputPurpose()
inputExpert = TicTacToeInputPurpose()

inputs = {'0', '1', '2', '3', '4', '5', '6', '7', '8'}
# Use a placeholder for outputs
outputs = outputExpert.allOutputs()
#outputs = set(itertools.product('XO_', repeat=9))
quiescence = 'delta'

T1 = TicTacToeTeacher(HOST, PORT)
try:
    # Everything that uses the teacher lives inside the try block so that
    # T1.close() also runs when learning aborts with an exception.
    O1 = TicTacToeOracle(inputs, quiescence)

    #tester = RandomTester(T1, 50000, 100)
    tester = CompleteTicTacToeTester(T1)

    # The learner is given a "dotFiles" directory next to this script as
    # its printPath.
    currentdir = os.path.dirname(os.path.abspath(
        inspect.getfile(inspect.currentframe())))
    path = os.path.join(currentdir, "dotFiles")

    print("Starting learning...")
    #print(T1.oneOutput(('1')))
    L = LearningAlgorithm(T1, O1, printPath=path, maxLoops=4,
                          tablePreciseness=100000, logger=logger,
                          tester=tester, outputPurpose=outputExpert,
                          inputPurpose=inputExpert)
    minus, plus = L.run()
    print("Models learned.")
    print("Number of inputs sent to the SUL: " + str(T1.getInputCounter()))
finally:
    T1.close()
# Copyright (c) 2015 Michele Volpato
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
import os, inspect, sys
# Include project dir in path
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
currentdir = os.path.dirname(currentdir)
currentdir = os.path.dirname(currentdir)
sys.path.append(currentdir)
from teachers.baseoracle import AbstractOracle
import random
import itertools
# SUL is nondeterministic.
class TicTacToeOracle(AbstractOracle):
    """Oracle that answers observation queries for the Tic Tac Toe SUL."""

    def __init__(self, inputs, quiescence):
        """Keep a copy of the input alphabet, the quiescence label and the
        set of all 9-tuples over 'X', 'O' and '_' (the known outputs)."""
        self._inputs = inputs.copy()
        self._quiescence = quiescence
        self._outputs = set(itertools.product('XO_', repeat=9))

    def observation(self, trace, outputs):
        """Reply to an observation query.

        trace is a list of inputs and/or outputs; outputs is the set of
        outputs observed so far (after trace). Returns True or False.
        """
        # Empty trace, or trace ending in an output or quiescence:
        # only quiescence is enabled.
        if len(trace) == 0 or trace[-1] not in self._inputs:
            return self._quiescence in outputs

        # A single input: exactly 8 observed outputs are required.
        if len(trace) == 1:
            return len(outputs) == 8

        # Trace ends in an input: look backwards (from the element before
        # the last one down to index 1) for the most recent known output,
        # defaulting to the element right before the final input.
        lastOutput = trace[-2]
        for candidate in reversed(trace[1:-1]):
            if candidate in self._outputs:
                lastOutput = candidate
                break

        # +1 because same output is possible
        return len(outputs) == lastOutput.count('_') + 1
# Tic Tac Toe
# python version of the javascript found at http://ostermiller.org/calc/tictactoe.html
# Copyright 2015 Michele Volpato - m.volpato@cs.ru.nl
#
# This program (Tic Tac Toe) is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# ---------------------------------------------------------------
#
# This program is derived from Tic-Tac-Toe at http://ostermiller.org/calc/tictactoe.html
# Copyright Stephen Ostermiller 2002-2014
import random
random.seed(1)
import socket
import sys
import logging
# Side to move. nextTurn() negates it; when it is 1 the p1_index player is
# consulted, otherwise the p2_index player.
turn = -1
# Result tallies, updated by drawState() and newGame().
xWon = 0
oWon = 0
catsGame = 0
# Board contents; makeCells() replaces this with nine cell strings.
cells = []
# start with player 1 human and player 2 intermediate
# (nextTurn() maps 1-4 to beginner/intermediate/experienced/perfect moves;
# any other value triggers no automatic move)
p1_index = 0
p2_index = 2
# start with player 1
# (newGame() sets turn to -1 when this is 1, so its nextTurn() call makes it 1)
firstMove = 1
def makeCells():
    """Rebind the module-level ``cells`` to a fresh list of nine empty strings.

    Each entry is one board cell; '' means the cell is empty.
    """
    global cells
    cells = [''] * 9
# loadStats not needed
def nextTurn():
    """Hand the turn to the other side and trigger its automatic move.

    ``turn`` is negated. The player index of the side now to move
    (``p1_index`` when turn is 1, ``p2_index`` otherwise) selects the move
    routine: 1 beginner, 2 intermediate, 3 experienced, 4 perfect. Any
    other index makes no move here.
    """
    global turn
    turn = -turn
    level = p1_index if turn == 1 else p2_index
    if level == 1:
        beginnerMove()
    elif level == 2:
        intermediateMove()
    elif level == 3:
        experiencedMove()
    elif level == 4:
        perfectMove()
def getLegalMoves(state):
    """Return a 9-bit mask with bit i set when cell i of ``state`` is free.

    ``state`` uses two bits per cell; a cell counts as free when the upper
    bit of its pair (bit 2*i+1) is clear.
    """
    return sum(
        1 << cell
        for cell in range(9)
        if not state & (1 << (2 * cell + 1))
    )
def moveRandom(moves):
    """Play one of the cells flagged in ``moves``, picked uniformly at random.

    ``moves`` is a 9-bit mask (bit i set means cell i is a candidate).
    Nothing happens when the mask is empty; otherwise ``move`` is called
    exactly once with the chosen cell index.

    The previous implementation drew an index in 0..n-1 but compared it
    against a running count that starts at 1, so a draw of 0 played no
    move at all and the last candidate could never be chosen.
    """
    candidates = [cell for cell in range(9) if moves & (1 << cell)]
    if candidates:
        # System is nondeterministic.
        move(random.choice(candidates))
def openingBook(state):
    """Look up a canned reply for boards with at most one occupied cell.

    Only the "occupied" bit of each cell (mask 0x2AAAA) is considered.
    Returns a 9-bit mask of suggested cells for the empty board and for
    each single-occupied-cell pattern, or 0 when the position is not in
    the book.
    """
    occupied = state & 0x2AAAA
    # Four of the single-cell patterns share the same reply.
    if occupied in (0x00002, 0x00020, 0x02000, 0x20000):
        return 0x010
    replies = {
        0x00000: 0x1FF,
        0x00200: 0x145,
        0x00008: 0x095,
        0x00080: 0x071,
        0x00800: 0x11C,
        0x08000: 0x152,
    }
    return replies.get(occupied, 0)
def perfectMove():
    """Make a move for the side to play using the opening book or search.

    Does nothing when detectWin() reports a result for the current board.
    Otherwise the opening book is consulted; if it has no suggestion,
    every legal cell is scored with moveValue() and the cells sharing the
    highest score become the candidates. One candidate is then played via
    moveRandom().
    """
    board = getState()
    if detectWin(board) != 0:
        return

    legal = getLegalMoves(board)
    candidates = openingBook(board)
    if candidates == 0:
        best = -999
        for cell in range(9):
            if not legal & (1 << cell):
                continue
            score = moveValue(board, cell, turn, turn, 15, 1)
            if score > best:
                # Strictly better: forget the previous candidates.
                best = score
                candidates = 0
            if score == best:
                candidates |= 1 << cell
    moveRandom(candidates)
def moveValue(istate, move, moveFor, nextTurn, limit, depth):
    """Score playing ``move`` on ``istate`` by recursive game-tree search.

    Parameters:
        istate   -- packed board before the move.
        move     -- cell index (0-8) to play.
        moveFor  -- the side the score is computed for.
        nextTurn -- the side that places the mark for this move.
        limit    -- depth at which the search stops.
        depth    -- current search depth (callers start at 1).

    Returns 0 for a full board without winner, ``10 - depth`` when the move
    wins for ``moveFor``, ``depth - 10`` when it wins for the other side,
    and otherwise the best reachable score of the replies (minimised after
    a ``moveFor`` move, maximised after an opponent move). The values 999
    and -999 are "no information" sentinels: they are returned when the
    depth limit is hit or no reply was scored, and are ignored when
    combining the scores of replies.

    The original port still called JavaScript's ``Math.abs``, which raises
    NameError in Python; the built-in ``abs`` is used instead.
    """
    state = stateMove(istate, move, nextTurn)
    winner = detectWin(state)
    if (winner & 0x300000) == 0x300000:
        return 0
    elif winner != 0:
        if moveFor == nextTurn:
            return 10 - depth
        else:
            return depth - 10

    # After moveFor has moved the opponent replies, so look for the lowest
    # score (start high); after the opponent, look for the highest.
    hope = 999
    if moveFor != nextTurn:
        hope = -999
    if depth == limit:
        return hope

    moves = getLegalMoves(state)
    for i in range(9):
        if (moves & (1 << i)) != 0:
            value = moveValue(state, i, moveFor, -nextTurn,
                              10 - abs(hope), depth + 1)
            # Skip sentinel results from cut-off branches.
            if abs(value) != 999:
                if moveFor == nextTurn and value < hope:
                    hope = value
                elif moveFor != nextTurn and value > hope:
                    hope = value
    return hope
def detectWinMove(state, cellNum, nextTurn):
    """Return detectWin() for ``state`` with a mark added at ``cellNum``.

    The mark is encoded as 0x2 when ``nextTurn`` is -1 and as 0x3 otherwise.
    """
    mark = 0x2 if nextTurn == -1 else 0x3
    return detectWin(state | (mark << (2 * cellNum)))
def beginnerMove():
    """Play a random legal cell, unless detectWin() already reports a result."""
    board = getState()
    if detectWin(board) != 0:
        return
    moveRandom(getLegalMoves(board))
def getGoodMove(state):
    """Play an immediately decisive cell if there is one.

    First every legal cell is tried for the side to move (``turn``), then
    for the other side (``-turn``). The first cell for which
    detectWinMove() reports a non-zero result is played with move() and 0
    is returned. When no such cell exists nothing is played and the mask
    of legal cells is returned.
    """
    legal = getLegalMoves(state)
    for player in (turn, -turn):
        for cell in range(9):
            if legal & (1 << cell) and detectWinMove(state, cell, player):
                move(cell)
                return 0
    return legal
def intermediateMove():
    """Play via getGoodMove(), falling back to a random legal cell.

    Nothing is done when detectWin() already reports a result. The mask
    returned by getGoodMove() is handed to moveRandom().
    """
    board = getState()
    if detectWin(board) != 0:
        return
    moveRandom(getGoodMove(board))
def experiencedMove():
    """Play from the opening book when possible, else via getGoodMove().

    Nothing is done when detectWin() already reports a result. On a
    completely empty board (state 0) the candidate mask is forced to
    0x145. If the book offers nothing, getGoodMove() supplies the
    candidates. The resulting mask is handed to moveRandom().
    """
    board = getState()
    if detectWin(board) != 0:
        return
    options = openingBook(board)
    if board == 0:
        options = 0x145
    options = options or getGoodMove(board)
    moveRandom(options)
def getState():
    """Pack the nine entries of ``cells`` into one integer.

    Cell i occupies bits 2*i and 2*i+1: 'X' is stored as 0x3, 'O' as 0x2
    and anything else as 0.
    """
    codes = {'X': 0x3, 'O': 0x2}
    packed = 0
    for index in range(9):
        packed |= codes.get(cells[index], 0) << (2 * index)
    return packed
def detectWin(state):
    """Classify the packed board ``state``.

    Returns 0 when there is no result yet. For a completed line the
    result is 0x100000 (all three cells hold 0x3, i.e. 'X') or 0x200000
    (all three hold 0x2, i.e. 'O') combined with the bits of the winning
    line. A board whose nine cells are all occupied without a completed
    line yields 0x300000.
    """
    occupied_bits = 0x2AAAA
    # Checked in this order: the three rows, the three columns, then the
    # two diagonals; the first match wins.
    lines = (
        0x3F000, 0x00FC0, 0x0003F,
        0x030C3, 0x0C30C, 0x30C30,
        0x03330, 0x30303,
    )
    for line in lines:
        marks = state & line
        if marks == line:
            return 0x100000 | line
        o_pattern = line & occupied_bits
        if marks == o_pattern:
            return 0x200000 | o_pattern
    if (state & occupied_bits) == occupied_bits:
        return 0x300000
    return 0
def recordWin(winner):
    """Increment the module-level tally that matches ``winner``.

    ``winner`` is a detectWin() result: 0x100000 in its top bits counts
    for ``xWon``, 0x200000 for ``oWon`` and 0x300000 for ``catsGame``.
    Any other value changes nothing.

    The counters are declared ``global``; without that declaration the
    augmented assignments raised UnboundLocalError.
    """
    global xWon, oWon, catsGame
    outcome = winner & 0x300000
    if outcome == 0x100000:
        xWon += 1
    elif outcome == 0x200000:
        oWon += 1
    elif outcome == 0x300000:
        catsGame += 1
# drawStats is not needed
def clearStats():
    """Reset the module-level tallies ``xWon``, ``oWon`` and ``catsGame`` to 0.

    The names are declared ``global``; previously the assignments only
    created local variables and the tallies were left untouched.
    """
    global xWon, oWon, catsGame
    xWon = 0
    oWon = 0
    catsGame = 0
def drawState(state):
    """Write the packed board ``state`` into ``cells`` and tally a result.

    If detectWin() reports a result for ``state``, the matching counter
    (``xWon``, ``oWon`` or ``catsGame``) is incremented. Then each of the
    nine cells is set in place to 'X', 'O' or '' according to its two
    bits in ``state``.
    """
    global oWon, xWon, catsGame, cells
    outcome = detectWin(state) & 0x300000
    if outcome == 0x100000:
        xWon += 1
    elif outcome == 0x200000:
        oWon += 1
    elif outcome != 0:
        catsGame += 1

    for index in range(9):
        if not state & (1 << (2 * index + 1)):
            cells[index] = ''      # upper bit clear: empty cell
        elif state & (1 << (2 * index)):
            cells[index] = 'X'     # both bits set
        else:
            cells[index] = 'O'     # only the upper bit set
def stateMove(state, move, nextTurn):
    """Return ``state`` with a mark added at cell ``move``.

    The mark is 0x2 when ``nextTurn`` is -1 and 0x3 otherwise, shifted
    into the two bits that belong to the cell.
    """
    mark = 0x2 if nextTurn == -1 else 0x3
    return state | (mark << (2 * move))
def move(cell):
    """Place the mark of the side to move on ``cell`` and pass the turn.

    Nothing happens when the cell is not empty or when detectWin() already
    reports a result for the current board. Otherwise the new state is
    written with drawState() and nextTurn() is called.
    """
    if cells[cell] != '':
        return
    board = getState()
    if detectWin(board) != 0:
        return
    drawState(stateMove(board, cell, turn))
    nextTurn()
def countMoves(state):
    """Count the cells of ``state`` whose upper bit (bit 2*i+1) is set."""
    return sum(1 for cell in range(9) if state & (1 << (2 * cell + 1)))
def newGame():
    """Start a fresh game.

    If the current board has no result yet but holds more than one mark,
    the game is scored before being discarded: ``oWon`` is incremented
    when ``turn`` is 1, ``xWon`` otherwise. The board is then cleared with
    drawState(0), ``turn`` is preset from ``firstMove`` (-1 when firstMove
    is 1, else 1) and nextTurn() is called.
    """
    global turn, oWon, xWon
    board = getState()
    if detectWin(board) == 0 and countMoves(board) > 1:
        if turn == 1:
            oWon += 1
        else:
            xWon += 1
    drawState(0)
    turn = -1 if firstMove == 1 else 1
    nextTurn()
# getCookie is not needed
def nice_print():
    """Print the board as three rows of three cells.

    Empty cells are shown as their index (0-8) so they can be picked as
    the next move.
    """
    for start in (0, 3, 6):
        labels = [
            cells[pos] if cells[pos] != '' else str(pos)
            for pos in range(start, start + 3)
        ]
        print("| " + "".join(label + " | " for label in labels))
def in_line_board():
    """Return the board as a 9-character string, with '_' for empty cells."""
    return ''.join(
        cells[pos] if cells[pos] != '' else '_'
        for pos in range(9)
    )
if __name__ == "__main__":
    # Line-based TCP server around the game. It accepts one client, greets
    # it with "CONNECTED", then answers every request with the current
    # board (see in_line_board()) followed by a newline. The first
    # character of a request selects the action: 0 to 8 for moves, 9 for
    # reset. A request containing "EXIT", or a closed connection, ends
    # the session.
    makeCells()
    newGame()
    #nice_print()

    HOST = 'localhost'
    PORT = 29000 # Arbitrary non-privileged port

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    logger.info('Socket created')
    conn = None
    try:
        #Bind socket to local host and port
        try:
            s.bind((HOST, PORT))
        except socket.error as msg:
            # OSError is not subscriptable in Python 3: use its attributes.
            logger.error('Bind failed. Error Code : ' + str(msg.errno)
                         + ' Message ' + str(msg.strerror))
            sys.exit()
        logger.info('Socket bind complete')

        #Start listening on socket
        s.listen(1) # only 1 connection
        logger.info('Socket now listening')

        #wait to accept a connection - blocking call
        conn, addr = s.accept()
        logger.info("Connected with client")
        conn.send(bytes('CONNECTED\n', 'UTF-8'))

        while True:
            #Receiving from client
            data = conn.recv(1024)
            if not data:
                break
            text = data.decode("utf-8", errors="replace")
            if "EXIT" in text:
                break
            try:
                move1 = int(text[0])
            except ValueError:
                # Not a digit: treat it like an out-of-range move, which is
                # ignored below while the board is still sent back.
                logger.warning("Ignoring invalid request: %r", text)
                move1 = -1
            logger.debug("Received: " + str(move1))

            if move1 == 9:
                newGame()
                logger.debug("Resetting")
            elif (move1 > 9 or move1 < 0):
                pass
            else:
                move(move1)
            #nice_print()

            state = getState()
            winner = detectWin(state)
            # build an output
            # _O_X_O_X_ stands for
            # | _ | O | _ |
            # | X | _ | O |
            # | _ | X | _ |
            board = in_line_board()
            if winner != 0:
                logger.debug("We have a winner!")
                #board = board + "END"
                conn.sendall(bytes(board+"\n", 'UTF-8'))
                # The final board has been reported; start over.
                newGame()
            else:
                logger.debug("Sending: " + board)
                conn.sendall(bytes(board+"\n", 'UTF-8'))

        logger.debug("Sending: EXIT")
        conn.sendall(bytes("EXIT", 'UTF-8'))
    finally:
        # out of loop (or failed): always release the sockets
        if conn is not None:
            conn.close()
        s.close()