As mentioned in my previous post, I’ve been reading “Collective Intelligence” by Toby Segaram and I’m really enjoying in it. It’s different to a lot of programming books, in that rather than focusing a specific language or API it focus on a particular set of problems and shows techniques that can be used to crack them.
For example I’ve known for a long time that it’s easy to create an abstract syntax tree (AST) in F#:
/// Untyped expression tree
type Expression =
| Multiply of Expression * Expression
| Add of Expression * Expression
| Subtract of Expression * Expression
| GreaterThan of Expression * Expression
| If of Expression * Expression * Expression
| Constant of int
| Parameter of int
Here we present a simple AST for representing numeric expressions, while it’s easy to provide a function to evaluate such a tree:
/// Given a list of parameters evaluate the tree
let evaluateExpression parameters =
let rec innerEval tree =
match tree with
| Multiply (x, y) -> innerEval x * innerEval y
| Add (x, y) -> innerEval x + innerEval y
| Subtract (x, y) -> innerEval x - innerEval y
| GreaterThan (x, y) -> if innerEval x > innerEval y then 0 else 1
| If (pred, tval, fval) -> if innerEval pred = 0 then innerEval fval else innerEval tval
| Constant value -> value
| Parameter pos -> List.nth parameters pos
innerEval
I’ve often had problems coming up with nice simple examples of what you might want to do with such an expression. Well “Collective Intelligence” gave me a great idea of what you can do with such an expression: genetic programming. The example we’re going to look at is presented in chapter 11, pages 251 to 268. The idea is straight forward if you can abstractly represent a program as we can with our numeric expression AST then it’s possible to make random changes to our program. Wants we have made the random changes we can measure which programs are successful and use the successful ones to create a new generation of programs. So what kind of problems can we solve with this approach? Well here’s a nice one, suppose we have a set of points:
X Y Result
36 38 1485
17 13 371
13 2 217
24 31 715
13 0 213
38 3 1569
10 11 157
11 32 223
18 25 433
And we want to find out what function fits best to these points, we can use genetic programming to solve this problem. I’ve given a program below that can solve this problem though genetic programming. I don’t want to go into too many details, you should really buy the book if you want a good explanation, but in a nutshell the algorithm works in the following way:
- We create a bunch of random function
- We test how close these get to the predefined points
- If we find a function that fits we return it
- Otherwise we breed the most successful function and loop to the testing step
Anyway the full implementation is here, and version can be downloaded here [EDIT: link fixed]:
#light
open System
open Microsoft.FSharp.Math
/// Untyped expression tree
type Expression =
| Multiply of Expression * Expression
| Add of Expression * Expression
| Subtract of Expression * Expression
| GreaterThan of Expression * Expression
| If of Expression * Expression * Expression
| Constant of int
| Parameter of int
/// Given a list of parameters evaluate the tree
let evaluateExpression parameters =
let rec innerEval tree =
match tree with
| Multiply (x, y -> innerEval x * innerEval y
| Add (x, y -> innerEval x + innerEval y
| Subtract (x, y -> innerEval x - innerEval y
| GreaterThan (x, y -> if innerEval x > innerEval y then 0 else 1
| If (pred, tval, fval -> if innerEval pred = 0 then innerEval fval else innerEval tval
| Constant value -> value
| Parameter pos -> List.nth parameters pos
innerEval
let simplifyExpression =
let rec innerEval tree =
match tree with
| Multiply (Constant 0, _ -> Constant (0
| Multiply (_, Constant 0 -> Constant (0
| Multiply (Constant x, Constant y -> Constant (x * y
| Add (Constant x, Constant y -> Constant (x + y
| Subtract (Constant x, Constant y -> Constant (x - y
| GreaterThan (Constant x, Constant y -> if x > y then Constant 0 else Constant 1
| If (Constant pred, tval, fval -> if pred = 0 then fval else tval
| x -> x
let rec loop tree =
let tree' = innerEval tree
if tree' = tree then
tree
else
loop tree
loop
/// print the expression to the console
let printExpression =
let rec innerPrint ppf tree =
match tree with
| Multiply (x, y -> Printf.fprintf ppf "(%a * %a" innerPrint x innerPrint y
| Add (x, y -> Printf.fprintf ppf "(%a + %a" innerPrint x innerPrint y
| Subtract (x, y -> Printf.fprintf ppf "(%a - %a" innerPrint x innerPrint y
| GreaterThan (x, y -> Printf.fprintf ppf "(%a > %a" innerPrint x innerPrint y
| If (pred, tval, fval -> Printf.fprintf ppf "(if %a then %a else %a" innerPrint pred innerPrint fval innerPrint tval
| Constant value -> Printf.fprintf ppf "%i" value
| Parameter pos -> Printf.fprintf ppf "p%i" pos
innerPrint System.Console.Out
let rand = new Random(
/// build a random expression with limited depth, a maximum constants value,
/// and a limited number of parameters
let buildRandomExpression maxDepth maxConst noParams =
let rec innerBuild curDepth =
if curDepth < maxDepth then
let nextDepth = curDepth + 1
match rand.Next(7 with
| 0 -> Multiply (innerBuild nextDepth, innerBuild nextDepth
| 1 -> Add (innerBuild nextDepth, innerBuild nextDepth
| 2 -> Subtract (innerBuild nextDepth, innerBuild nextDepth
| 3 -> GreaterThan (innerBuild nextDepth, innerBuild nextDepth
| 4 -> If (innerBuild nextDepth, innerBuild nextDepth, innerBuild nextDepth
| 5 -> Constant (rand.Next(maxConst
| 6 -> Parameter (rand.Next(noParams
| _ -> failwith "assert false"
else
match rand.Next(2 with
| 0 -> Constant (rand.Next(maxConst
| 1 -> Parameter (rand.Next(noParams
| _ -> failwith "assert false"
innerBuild 0
/// make a change to an existing tree by replace a node
/// with a randomly generated tree
let mutateExpression maxConst maxParam rate =
let rec innerMutate currDepth tree =
let mutate node =
let newNode =
if rand.NextDouble( < rate then
buildRandomExpression maxConst maxParam (currDepth + 1
else node
innerMutate (currDepth + 1 node
match tree with
| Multiply (x, y -> Multiply (mutate x, mutate y
| Add (x, y -> Add(mutate x, mutate y
| Subtract (x, y -> Subtract (mutate x, mutate y
| GreaterThan (x, y -> GreaterThan (mutate x, mutate y
| If (pred, tval, fval -> If (mutate pred, mutate fval, mutate tval
| Constant value -> Constant( value
| Parameter pos -> Parameter ( pos
innerMutate 0
let (|Binary|Nullary| = function
| Add(x,y -> Binaryfun(x,y -> Add(x,y,x,y
| Subtract(x,y -> Binaryfun(x,y -> Subtract(x,y,x,y
| Multiply(x,y -> Binaryfun(x,y -> Multiply(x,y,x,y
| GreaterThan(x,y -> Binary(fun (x,y -> GreaterThan(x,y,x,y
| If(pred,tval,fval -> Binaryfun (x,y -> If (pred,x,y,tval,fval
| x -> Nullary(x
type HoleTree =
| LeftHole of (Expression * Expression -> Expression * HoleTree * Expression
| RightHole of (Expression * Expression -> Expression * Expression * HoleTree
| Hole
let rec plug = function
| LeftHole(con,h,r,t -> con(plug(h,t, r
| RightHole(con,l,h,t -> con(l, plug(h,t
| Hole,t -> t
let rec descendTree top p = function
| Nullary(x -> Hole, x
| t when not top && rand.NextDouble( < p -> Hole, t
| Binary(con,l,r ->
if rand.NextDouble( < 0.5 then
let h,t = descendTree false p l
LeftHole(con,h,r,t
else
let h,t = descendTree false p r
RightHole(con,l,h,t
let crossOverExpressions p t1 t2 =
let h,_ = descendTree true p t1
let _,t = descendTree true p t2
plug(h,t
let evolve scoreFunction mutRate crossRate breedChance pop maxGen maxDepth maxConst noParams =
let initPop = List.init pop (fun _ -> buildRandomExpression maxDepth maxConst noParams
// the inner loop which will handle each generation
let rec innerGenEvolve currPop currGen =
// calculate score sort list to find the winner
let res =
[ for expr in currPop ->
scoreFunction expr, expr ]
let res = List.sort (fun (score1,_ (score2,_ -> compare score1 score2 res
let score,winner = List.hd res
// print the winner ... just for info
printfn "\nGen:%i score:%A" currGen score
printExpression winner
// if we've found winner or reached the maxium gens return
if score = 0I || currGen = maxGen then
winner
else
// get rid of scores, no longer needed
let res = List.map snd res
// always keep winner and second
let winner, second =
match res with
| winner :: second :: _ -> winner, second
| _ -> failwith "assert false"
let newpop = winner :: second :: []
// select an expression probably towards to top of the list
let selectExpr( = List.nth res (min (List.length res - 1 (int(log(rand.NextDouble( / log(breedChance
// loop to calculate the new population
let rec addExpress acc=
if List.length acc = pop then
acc
else
// cross two expressions then mutate
let crossExpress = (crossOverExpressions crossRate (selectExpr( (selectExpr(
let newExp = mutateExpression maxConst noParams mutRate crossExpress
addExpress (newExp :: acc
let newpop = addExpress newpop
// loop recursively
innerGenEvolve newpop (currGen + 1
// start the loop
innerGenEvolve initPop 0
// define a secret funtion we're trying to find
let secertFunction x y = (x * x + (2 * y + (3 * x + 5
// calculate some data from the secret function
let data = [ for x in [0 .. 200] ->
let x = rand.Next(40
let y = rand.Next(40
(x,y, secertFunction x y ]
// evaluate the an expression to see how close to the secret function it is
let scoreFunction expr =
let results =
[ for (x,y,res in data ->
res - evaluateExpression [x;y] expr ]
results |> List.fold_left (fun acc x -> BigInt.Abs (BigInt x + acc 0I
// call the evolve function
evolve scoreFunction 0.2 0.7 0.91 800 100 6 10 2
Happy hacking, and a big thanks to “Keith” from the cs.hubsf.net who helped me with the tree “crossover” function.