/* * Implementation enables to describe the tree with BNF text and builds random trees according to it. * Resulting trees can be either saved to file (for evaluation) or evolved (to be done). */ object prog_gen extends App { Generator.generate(System.out) } object Generator { val cfactor = 20 // convergence factor val MEM_SIZE = 2 val productions = List( // Below, | creates alternatives using enum type // Every alternative is a sequence type // Sequence or alternative type is created for every non-terminal type // EOL, INDENT, INDENT++, RANGE and * symbols have obvious, built-in meaning // Otherwise, constant type is used for terminals. // First symbol (upper left corner) is the start one "PROGRAM -> HEAD void autogenerated() BODY TAIL", "HEAD -> #include EOL char mem[ MEM_SIZE ]; ", "BODY -> { INDENT++ EOL STATEMENT_LN* INDENT-- INDENT }", "STATEMENT_LN -> INDENT STATEMENT ; EOL", "STATEMENT -> ; | MEM = INT | while ( BOOL ) BODY | if ( BOOL ) BODY else BODY", "INT -> RANGE_-128_127 | INT + INT | MEM", "MEM -> mem[ RANGE_0_MEM_SIZE ]", "BOOL -> 0 | 1 | BOOL && BOOL | !( BOOL ) | INT == INT | INT < INT", "TAIL -> int main() { autogenerated() ; int i; for(i = 0; i < 2; i++) printf(\"%d \", mem[i]); }" //"E -> Int | Int + " // fails ) // Parse into LHS => List[List[String]], where RHS is list of alternaitves // and every alternative is a seq of tokens . map {production => val rhsStart = production.indexOf("->") val lhs = production.substring(0, rhsStart).trim val rhs = production.replaceAll("MEM_SIZE", MEM_SIZE+"").substring(rhsStart+2).split('|') (lhs, rhs.toList map (_.trim.split("\\s+").toList map {_.trim}) ) } val syntax = productions.toMap syntax foreach {case (x,y) => println(x + " -> " + y)} val instantiatedTypes = scala.collection.mutable.Map[Any, Type]() abstract class Type(val symKey: Any) { me => println("instantiated " + this + " for " + symKey) // keep report here to see that no classes are created after initialization, during value generation, which is possible due to lazy call-by-value instantiatedTypes.update(symKey, this) def rnd(path: Weights): Value// call this method } new Const("EOL") { override def format(io: IO) = io.write("\n") } new Const("INDENT") { override def format(io: IO) = io.write(io.indent) } new Const("INDENT++") { override def format(io: IO) = io.indent += " " } new Const("INDENT--") { override def format(io: IO) = io.indent = io.indent.drop(1)} val startSymbolSyntax = productions(0) match {case (lhs, rhs) => parseEnum(lhs, rhs)} println("Start sym = " + startSymbolSyntax) def generate(out: java.io.OutputStream) = { val io = IO("", new java.io.PrintWriter(out, true)) startSymbolSyntax.rnd(Map.empty).write(io) io.out.println() } case class IO(var indent: String = "", val out: java.io.PrintWriter = new java.io.PrintWriter(System.out, true)) { def write(s: String) = out.write(s) def writeAll(seq: Any*) { seq foreach { _ match { case v:Value => v.write(this) case subseq: Seq[Any] => writeAll(subseq:_*) case o => write(o.toString) }}} } abstract class Value(val t: Type) { def write(io: IO) override def toString = "value of " + t.symKey } type Weights = Map[Type, Int] // during value generation, weights limit the growth with convergence factor def rand(rangeLen: Int, from: Int = 0) = (Math.random * rangeLen).toInt + from def lazyCreate(key: Any, ctor: => Type): Type = instantiatedTypes.getOrElse(key, ctor) def parseSymSequence(sequence: Seq[String]): Type = { //println("parseSymSequence " + sequence) val children = sequence.map {sym => def ifdoesnotexist = if (sym.startsWith("RANGE_")) { val range = sym.split('_'); new Range(sym, range(1).toInt, range(2).toInt) } else if (sym.endsWith("*")) { val subSym: String = sym.dropRight(1) val subExpr = parseSymSequence(List(subSym)) // this can either be a const or enum new VarLenExpr(sym, 3, 5, subExpr) } else syntax.get(sym) match { case Some(rhs) => parseEnum(sym, rhs) case None => new Const(sym) } lazyCreate(sym, ifdoesnotexist) } if (children.length == 1) children(0) else { lazyCreate(children, new FixedLenSeq(children)) } } def parseEnum(lhs: String, rhs: Seq[Seq[String]]): Type = { println("parseEnum " + lhs) if (rhs.length == 1) println("info: production " + lhs + " produced a single-option alternative. This makes no sense.") lazyCreate(lhs, new Enum(lhs, rhs map {seq => parseSymSequence(seq)})) } class Const(val s: String) extends Type(s) { def format(io: IO) = io.write(s + " ") def rnd(weights: Weights) = new Value(this) { def write(io: IO) = format(io) } override def toString = s } case class Range(sym: String, val min: Int, val max: Int) extends Type(sym) { // class Val extends Value(this) -- it might be better to declare new Value this way // because anonymous class refers the weights in the context of rnd, which is unnecessary // But overhead is small since we won't generate huge programs. So, let's leave it here. def rnd(weights: Weights) = new Value(this) { val child = rand(max-min, min) def write(io: IO) = io.write("%+d".format(child) + " ") } } //Initially, I have determined that I need to defer the argument with => // to "tie the knot" when modelled syntax describing classes in scala // `A => a A`. Yet, later, I had to have remove it later here // because of the bug https://issues.scala-lang.org/browse/SI-9223 class FixedLenSeq(syntax: Seq[Type]) extends Type(syntax) { def rnd(weights: Weights) = new Value(this) { val children = syntax map {_.rnd(weights)} def write(io: IO) = io.writeAll(children) } } class VarLenExpr(sym: String, min: Int, max: Int, elemSyntax: => Type) extends Type(sym) { def rnd(weights: Weights) = { val list = List.fill(rand(max-min, min))(elemSyntax) new Value (this) { val ref = lazyCreate(list, new FixedLenSeq(list)).rnd(weights) def write(io: IO) = ref.write(io) } } } // Call-by-name to prevent stackoverflow. Fortunately it does not happen if I // call-by-value in the FixedLenSeq. // I do not know why we need call-by-name in one case but not the other and // why there is a bug there. class Enum(lhs: String, alternatives: => Seq[Type]) extends Type(lhs) { def rnd(weights: Weights) = { // val picked = iRnd(alternatives.length) // simply random val roulette = alternatives.map {weights get _ match { case Some(count) => Math.pow(cfactor, -count) case None => 1 }}.toArray val ball = Math.random * roulette.sum def drop(n: Int, sum: Double): Int = if (sum > 0) drop(n+1, sum-roulette(n)) else n-1 val pickedType = alternatives(drop(0, ball)) val uw = weights updated (pickedType, weights.getOrElse(pickedType, 0) + 1) val child = pickedType.rnd(uw) new Value(this) {def write(io: IO) = child.write(io)} } } def println(a: Any*) = System.err.println(a) } Example output is #include char mem[ 2 ]; void autogenerated() { ; ; if ( mem[ +1 ] == +79 ) { ; ; while ( 0 ) { mem[ +0 ] = mem[ +0 ] ; ; ; mem[ +1 ] = -31 + mem[ +0 ] ; ; ; } ; mem[ +1 ] = mem[ +0 ] ; } else { mem[ +1 ] = mem[ +1 ] + -90 ; while ( +38 == -122 ) { mem[ +0 ] = -76 + +34 ; ; ; mem[ +0 ] = +34 + +87 ; ; ; } ; ; ; } ; ; ; } int main() { autogenerated() ; int i; for(i = 0; i < 2; i++) printf("%d ", mem[i]); }