ref: 120f6de605b9a431e3b084bfc34d7cf33b6c9905
parent: ed82ef5c0e74de5ef743e61aebad59b2acea7a49
author: Simon Tatham <[email protected]>
date: Thu Apr 11 08:51:06 EDT 2013
Introduce some extra testing and benchmarking command-line options to the GTK front end, plus a 'make test' target in the GTK makefile which uses them to automatically generate 100 puzzles for each game at each preset configuration, test-run them back through the solver without their aux_info to ensure that can cope, and produce an HTML box plot of game generation times for each preset. As part of this work I've removed the TESTSOLVE mechanism from r9549, since the new --test-solve option does the same thing better (in that when something goes wrong it prints the random seed that caused the problem). [originally from svn r9825] [r9549 == 5a095b8a08fa9f087b93c86aea0fa027138b028d]
--- a/Recipe
+++ b/Recipe
@@ -205,3 +205,19 @@
echo '<applet archive="'$@'" code="PuzzleApplet" width="700" height="500"></applet>' >$*.html
mv PuzzleEngine.class $<
!end
+
+# A benchmarking and testing target for the GTK puzzles.
+!begin gtk
+test: benchmark.html benchmark.txt
+
+benchmark.html: benchmark.txt benchmark.pl
+ ./benchmark.pl benchmark.txt > $@
+
+benchmark.txt: $(GAMES)
+ for i in $(GAMES); do \
+ for params in $$(env -i ./$(BINPREFIX)$$i --list-presets | cut -f1 -d' '); do \
+ env -i ./$(BINPREFIX)$$i --test-solve --time-generation --generate 100 $$params \
+ || exit 1; \
+ done; \
+ done > $@
+!end
--- /dev/null
+++ b/benchmark.pl
@@ -1,0 +1,153 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+my @presets = ();
+my %presets = ();
+my $maxval = 0;
+
+while (<>) {
+ chomp;
+ if (/^(.*)(#.*): ([\d\.]+)$/) {
+ push @presets, $1 unless defined $presets{$1};
+ push @{$presets{$1}}, $3;
+ $maxval = $3 if $maxval < $3;
+ }
+}
+
+print <<EOF;
+<!DOCTYPE html>
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=ASCII" />
+<title>Puzzle generation-time benchmarks</title>
+<script type="text/javascript">
+//<![CDATA[
+function choose_scale_ticks(scale) {
+ var nscale = 1, j = 0, factors = [2,2.5,2];
+ while (scale / nscale > 20) {
+ nscale *= factors[j];
+ j = (j+1) % factors.length;
+ }
+ return nscale;
+}
+function initPlots() {
+ var canvases = document.getElementsByTagName('canvas');
+ for (var i = 0; i < canvases.length; i++) {
+ var canvas = canvases[i];
+ var scale = eval(canvas.getAttribute("data-scale"));
+ var add = 20.5, mult = (canvas.width - 2*add) / scale;
+ var data = eval(canvas.getAttribute("data-points"));
+ var ctx = canvas.getContext('2d');
+ ctx.lineWidth = '1px';
+ ctx.lineCap = 'round';
+ ctx.lineJoin = 'round';
+ ctx.strokeStyle = ctx.fillStyle = '#000000';
+ if (data === "scale") {
+ // Draw scale.
+ ctx.font = "16px sans-serif";
+ ctx.textAlign = "center";
+ ctx.textBaseline = "alphabetic";
+ var nscale = choose_scale_ticks(scale);
+ for (var x = 0; x <= scale; x += nscale) {
+ ctx.beginPath();
+ ctx.moveTo(add+mult*x, canvas.height);
+ ctx.lineTo(add+mult*x, canvas.height - 3);
+ ctx.stroke();
+ ctx.fillText(x + "s", add+mult*x, canvas.height - 6);
+ }
+ } else {
+ // Draw a box plot.
+ function quantile(x) {
+ var n = (data.length * x) | 0;
+ return (data[n-1] + data[n]) / 2;
+ }
+
+ var q1 = quantile(0.25), q2 = quantile(0.5), q3 = quantile(0.75);
+ var iqr = q3 - q1;
+ var top = 0.5, bot = canvas.height - 1.5, mid = (top+bot)/2;
+ var wlo = null, whi = null; // whisker ends
+
+ ctx.strokeStyle = '#bbbbbb';
+ var nscale = choose_scale_ticks(scale);
+ for (var x = 0; x <= scale; x += nscale) {
+ ctx.beginPath();
+ ctx.moveTo(add+mult*x, 0);
+ ctx.lineTo(add+mult*x, canvas.height);
+ ctx.stroke();
+ }
+ ctx.strokeStyle = '#000000';
+
+ for (var j in data) {
+ var x = data[j];
+ if (x >= q1 - 1.5 * iqr && x <= q3 + 1.5 * iqr) {
+ if (wlo === null || wlo > x)
+ wlo = x;
+ if (whi === null || whi < x)
+ whi = x;
+ } else {
+ ctx.beginPath();
+ ctx.arc(add+mult*x, mid, 2, 0, 2*Math.PI);
+ ctx.stroke();
+ if (x >= q1 - 3 * iqr && x <= q3 + 3 * iqr)
+ ctx.fill();
+ }
+ }
+
+ ctx.beginPath();
+
+ // Box
+ ctx.moveTo(add+mult*q1, top);
+ ctx.lineTo(add+mult*q3, top);
+ ctx.lineTo(add+mult*q3, bot);
+ ctx.lineTo(add+mult*q1, bot);
+ ctx.closePath();
+
+ // Line at median
+ ctx.moveTo(add+mult*q2, top);
+ ctx.lineTo(add+mult*q2, bot);
+
+ // Lower whisker
+ ctx.moveTo(add+mult*q1, mid);
+ ctx.lineTo(add+mult*wlo, mid);
+ ctx.moveTo(add+mult*wlo, top);
+ ctx.lineTo(add+mult*wlo, bot);
+
+ // Upper whisker
+ ctx.moveTo(add+mult*q3, mid);
+ ctx.lineTo(add+mult*whi, mid);
+ ctx.moveTo(add+mult*whi, top);
+ ctx.lineTo(add+mult*whi, bot);
+
+ ctx.stroke();
+ }
+ }
+}
+//]]>
+</script>
+</head>
+<body onLoad="initPlots();">
+<h1 align=center>Puzzle generation-time benchmarks</h1>
+<table>
+<tr><th>Preset</th><td><canvas width=700 height=30 data-points='"scale"' data-scale="$maxval"></td></tr>
+EOF
+
+for my $preset (@presets) {
+ print "<tr><td>", &escape($preset), "</td><td><canvas width=700 height=15 data-points=\"[";
+ print join ",", sort { $a <=> $b } @{$presets{$preset}};
+ print "]\" data-scale=\"$maxval\"></td></tr>\n";
+}
+
+print <<EOF;
+</body>
+</html>
+EOF
+
+sub escape {
+ my ($text) = @_;
+ $text =~ s/&/&/g;
+ $text =~ s/</</g;
+ $text =~ s/>/>/g;
+ return $text;
+}
--- a/gtk.c
+++ b/gtk.c
@@ -12,6 +12,7 @@
#include <math.h>
#include <sys/time.h>
+#include <sys/resource.h>
#include <gtk/gtk.h>
#include <gdk/gdkkeysyms.h>
@@ -2482,6 +2483,7 @@
char *pname = argv[0];
char *error;
int ngenerate = 0, print = FALSE, px = 1, py = 1;
+ int time_generation = FALSE, test_solve = FALSE, list_presets = FALSE;
int soln = FALSE, colour = FALSE;
float scale = 1.0F;
float redo_proportion = 0.0F;
@@ -2534,6 +2536,12 @@
}
} else
ngenerate = 1;
+ } else if (doing_opts && !strcmp(p, "--time-generation")) {
+ time_generation = TRUE;
+ } else if (doing_opts && !strcmp(p, "--test-solve")) {
+ test_solve = TRUE;
+ } else if (doing_opts && !strcmp(p, "--list-presets")) {
+ list_presets = TRUE;
} else if (doing_opts && !strcmp(p, "--save")) {
if (--ac > 0) {
savefile = *++av;
@@ -2722,7 +2730,8 @@
* generated descriptive game IDs.)
*/
while (ngenerate == 0 || i < n) {
- char *pstr, *err;
+ char *pstr, *err, *seed;
+ struct rusage before, after;
if (ngenerate == 0) {
pstr = fgetline(stdin);
@@ -2748,10 +2757,64 @@
return 1;
}
}
- sfree(pstr);
- midend_new_game(me);
+ if (time_generation)
+ getrusage(RUSAGE_SELF, &before);
+ midend_new_game(me);
+
+ seed = midend_get_random_seed(me);
+
+ if (time_generation) {
+ double elapsed;
+
+ getrusage(RUSAGE_SELF, &after);
+
+ elapsed = (after.ru_utime.tv_sec -
+ before.ru_utime.tv_sec);
+ elapsed += (after.ru_utime.tv_usec -
+ before.ru_utime.tv_usec) / 1000000.0;
+
+ printf("%s %s: %.6f\n", thegame.name, seed, elapsed);
+ }
+
+ if (test_solve && thegame.can_solve) {
+ /*
+ * Now destroy the aux_info in the midend, by means of
+ * re-entering the same game id, and then try to solve
+ * it.
+ */
+ char *game_id, *err;
+
+ game_id = midend_get_game_id(me);
+ err = midend_game_id(me, game_id);
+ if (err) {
+ fprintf(stderr, "%s %s: game id re-entry error: %s\n",
+ thegame.name, seed, err);
+ return 1;
+ }
+ midend_new_game(me);
+ sfree(game_id);
+
+ err = midend_solve(me);
+ /*
+ * If the solve operation returned the error "Solution
+ * not known for this puzzle", that's OK, because that
+ * just means it's a puzzle for which we don't have an
+ * algorithmic solver and hence can't solve it without
+ * the aux_info, e.g. Netslide. Any other error is a
+ * problem, though.
+ */
+ if (err && strcmp(err, "Solution not known for this puzzle")) {
+ fprintf(stderr, "%s %s: solve error: %s\n",
+ thegame.name, seed, err);
+ return 1;
+ }
+ }
+
+ sfree(pstr);
+ sfree(seed);
+
if (doc) {
err = midend_print_puzzle(me, doc, soln);
if (err) {
@@ -2794,7 +2857,7 @@
}
sfree(realname);
}
- if (!doc && !savefile) {
+ if (!doc && !savefile && !time_generation) {
id = midend_get_game_id(me);
puts(id);
sfree(id);
@@ -2813,6 +2876,30 @@
midend_free(me);
return 0;
+ } else if (list_presets) {
+ /*
+ * Another specialist mode which causes the puzzle to list the
+ * game_params strings for all its preset configurations.
+ */
+ int i, npresets;
+ midend *me;
+
+ me = midend_new(NULL, &thegame, NULL, NULL);
+ npresets = midend_num_presets(me);
+
+ for (i = 0; i < npresets; i++) {
+ game_params *params;
+ char *name, *paramstr;
+
+ midend_fetch_preset(me, i, &name, ¶ms);
+ paramstr = thegame.encode_params(params, TRUE);
+
+ printf("%s %s\n", paramstr, name);
+ sfree(paramstr);
+ }
+
+ midend_free(me);
+ return 0;
} else {
frontend *fe;
--- a/midend.c
+++ b/midend.c
@@ -445,55 +445,6 @@
sfree(movestr);
}
- /*
- * Soak test, enabled by setting <gamename>_TESTSOLVE in the
- * environment. This causes an immediate attempt to re-solve the
- * game without benefit of aux_info. The effect is that (at least
- * on Unix) you can run 'FOO_TESTSOLVE=1 foo --generate 10000
- * <params>#12345' and it will generate a lot of game ids and
- * instantly pass each one back to the solver.
- *
- * (It's worth putting in an explicit seed in any such test, so
- * you can repeat it to diagnose a problem if one comes up!)
- */
- {
- char buf[80];
- int j, k;
- static int doing_test_solve = -1;
- if (doing_test_solve < 0) {
- sprintf(buf, "%s_TESTSOLVE", me->ourgame->name);
- for (j = k = 0; buf[j]; j++)
- if (!isspace((unsigned char)buf[j]))
- buf[k++] = toupper((unsigned char)buf[j]);
- buf[k] = '\0';
- if (getenv(buf)) {
- /*
- * Since this is used for correctness testing, it's
- * helpful to have a visual acknowledgment that the
- * user hasn't mistyped the environment variable name.
- */
- fprintf(stderr, "Running solver soak tests\n");
- doing_test_solve = TRUE;
- } else {
- doing_test_solve = FALSE;
- }
- }
- if (doing_test_solve) {
- game_state *s;
- char *msg, *movestr;
-
- msg = NULL;
- movestr = me->ourgame->solve(me->states[0].state,
- me->states[0].state,
- NULL, &msg);
- assert(movestr && !msg);
- s = me->ourgame->execute_move(me->states[0].state, movestr);
- assert(s);
- me->ourgame->free_game(s);
- sfree(movestr);
- }
- }
-
me->states[me->nstates].movestr = NULL;
me->states[me->nstates].movetype = NEWGAME;
me->nstates++;