Return to Answer

updated for new rules, optimized

edited Jun 2, 2015 at 7:30

4.9k
1
14
19

Initial versionAfter a round of optimizations, and adapted for the modified rules:

extern int atoi(const char*); extern int puts(const char*); typedef unsigned long uint64_t;u64; static int W, H, W1;S; static u64 RM, DM, NSol; static char Out[64 * 2 + 1]; static void place(uint64_tu64 cMaskcM, u64 vM, int n) { if (n) { intu64 posm = 1ul << __builtin_ctzl(~cMaskcM);  uint64_t maskcM -= 1ulm;  << pos;  if (m cMask& |=RM) mask; { int y u64 nM = posm /<< W;1; int x =if pos(cM -& ynM) *place(cM W; - nM, vM, n uint64_t- neighMask1);  = mask << 1;}  if ((x < W - 1) && !(cMaskm & neighMask)DM) { Out[W1 * y +u64 x]nM = '-';  Out[W1m *<< yW;  + x + 1] = '-'; vM |= m; vM |= nM; place(cMaskcM |- neighMasknM, vM, n - 1); }  } else if (S) { neighMask++NSol;  = mask} <<else W;{ ifchar* ((yp <= HOut;  - 1) && !for (cMaskint &y neighMask)= 0; y < H; ++y) { Out[W1 * yfor +(int x]x = '|'; 0; x < W; ++x) { Out[W1*p++ *= (yvM +& 1) +? x]'|' =: '|'; '-'; vM >>= 1; }  place(cMask | neighMask, n -*p++ 1);= '\n'; } } else {*p++ = '\0'; puts(Out);   ++NSol; } } int main(int argc, char* argv[]) { W = atoi(argv[1]);  H = atoi(argv[2]);  W1S = W(argc +> 1;3); int n = W * H; if (n & 1) { return 0;  }   n >>= 1; for (int y = 0; y < H; ++y) { Out[W1RM *<<= yW; +RM W]|= =(1ul '\n';<< (W - 1)) - 1; } Out[W1DM = (1ul << (W * H](H =- '\0';1))) - 1; place(-1, 0, n >> 1); printf("%lu\n", NSol); return 0; }

This is not much optimized yet. But optimization ofI started bumping into the actual algorithm code is largely moot because mostlength limit of the time is used for output anyway1024 characters, and compilation time is already similar to execution time.so I had a version that only countedto reduce the solutions instead of printing themreadability somewhat. Much shorter variable names, and it took about 8 milliseconds for the 8x6 size on my modest laptopetc.

Build/test instructions:

> gcc -O0O2 Code.c

Run with solution output enabled:

> ./a.out 8 68 >/dev/null

My development was on a Mac using clang as the compiler. But I hopeRun with only solution count:

> ./a.out 8 8 s

Some comments:

With the larger test example, I do want optimization now. While my system is different (Mac), around -O2 seems to be good.

The code has gotten slower for the case where output is generated. This was a conscious sacrifice for optimizing the "count only" mode, and for reducing the code length.

There will be a few compiler warnings because of missing includes and external declarations for system functions. It was the easiest way to get me to below 1024 characters in the end without making the code totally unreadable.

Also note that gcc will behave similarlythe code still generates the actual solutions, even in "count only" mode. The execution timeWhenever a solution is reduced when compiling with e.g. -O3found, but the increase in compile time more than compensated for it in my tests. Ideally it could be tried with -O0vM tobitmask contains a -O31 on the benchmark system to determine which results in the lowest value for the sum.

I hope you'll also test for correctness, because I don'tpositions that have much to validate it beyond thea vertical bar, and a 4x20 casefor positions with a horizontal bar. Only the conversion of this bitmask into ASCII format, and the actual output, is skipped.

Initial version:

extern int atoi(const char*); extern int puts(const char*); typedef unsigned long uint64_t; static int W, H, W1; static char Out[64 * 2 + 1]; static void place(uint64_t cMask, int n) { if (n) { int pos = __builtin_ctzl(~cMask);  uint64_t mask = 1ul << pos;  cMask |= mask;  int y = pos / W; int x = pos - y * W;  uint64_t neighMask = mask << 1; if ((x < W - 1) && !(cMask & neighMask)) { Out[W1 * y + x] = '-';  Out[W1 * y + x + 1] = '-';  place(cMask | neighMask, n - 1); } neighMask = mask << W; if ((y < H - 1) && !(cMask & neighMask)) { Out[W1 * y + x] = '|';  Out[W1 * (y + 1) + x] = '|';  place(cMask | neighMask, n - 1); } } else { puts(Out); } } int main(int argc, char* argv[]) { W = atoi(argv[1]);  H = atoi(argv[2]);  W1 = W + 1; int n = W * H; if (n & 1) { return 0;  }   n >>= 1; for (int y = 0; y < H; ++y) { Out[W1 * y + W] = '\n'; } Out[W1 * H] = '\0'; place(0, n); return 0; }

This is not much optimized yet. But optimization of the actual algorithm code is largely moot because most of the time is used for output anyway, and compilation time is already similar to execution time. I had a version that only counted the solutions instead of printing them, and it took about 8 milliseconds for the 8x6 size on my modest laptop.

Build/test instructions:

> gcc -O0 Code.c > ./a.out 8 6 >/dev/null

My development was on a Mac using clang as the compiler. But I hope that gcc will behave similarly. The execution time is reduced when compiling with e.g. -O3, but the increase in compile time more than compensated for it in my tests. Ideally it could be tried with -O0 to -O3 on the benchmark system to determine which results in the lowest value for the sum.

I hope you'll also test for correctness, because I don't have much to validate it beyond the 4x2 case.

After a round of optimizations, and adapted for the modified rules:

typedef unsigned long u64; static int W, H, S; static u64 RM, DM, NSol; static char Out[64 * 2 + 1]; static void place(u64 cM, u64 vM, int n) { if (n) { u64 m = 1ul << __builtin_ctzl(cM); cM -= m;  if (m & RM) { u64 nM = m << 1; if (cM & nM) place(cM - nM, vM, n - 1);  }  if (m & DM) { u64 nM = m << W;  vM |= m; vM |= nM; place(cM - nM, vM, n - 1); }  } else if (S) { ++NSol;  } else { char* p = Out;  for (int y = 0; y < H; ++y) { for (int x = 0; x < W; ++x) { *p++ = vM & 1 ? '|' : '-'; vM >>= 1; }  *p++ = '\n'; } *p++ = '\0'; puts(Out);   ++NSol; } } int main(int argc, char* argv[]) { W = atoi(argv[1]); H = atoi(argv[2]); S = (argc > 3); int n = W * H; if (n & 1) return 0; for (int y = 0; y < H; ++y) { RM <<= W; RM |= (1ul << (W - 1)) - 1; } DM = (1ul << (W * (H - 1))) - 1; place(-1, 0, n >> 1); printf("%lu\n", NSol); return 0; }

I started bumping into the length limit of 1024 characters, so I had to reduce the readability somewhat. Much shorter variable names, etc.

Build instructions:

> gcc -O2 Code.c

Run with solution output enabled:

> ./a.out 8 8 >/dev/null

Run with only solution count:

> ./a.out 8 8 s

Some comments:

With the larger test example, I do want optimization now. While my system is different (Mac), around -O2 seems to be good.

The code has gotten slower for the case where output is generated. This was a conscious sacrifice for optimizing the "count only" mode, and for reducing the code length.

There will be a few compiler warnings because of missing includes and external declarations for system functions. It was the easiest way to get me to below 1024 characters in the end without making the code totally unreadable.

Also note that the code still generates the actual solutions, even in "count only" mode. Whenever a solution is found, the vM bitmask contains a 1 for the positions that have a vertical bar, and a 0 for positions with a horizontal bar. Only the conversion of this bitmask into ASCII format, and the actual output, is skipped.

avoid using system includes

Source Link

edited Jun 1, 2015 at 4:48

Reto Koradi

4.9k
1
14
19

#includeextern <stdint.h>int atoi(const char*); #includeextern <stdlib.h>int puts(const char*); #include typedef <stdio.h>unsigned long uint64_t; static int W, H, W1; static char Out[64 * 2 + 1]; static void place(uint64_t cMask, int nStepsn) { if (nStepsn) { int pos = __builtin_ctzl(~cMask); uint64_t mask = 1ul << pos; cMask |= mask; int y = pos / W; int x = pos - y * W; uint64_t neighMask = mask << 1; if ((x < W - 1) && !(cMask & neighMask)) { Out[W1 * y + x] = '-'; Out[W1 * y + x + 1] = '-'; place(cMask | neighMask, nStepsn - 1); } neighMask = mask << W; if ((y < H - 1) && !(cMask & neighMask)) { Out[W1 * y + x] = '|'; Out[W1 * (y + 1) + x] = '|'; place(cMask | neighMask, nStepsn - 1); } } else { puts(Out); } } int main(int argc, char* argv[]) { W = atoi(argv[1]); H = atoi(argv[2]); W1 = W + 1; int nStepsn = W * H; if (nStepsn & 1) { return 0; } nStepsn >>= 1; for (int y = 0; y < H; ++y) { Out[W1 * y + W] = '\n'; } Out[W1 * H] = '\0'; place(0, nStepsn); return 0; }

#include <stdint.h> #include <stdlib.h> #include <stdio.h> static int W, H, W1; static char Out[64 * 2 + 1]; static void place(uint64_t cMask, int nSteps) { if (nSteps) { int pos = __builtin_ctzl(~cMask); uint64_t mask = 1ul << pos; cMask |= mask; int y = pos / W; int x = pos - y * W; uint64_t neighMask = mask << 1; if ((x < W - 1) && !(cMask & neighMask)) { Out[W1 * y + x] = '-'; Out[W1 * y + x + 1] = '-'; place(cMask | neighMask, nSteps - 1); } neighMask = mask << W; if ((y < H - 1) && !(cMask & neighMask)) { Out[W1 * y + x] = '|'; Out[W1 * (y + 1) + x] = '|'; place(cMask | neighMask, nSteps - 1); } } else { puts(Out); } } int main(int argc, char* argv[]) { W = atoi(argv[1]); H = atoi(argv[2]); W1 = W + 1; int nSteps = W * H; if (nSteps & 1) { return 0; } nSteps >>= 1; for (int y = 0; y < H; ++y) { Out[W1 * y + W] = '\n'; } Out[W1 * H] = '\0'; place(0, nSteps); return 0; }

extern int atoi(const char*); extern int puts(const char*);  typedef unsigned long uint64_t; static int W, H, W1; static char Out[64 * 2 + 1]; static void place(uint64_t cMask, int n) { if (n) { int pos = __builtin_ctzl(~cMask); uint64_t mask = 1ul << pos; cMask |= mask; int y = pos / W; int x = pos - y * W; uint64_t neighMask = mask << 1; if ((x < W - 1) && !(cMask & neighMask)) { Out[W1 * y + x] = '-'; Out[W1 * y + x + 1] = '-'; place(cMask | neighMask, n - 1); } neighMask = mask << W; if ((y < H - 1) && !(cMask & neighMask)) { Out[W1 * y + x] = '|'; Out[W1 * (y + 1) + x] = '|'; place(cMask | neighMask, n - 1); } } else { puts(Out); } } int main(int argc, char* argv[]) { W = atoi(argv[1]); H = atoi(argv[2]); W1 = W + 1; int n = W * H; if (n & 1) { return 0; } n >>= 1; for (int y = 0; y < H; ++y) { Out[W1 * y + W] = '\n'; } Out[W1 * H] = '\0'; place(0, n); return 0; }

added 11 characters in body

Source Link

edited Jun 1, 2015 at 2:31

Reto Koradi

4.9k
1
14
19

> gcc -O0 Code.c > ./a.out 8 6 >/dev/null

> gcc -O0 Code.c > ./a.out 8 6

> gcc -O0 Code.c > ./a.out 8 6 >/dev/null

Source Link

answered Jun 1, 2015 at 2:23

Reto Koradi

4.9k
1
14
19