kig
diff --git a/‎README.md‎
Lines changed: 78 additions & 2 deletions b/‎README.md‎
Lines changed: 78 additions & 2 deletions
diff --git a/‎examples/hello_crash.glsl‎
Lines changed: 19 additions & 0 deletions b/‎examples/hello_crash.glsl‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎examples/hello_dlopen_gh.glsl‎
Lines changed: 1 addition & 1 deletion b/‎examples/hello_dlopen_gh.glsl‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎lib/parallel.glsl‎
Lines changed: 23 additions & 19 deletions b/‎lib/parallel.glsl‎
Lines changed: 23 additions & 19 deletions
@@ -4,14 +4,89 @@ GLSL as a scripting language? Say no more!
 
 Crash is a super not production-ready asynchronous IO runtime and module system for Vulkan compute shaders.
 
+## What does it look like?
+
+```glsl
+#include <file.glsl>
+#include <parallel.glsl>
+
+void main() {
+ if (ThreadId == 0) {
+ println("Hello from crash!");
+ println(`We are running on ${ThreadLocalCount * ThreadGroupCount} threads across ${ThreadGroupCount} thread groups. Let me introduce the first four thread groups.`);
+ }
+ globalBarrier();
+ if (ThreadGroupId < 4 && ThreadLocalId % 16 == 0) {
+ println(`Thread ${ThreadId} from thread group ${ThreadGroupId}[${ThreadLocalId}] checking in.`);
+ }
+ globalBarrier();
+ if (ThreadId == 0) {
+ println("What's your name?");
+ FREE_ALL(
+ string name = awaitIO(readLine(stdin, malloc(256)));
+ println(`Hello ${name}!`);
+ )
+ }
+}
+```
+
+Output
+```
+Hello from crash!
+We are running on 16384 threads across 256 thread groups. Let me introduce the first four thread groups.
+Thread 0 from thread group 0[0] checking in.
+Thread 16 from thread group 0[16] checking in.
+Thread 224 from thread group 3[32] checking in.
+Thread 240 from thread group 3[48] checking in.
+Thread 160 from thread group 2[32] checking in.
+Thread 176 from thread group 2[48] checking in.
+Thread 32 from thread group 0[32] checking in.
+Thread 64 from thread group 1[0] checking in.
+Thread 48 from thread group 0[48] checking in.
+Thread 80 from thread group 1[16] checking in.
+Thread 128 from thread group 2[0] checking in.
+Thread 192 from thread group 3[0] checking in.
+Thread 144 from thread group 2[16] checking in.
+Thread 208 from thread group 3[16] checking in.
+Thread 96 from thread group 1[32] checking in.
+Thread 112 from thread group 1[48] checking in.
+What's your name?
+John
+Hello John!
+
+```
+
+## Features
+
 With crash, your compute shaders can tell your CPU to do arbitrary IO:
 
- * Print strings (the crash GLSL preprocessor adds strings to GLSL)
+ * Print strings
  * Read from files and write to files
+ * Run commands and await their completion
  * Listen on network sockets
  * Tell the current time
+ * Allocate memory on the CPU side and do reads and writes to it
  * dlopen CPU libraries and call functions in them
- * Run commands and await their completion
+
+Extra language features
+
+ * Asynchronous IO `alloc_t buf = malloc(1024); io r = read("myfile.txt", buf); awaitIO(r); awaitIO(write("mycopy.txt", buf));`
+ * Strings `string s = "foobar"; string s2 = str(vec3(0.0, 1.0, 2.0)); string s3 = concat(s, " = ", s2); awaitIO(println(s3));`
+ * Multi-line template strings with backticks `` `foo ${bar}` ``
+ * Character literals `'x'` and int32 literals `'\x89PNG'`
+ * Dynamically allocated arrays `i32array a = i32{1,2,3}; i32array b = i32{4,5}; i32array ab = i32concat(a,b);`
+ * String and Array libraries that mostly match JavaScript, with some Pythonic `str()` thrown in.
+ * Hashtables `i32map h = i32hAlloc(256); i32hSet(h, 12891, 23); println(str(i32hGet(h, 12891) == 23));`
+ * Malloc that works by bumping a heap pointer and a FREE() macro to free all memory allocated inside it `FREE(alloc_t ptr = malloc(4));`
+ * A second heap for IO to make life complicated: `FREE(alloc_t buf = malloc(1024); FREE_IO(readSync("myfile.txt", buf)); println(buf));`
+ * And a macro to free both heaps at the same time: `FREE_ALL(alloc_t buf = malloc(1024); readSync("myfile.txt", buf); println(buf));`
+ * Set warp width and warp count from GLSL: `ThreadLocalCount = 64; ThreadGroupCount = 256;`
+ * By default, crash programs run across 16384 threads. A naive hello world will fill your screen with hellos. Use ThreadId to limit it to a single thread: `if (ThreadId == 0) println("Hello, World!");`
+ * Set heap size per thread, per thread group and total program heap `HeapSize = 4096; GroupHeapSize = HeapSize * ThreadLocalCount; TotalHeapSize = GroupHeapSize * ThreadGroupCount;`
+ * An `#include <file.glsl>` system powered by the C preprocessor.
+ * That also loads things over HTTPS with SHA256 integrity verification `#include <https://raw.githubusercontent.com/kig/spirv-wasm/12f2554994f5b733da65e6705099e2afd160649c/spirv-io/lib/dlopen.glsl> @ 4b43671ba494238b3855c2990c2cd844573a91d15464ed8b77d2a5b98d0eb2e1`
+
+## Examples
 
 The example scripts in [examples/](examples/) show you how to do various things, such as:
 
@@ -84,6 +159,7 @@ The GPU spins and waits for the request status flag to be set to completed. Afte
 buffer, sets the request status flag to handled, and goes on its merry way.
 
 So far, so good. What if you couldn't trust the order of the writes and reads above? What if cache lines in your memory buffer were getting transported over UDP?
+What if the driver killed your compute shaders after 10 seconds? Since clearly they have hung, right? Who would wait for user input in a shader? Crash, that's who!
 
 
 ## License
 
@@ -0,0 +1,19 @@
+#include <file.glsl>
+#include <parallel.glsl>
+
+void main() {
+ if (ThreadId == 0) {
+ println("Hello from crash!");
+ println(`We are running on ${ThreadLocalCount * ThreadGroupCount} threads across ${ThreadGroupCount} thread groups. Let me introduce the first four thread groups.`);
+ }
+ globalBarrier();
+ if (ThreadGroupId < 4 && ThreadLocalId % 16 == 0) {
+ println(`Thread ${ThreadId} from thread group ${ThreadGroupId}[${ThreadLocalId}] checking in.`);
+ }
+ globalBarrier();
+ if (ThreadId == 0) {
+ println("What's your name?");
+ string name = awaitIO(readLine(stdin, malloc(256)));
+ println(`Hello ${name}!`);
+ }
+}
@@ -1,7 +1,7 @@
 #!/usr/bin/env gls
 
 #include <file.glsl>
-#include <https://raw.githubusercontent.com/kig/spirv-wasm/master/spirv-io/lib/dlopen.glsl> @ dbc62e1bd6df8765f90b9f54e72bb644e20489ad17fefae51450cdf5321ca769
+#include <https://raw.githubusercontent.com/kig/spirv-wasm/12f2554994f5b733da65e6705099e2afd160649c/spirv-io/lib/dlopen.glsl> @ 4b43671ba494238b3855c2990c2cd844573a91d15464ed8b77d2a5b98d0eb2e1
 
 ThreadLocalCount = 1;
 ThreadGroupCount = 1;
 
@@ -48,7 +48,7 @@
  while (!done) {
  freeIO(
  fillLock(ReadBuffers[ActiveGroupId].status, {
- io r = read(file, readOffset + ActiveGroupID*readSize, readSize, ReadBuffers[ActiveGroupId].buffer);
+ io r = read(file, readOffset + ActiveGroupId*readSize, readSize, ReadBuffers[ActiveGroupId].buffer);
  readOffset += ActiveGroupCount * readSize;
  readResult = awaitIO(r);
  ReadBuffers[ActiveGroupId].length = strLen(readResult);
@@ -82,7 +82,7 @@
  while (!done) {
  freeIO(
  drainLock(WriteBuffers[ActiveGroupId].status, {
- writeSync(outFile, writeOffset + ActiveGroupID*writeSize, writeSize, string(0, WriteBuffers[ActiveGroupId].length) + WriteBuffers[ActiveGroupId].buffer.x);
+ writeSync(outFile, writeOffset + ActiveGroupId*writeSize, writeSize, string(0, WriteBuffers[ActiveGroupId].length) + WriteBuffers[ActiveGroupId].buffer.x);
  writeOffset += ActiveGroupCount * writeSize;
  done = WriteBuffers[ActiveGroupId].done;
  });
@@ -104,7 +104,23 @@ shared int32_t _ctx_;
 shared int32_t _start_group_;
 shared int32_t _end_group_;
 
-#define GlobalBarrierLock io_pad_15
+#define GlobalBarrierLock io_pad_11
+
+void globalBarrier() {
+ atomicAdd(GlobalBarrierLock, 1);
+ if (ThreadId == 0) {
+ while (GlobalBarrierLock < ThreadCount);
+ GlobalBarrierLock = 0;
+ }
+ while (GlobalBarrierLock != 0);
+}
+
+void deviceBarrier() {
+ controlBarrier(gl_ScopeDevice, gl_ScopeDevice, gl_StorageSemanticsBuffer | gl_StorageSemanticsShared, gl_SemanticsAcquireRelease);
+}
+
+/*
+
 #define ActiveGroupCount (_end_group_ - _start_group_)
 #define ActiveGroupId (ThreadGroupId - _start_group_)
 
@@ -122,31 +138,18 @@ shared int32_t _end_group_;
 #define CTX_MULTIGROUP_THREADS 3
 
 #define widefor(t, i, start, end) for ( \
- t i = t(_ctx_ == CTX_SINGLE_THREAD ? 0 : (_ctx_ == CTX_ALL_THREADS ? ThreadId : ThreadLocalID )) + (start), \
+ t i = t(_ctx_ == CTX_SINGLE_THREAD ? 0 : (_ctx_ == CTX_ALL_THREADS ? ThreadId : ThreadLocalId )) + (start), \
  t _incr_ = t(_ctx_ == CTX_SINGLE_THREAD ? 1 : (_ctx_ == CTX_ALL_THREADS ? ThreadCount : ThreadLocalCount)); \
  i < (end); i += _incr_)
 
-#define allfor(t, i, start, end) for (t i = ThreadID + start; i < end; i += ThreadCount)
-#define groupfor(t, i, start, end) for (t i = ThreadLocalID + start; i < end; i += ThreadLocalCount)
+#define allfor(t, i, start, end) for (t i = ThreadId + start; i < end; i += ThreadCount)
+#define groupfor(t, i, start, end) for (t i = ThreadLocalId + start; i < end; i += ThreadLocalCount)
 
 void copyFromIOToHeap(ptr_t src, ptr_t dst, size_t len) {
  widefor(ptr_t, i, 0, len/16) i64v2heap[dst/16+i] = i64v2fromIO[src/16+i];
  widefor(ptr_t, i, len/16*16, len ) heap[dst+i] = fromIO[src+i];
 }
 
-void globalBarrier() {
- atomicAdd(GlobalBarrierLock, 1);
- if (ThreadId == 0) {
- while (GlobalBarrierLock < ThreadCount);
- GlobalBarrierLock = 0;
- }
- while (GlobalBarrierLock != 0);
-}
-
-void deviceBarrier() {
- controlBarrier(gl_ScopeDevice, gl_ScopeDevice, gl_StorageSemanticsBuffer | gl_StorageSemanticsShared, gl_SemanticsAcquireRelease);
-}
-
 #define unique(f) { \
  atomicMax(CtxLock, 1); \
  if (ThreadId == 0) { \
@@ -263,3 +266,4 @@ void deviceBarrier() {
 // }
 // }
 //}
+*/