Ruby 2 - 540 447 420 characters
Run as " ruby2.0 jumper.rb 'instructions' 'initialization data' ". 1.x Ruby won't work (no String.bytes method).
Added multi-line commands and comments and improved my putting.
i=$*[0].gsub(/\([^)]*\)/m,' ').scan(/(\??)\s*([#=:><+-])\s*(\d*)/m).map{|a|[a[0]!='?',a[1],a[2]==''?/[#=:]/=~a[1]?0:1:a[2].to_i]} N=i.size d=$*[1].bytes r=p=0 while p<N u,o,x=i[p] p+=1 d[r]=0 if d[r].nil? case o when'#';r=x when'>';r+=x when'<';r-=x when/[=+-]/;eval "d[r]#{o.tr'=',''}=x";d[r]%=256 when':';p=x;abort'Error'if p>=N end if u||d[r]>0 abort'Error'if r<0 end printf"%s\n",d.take_while{|v|v&&v!=0}.pack('C*')
Here's a test suite with some scatter-shot tests. The easiest way to use it is to stuff the code into t/jumper.t and run "perl t/jumper.t".
#/usr/bin/perl use strict; use warnings; # timestamp: 2014 August 3, 19:00 # # - Assume program takes machine code and initialization string as command # line options. # - Assume all required errors reported as "Error\n". # - Go with the flow and suffix output with \n. Merged terminal newlines are # unacceptable [I'm talkin' to YOU Ruby puts()!]. # - As per OP - jumping to > end-of-program must be an error. use Test::More qw(no_plan); # use Test::More tests => 4; my $jumper = "jumper.rb"; # # "happy" path # # starter tests provided by OP is( `$jumper '=72>=101>=108>=108>=111>=32>=119>=111>=114>=108>=100>=33>=' '' 2>&1`, "Hello world!\n", "hello world (from user2992539)"); is( `$jumper '?:2 :4 >1 :0 =33 >1 =0' 'a' 2>&1`, "a!\n", 'append !, #1 (from user2992539)'); # simple variations is( `$jumper '?:2 :4 >1 :0 =33 >1 =0' '' 2>&1`, "!\n", 'append !, #2'); is( `$jumper '?:2 :4 >1 :0 =33' '' 2>&1`, "!\n", 'append !, #3, no NUL'); # comment delimiters don't nest is( `$jumper "(()=" 'oops' 2>&1`, "\n", "() don't nest"); # comments and termination is( `$jumper '(start with a comment)?(comment w/ trailing sp) # (comment w/ surrounding sp) 1 =98' 'a' 2>&1`, "ab\n", 'walk to exit'); is( `$jumper '(start with a comment)? (comment w/ leading sp)= (comment w/ surrounding sp) 97()' '' 2>&1`, "\n", 'skip to exit'); is( `$jumper '#1=0 (actually two instructions, but it scans well) :5 #=(truncate further if not jumped over)' 'a b' 2>&1`, "Error\n", 'truncate & jump to exit'); # is RAM pointer initialized to 0? is( `$jumper '-103(g-g) ?:1025(exit) =103 #4=10' 'good' 2>&1`, "good\n\n", 'intial string in right place?'); # TBD, do jumps work? # TBD, do conditional jumps work? # jump right to a harder case, copy byte 0 to byte 3 and format, e.g. input="Y" output="Y=>Y" is( `$jumper '#1=61#2=62#4=0#3=#10=#(11:)?:13:20(13:)#3+#10+#0-:11(20:)#10(21:)?:23:28(23:)#0+#10-:21(28:)#' 'Y' 2>&1`, "Y=>Y\n", 'copy a byte'); # test memory allocation by dropping 255s at increasingly large intervals is( `$jumper '#16=511 #64=511 #256=511 #1024=511 #4096=511 #16384=511 #65536=511 #262144=511 #1048576=511 #65536-255 (20:)?:23(exit) #=' 'wrong' 2>&1`, "\n", 'test alloc()'); # upcase by subtraction is( `$jumper '-32' 't' 2>&1`, "T\n", 'upcase via subtraction'); # 2 nested loops to upcase a character, like so: #0=2; do { #0--; #1=16; do { #1--; #2--; } while (#1); } while (#0); is( `$jumper '#=2 (2:)#- #1=16 (6:)#1- #2- #1?:6 #0?:2 #=32 #1=32' ' t' 2>&1`, " T\n", 'upcase via loops'); # downcase by addition is( `$jumper '+32' 'B' 2>&1`, "b\n", 'downcase via addition'); # same thing with a loop, adjusted to walk the plank instead of jumping off it is( `$jumper '#1 ?:3 :7 -<+ :0 #' 'B ' 2>&1`, "b\n", 'downcase via adder (from Sieg)'); # base 10 adder with carry is( `$jumper '#0-48#10=9#11=#5=#0(9:)?:11:22(11:)#10?:14:22(14:)-#11+#5+#0-:9(22:)#0?:110#11(25:)?:27:32(27:)#0+#11-:25(32:)#0+48>-43?:110=43>-48#10=9#11=#2(45:)?:47:58(47:)#10?:50:58(50:)-#11+#5+#2-:45(58:)#2?:110#11(61:)?:63:68(63:)#2+#11-:61(68:)#2+48>-61?:110=61>?:110=32#10=9#11=#5-10(83:)?:85:94(85:)#10?:88:94(88:)-#11+#5-:83(94:)#5?:99#4=49:100(99:)+10(100:)#11(101:)?:103:108(103:)#5+#11-:101(108:)#5+48' '1+1=' 2>&1`, "1+1= 2\n", 'base 10 adder, #1'); is( `$jumper '#0-48#10=9#11=#5=#0(9:)?:11:22(11:)#10?:14:22(14:)-#11+#5+#0-:9(22:)#0?:110#11(25:)?:27:32(27:)#0+#11-:25(32:)#0+48>-43?:110=43>-48#10=9#11=#2(45:)?:47:58(47:)#10?:50:58(50:)-#11+#5+#2-:45(58:)#2?:110#11(61:)?:63:68(63:)#2+#11-:61(68:)#2+48>-61?:110=61>?:110=32#10=9#11=#5-10(83:)?:85:94(85:)#10?:88:94(88:)-#11+#5-:83(94:)#5?:99#4=49:100(99:)+10(100:)#11(101:)?:103:108(103:)#5+#11-:101(108:)#5+48' '9+9=' 2>&1`, "9+9=18\n", 'base 10 adder, #2'); # order of assignment shouldn't affect order of print is( `$jumper '#1=98 #0=97' '' 2>&1`, "ab\n", 'print order != assignment order'); # are chars modulo 256? is( `$jumper '#10(#10 defaults to 0) +255+(#10 += 256) ?#(skip if #10==0) =' 'good' 2>&1`, "good\n", 'memory values limited to 0<x<255'); # go for the cycle; is( `$jumper '(0:)+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ (256:)#4=10' 'BCID' 2>&1`, "ACID\n\n", 'cycle character less 1, PC>255'); # same thing with a loop; is( `$jumper '#4=255(#4 = 255) (2:)#1+(#1++) #4-(#4--) ?:2(loop 255 times) #4=10(#4 = NL)' 'ADID' 2>&1`, "ACID\n\n", 'cycle character less 1, PC>255'); # Exercise the program counter. # PC > 255; is( `$jumper '(0:)= (1:)############################################################################################################################################################################################################################################################### (256:)?:259 (257:)+ (258:):1 (259:)=97#3=10' 'a==' 2>&1`, "a==\n\n", 'program counter range >255'); # # "sad" path # # Error checking required by the specification. # # simplest test case of PC going out of bounds is( `$jumper ':2' '' 2>&1`, "Error\n", 'program counter too big by 1'); is( `$jumper ':1024' '' 2>&1`, "Error\n", 'program counter in space'); is( `$jumper ':1073741824' '' 2>&1`, "Error\n", 'program counter in hyperspace'); # try to drive program counter negative, if 32-bit signed integer is( `$jumper ':2147483648(exit)' 'ridiculous speed' 2>&1`, "Error\n", 'program counter goes negative?, #1'); # try to drive program counter negative, if 64-bit signed integer is( `$jumper ':9223372036854775808 (exit)' 'ludicrous speed' 2>&1`, "Error\n", 'program counter goes negative?, #2'); # spaces not allowed in operand; error or silently ignore (my choice) isnt(`$jumper '#= #= #= #= #= +1 4 ' 'aops' 2>&1`, "oops\n", 'do not accept spaces in operands'); # ditto w/ a comment ; error or silently ignore (my choice) isnt(`$jumper '#= #= #= #= #= +1(not valid)4 ' 'aops' 2>&1`, "oops\n", 'do not accept spaces in operands'); # RAM pointer error-checking; "Error" or "" are OK isnt( `$jumper '<>=' 'oops' 2>&1 | grep -v Error`, "oops\n", 'unused negative RAM pointer behavior unspecified'); # RAM pointer negative and use it is( `$jumper '<=' '' 2>&1`, "Error\n", 'cannot use negative RAM pointer, #1'); # check for RAM pointer wrap-around is( `$jumper '<=' '0123456789' 2>&1`, "Error\n", 'cannot use negative RAM pointer, #2'); # The way I read this # "Commands and arguments may be delimited with spaces or new lines but # not necessary." # multi-line commands are legit. is( `$jumper "#4#?\n=" 'oops' 2>&1`, "\n", 'multi-line commands allowed'); # Multi-line comments would be consistent with multi-line commands, but I can't # find something I can translate into a "must" or "must not" requirement in # "Program can have comments between (). ... Comments can be placed # anywhere." # Until uncertainty resolved, no test case. # # "bad" path # # These tests violate the assumption that the instruction stream is wellll-farmed. # # characters not in the language; error or (my choice) silently skip isnt(`$jumper 'x =' 'oops' 2>&1`, "oops\n", 'opcode discrimination'); # is ? accepted as an operator (vs operation modifier); error or (my choice) silently skip is(`$jumper '(bad 0, good 0:)??0 (bad 1, good 0:):3 (bad 2, good 1:)#0' '' 2>&1`, "Error\n", '? not accepted as an opcode'); exit 0;
Ungolfed version.
# # Turing Machine Mach 2.0. # Tape? Tape? We don't need no stinkin' tape! We gots RAM! # # dM = data memory # iM = instruction memory # pC = program counter # rP = RAM pointer # u, o, x = current instruction being executed # # N = number of instructions in instruction memory # # instruction decoder iM = $*[0].gsub(/\([^)]*\)/m,' ').scan(/(\??)\s*([#=:><+-])\s*(\d*)/m).map { |a| [ a[0] != '?', a[1], (a[2] == '') ? (/[#=:]/ =~ a[1] ? 0 : 1) : a[2].to_i ] } pC = 0 N = iM.size dM = $*[1].bytes rP = 0 while pC < N do # u, unconditional instruction, execute if true || (dM[rP] > 0) # skip if false && (dM[rP] == 0) # o, operator # x, operand (u, o, x) = iM[pC] pC += 1 dM[rP] = 0 if dM[rP].nil? if u || (dM[rP] > 0) case o when '#' rP = x when '>' rP += x when '<' rP -= x when /[=+-]/ eval "dM[rP]#{o.tr'=',''}=x" dM[rP] %= 256 when ':' pC = x abort 'Error' if pC >= N end end abort 'Error' if rP < 0 end printf "%s\n", dM.take_while{|v|v&&v!=0}.pack('C*')
A quickie proto-assembler.
# # Jumper "assembler" - symbolic goto labels. # # what it does: # - translates labels/targets into absolute position # @label ?:good_exit # ... # :label # # - a label is [a-zA-Z][a-zA-Z0-9_]* # - a target is @label # - one special label: # - "hyperspace" is last instruction index + 1 # - strips out user comments # - everything from "//" to EOL is stripped # - jumper comments are stripped # - adds "label" comments of the form "(ddd:)" # limitations & bugs: # - multi-line jumper comments aren't alway handled gracefully # - a target not followed by an instruction will reference # the previous instruction. this can only happen # at the end of the program. recommended idiom to # avoid this: # @good_exit # # what it doesn't do: # - TBD, simple error checking # - labels defined and not used # - TBD, symbolic memory names # # Example: # # input - # ( # adder from Sieg # ) # @loop_head # 1 // while (*(1)) { # ?:continue # :good_exit # # @continue - // *(1) -= 1; # <- // *(0) += 1; # + # :loop_head // } # @good_exit # # # output - # (0:) #1 ?:3 :7 (3:) - < + :0 (7:)# rawSource = ARGF.map do |line| line.gsub(/\([^)]*\)/, ' ') # eat intra-line jumper comments .gsub(/\/\/.*/, ' ') # eat C99 comments .gsub(/^/, "#{$<.filename}@#{$<.file.lineno}\n") # add line ID end.join rawSource.gsub! /\([^)]*\)/m, '' # eat multi-line jumper comments # # Using example from above # # rawSource = # "sieg.ja@1\n \n" + # "sieg.ja@4\n@loop_head # 1\n" # ... # "sieg.ja@12\n@good_exit # \n" instructionPattern = %r{ (?<label> [[:alpha:]]\w* ){0} (?<operator> \??\s*[#=:><+-]) {0} (?<operand> \d+|[[:alpha:]]\w* ){0} \G\s*(@\g<label>\s*)?(\g<operator>\s*)?(\g<operand>)? }x FAIL = [nil, nil, nil] instructionOffset = 0 iStream = Array.new target = Hash.new targetComment = nil for a in rawSource.lines.each_slice(2) do # only parse non-empty lines if /\S/ =~ a[1] m = nil catch( :parseError ) do chopped = a[1] while m = instructionPattern.match(chopped) if m.captures.eql?(FAIL) || (!m[:operator] && m[:operand]) m = nil throw :parseError end if m[:label] if target.has_key?(m[:label].to_sym) printf $stderr, a[0].chomp + ": error: label '#{m[:label]}' is already defined" abort a[1] end target[ m[:label].to_sym ] = instructionOffset targetComment = "(#{instructionOffset}:)" end if m[:operator] iStream[instructionOffset] = [ targetComment, m[:operator], /\A[[:alpha:]]/.match(m[:operand]) ? m[:operand].to_sym : m[:operand] ] targetComment = nil instructionOffset += 1 end chopped = m.post_match if /\A\s*\Z/ =~ chopped # nothing parseable left break end end end if !m printf $stderr, a[0].chomp + ": error: parse failure" abort a[1] end end end # inject hyperspace label target[:hyperspace] = instructionOffset # replace operands that are labels iStream.each do |instruction| if instruction[2] if !(/\A\d/ =~ instruction[2]) # its a label if target.has_key?(instruction[2]) instruction[2] = target[instruction[2]] else abort "error: label '@#{instruction[2]}' is used but not defined" end end end puts instruction.join end