caml-urm

A OCaml module for manipulating unlimited register machines

Commit
5179b2ff1775066145a8eb74679c4d7f647d7493
Parent
09576bc799899f52dd768b2b459039ffa834715c
Author
Pablo <pablo-escobar@riseup.net>
Date

Added functionality to parse programs from a string

Also improved the documentation

Diffstat

3 files changed, 105 insertions, 18 deletions

Status File Name N° Changes Insertions Deletions
Modified docs.pdf 0 0 0
Modified urm.ml 51 45 6
Modified urm.mli 72 60 12
diff --git a/docs.pdf b/docs.pdf
Binary files differ.
diff --git a/urm.ml b/urm.ml
@@ -6,12 +6,12 @@ type instruction =
   | S of int
   | J of int * int * int
 
-type machine = 
+type t = machine = 
   { instruction_pointer : int;
     registers : nat -> nat;
   }
 
-let from_registers f =
+let of_registers f =
   { intruction_pointer = 0;
     registers = f
   }
@@ -23,8 +23,7 @@ let register m = m.registers
 (** Executes the next instruction. Returns [Some _] is the instruction pointer
  points to a valid adress and [None] otherwise. *)
 let step (program : instruction array) (m : machine) : machine option =
-  if m.instruction_pointer >= length program || m.instruction_pointer < 0
-  then None
+  if m.instruction_pointer >= length program then None
   else
     let updated =
       match program.(m.instruction_pointer) with
@@ -43,7 +42,12 @@ let step (program : instruction array) (m : machine) : machine option =
           instruction_pointer = m.instruction_pointer + 1;
         }
       | J (r1, r2, i) when m.registers r1 == m.registers r2 -> 
-        { m with instruction_pointer = i }
+        (* [i - 1] is used so that the instruction count starts at 1 instead of
+         0 *)
+        if i > 0 then { m with instruction_pointer = i - 1 }
+        else 
+          let e = Printf.sprintf "invalid jump address: J(%d, %d, %d)" r1 r2 i
+          in raise (Invalid_argument e)
       | J _ -> 
         { m with instruction_pointer = m.instruction_pointer + 1 } 
     in Some updated
@@ -53,7 +57,7 @@ let rec exec program m =
   | None -> { m with instruction_pointer = 0 }
   | Some n -> exec program n
 
-let exec_safe program m max =
+let nexec max program m =
   if max <= 0 
   then raise (Invalid_argument "the number of clock-cycles must be positive ")
   else
@@ -66,3 +70,38 @@ let exec_safe program m max =
       else None
     in exec_safe_tail m 0
 
+module Parse = struct
+  open Genlex
+
+  exception Syntax_error of string
+
+  (** A simple lexer for instructions *)
+  let lex = make_lexes ["T"; "Z"; "S"; "J"; "("; ")"; ","]
+
+  (** Parse a single instruction *)
+  let parse_instruction (i: token list) : instruction =
+    match i with
+    | [ Kwd "T"; Kwd "("; Int r1; Kwd ","; Int r2; Kwd ")" ] ->
+        T (r1, r2)
+    | [ Kwd "Z"; Kwd "("; Int r; Kwd ")" ] ->
+        Z r
+    | [ Kwd "S"; Kwd "("; Int r; Kwd ")" ] ->
+        S r
+    | [ Kwd "J"; Kwd "("; Int r1; Kwd ","; Int r2; Kwd ","; Int i; Kwd ")" ]
+    when i > 0 ->
+        J (r1, r2, i)
+    | [ Kwd "J"; Kwd "("; Int _; Kwd ","; Int _; Kwd ","; Int i; Kwd ")" ] ->
+        raise (Syntax_error (Printf.sprintf "invalid jump address: %d" i))
+    | _ -> raise (Syntax_error "invalid syntax")
+
+  let parse (s : string) : instruction array =
+    let f s = 
+      if String.trim s <> "" then None
+      else
+        match Stream.npeek 8 (lex (Stream.of_string s)) with
+        | Some l -> 
+            Some (parse_instruction l)
+        | None -> 
+            raise (Syntax_error Prinf.sprintf "invalid syntax: %s" s)
+    in Array.of_list (filter_map (String.split_on_char '\n' s))
+end
diff --git a/urm.mli b/urm.mli
@@ -1,18 +1,45 @@
-(** Functions and types for manipulation unlimited register machines. *)
+(** Unlimited register machines 
+
+ This module provides various types and functions for manipulating unlimited
+ register machines (URMs). This implementation is besed on the specification
+ provided in Ramin Naimi's URM simulator, which is in turn based on that of 
+ {i Computability, An introduction to recursive function theory } by Nigel J.
+ Cutland. For example, the instruction set is given by: 
+
+ - [T(n, m)] transfers the contents of the [n]-th register to [m]-th register.
+ - [Z(n)] sets the value of the [n]-th register to zero.
+ - [S(n)] increments the value of the [n]-th register.
+ - [J(n, m, i)] Jumps to the [i]-th instruction if the values of the [n]-th and
+ [m]-th coincide. The instruction count starts at 1, so [i = 1] jumps to the
+ first instruction.
+
+ The register set, however, is a bit different. In Nigel's specification the
+ registers are indexed by natural numbers (starting from 1), while in this
+ implementation the registers are indexed by arbitrary integers. Also, in
+ Nigel's specification store natural numbers (starting from 0) and in this
+ implementation they store arbitrary integers. Good luck getting a negative
+ integer from a machine whose registers are initially set to natural numbers
+ though.
+
+ @see <https://sites.oxy.edu/rnaimi/home/URMsim.htm> URM simulator. 
+ @see <libgen.li/file.php?md5=147d472c8ebc58b832a0d93d02c03f3a> Computability,
+ An introduction to recursive function theory. *)
 
 (** The type of instructions. *)
 type instruction = 
-  | T of int * int (** Transfers values between registers. *)
-  | Z of int (** Sets a register to zero. *)
-  | S of int (** Increments a register *)
+  | T of int * int 
+  | Z of int 
+  | S of int 
   | J of int * int * int 
-    (** Jumps to a given point if the values of two registers coincide. *)
 
 (** The type of all possible states of a URM. *)
+type t 
+
+(** An alias for {! t } *)
 type machine
 
 (** Returns a machine whose value of the [n]-th register is given by [f n]. *)
-val from_registers : f:(int -> int) -> machine
+val of_registers : f:(int -> int) -> machine
 
 (** Returns the value of the [i]-th register of [m]. *)
 val register : m:machine -> i:int -> int
@@ -24,15 +51,36 @@ val zeros : machine
     [m] with updates corresponding to the execution of [program]. 
 
     This function only returns when [program] actually halts. Please use {!
-    exec_safe } if you don't trust the [program]. *)
+    nexec } if you don't trust the [program]. 
+
+    @raise Invalid_argument if [program] attempts to jump to a non-positive
+    integer. *)
 val exec : program:instruction array -> m:machine -> machine
 
 (** A safe version of {! exec }, where the max number of clock-cycles
     allowed is controlled by [max]. Each instruction takes a single clock-cycle
     to execute.
 
-    Returns [Some _] if [program] halts in at most [max] clock-cycles and
-    [None] otherwise. Throws [Invalid_argument] if [max] is not a positive
-    integer. *)
-val exec_safe : program: instruction array -> 
-  machine -> max:int -> machine option
+    Returns [Some _] if [program] halts in at most [n] clock-cycles and
+    [None] otherwise. 
+
+    @raise Invalid_argument if [program] attempts to jump to a non-positive
+    integer.
+    @raise Invalid_argument if [n] is not a positive integer. *)
+val nexec : n:int -> program: instruction array -> machine -> machine option
+
+(** Parse URM programs from strings. The syntax used by the parser is based on
+    the one used by Ramin Naimi's URM simulator, but again the register indexes
+    can be arbitrary integers.
+
+    @see <https://sites.oxy.edu/rnaimi/home/URMsim.htm> URM simulator. *)
+module Parse : sig
+  (** Exception raised when parsing fails. *)
+  exception Syntax_error of string
+
+  (** Parse a program from a string. 
+
+      @raise Syntax_error in case any syntax error is encountered, including
+      invalid syntax and jumps to invalid addresses. *)
+  val parse : string -> instruction array
+end