- Commit
- 5179b2ff1775066145a8eb74679c4d7f647d7493
- Parent
- 09576bc799899f52dd768b2b459039ffa834715c
- Author
- Pablo <pablo-escobar@riseup.net>
- Date
Added functionality to parse programs from a string
Also improved the documentation
A OCaml module for manipulating unlimited register machines
Added functionality to parse programs from a string
Also improved the documentation
3 files changed, 105 insertions, 18 deletions
Status | File Name | N° Changes | Insertions | Deletions |
Modified | docs.pdf | 0 | 0 | 0 |
Modified | urm.ml | 51 | 45 | 6 |
Modified | urm.mli | 72 | 60 | 12 |
diff --git a/urm.ml b/urm.ml @@ -6,12 +6,12 @@ type instruction = | S of int | J of int * int * int -type machine = +type t = machine = { instruction_pointer : int; registers : nat -> nat; } -let from_registers f = +let of_registers f = { intruction_pointer = 0; registers = f } @@ -23,8 +23,7 @@ let register m = m.registers (** Executes the next instruction. Returns [Some _] is the instruction pointer points to a valid adress and [None] otherwise. *) let step (program : instruction array) (m : machine) : machine option = - if m.instruction_pointer >= length program || m.instruction_pointer < 0 - then None + if m.instruction_pointer >= length program then None else let updated = match program.(m.instruction_pointer) with @@ -43,7 +42,12 @@ let step (program : instruction array) (m : machine) : machine option = instruction_pointer = m.instruction_pointer + 1; } | J (r1, r2, i) when m.registers r1 == m.registers r2 -> - { m with instruction_pointer = i } + (* [i - 1] is used so that the instruction count starts at 1 instead of + 0 *) + if i > 0 then { m with instruction_pointer = i - 1 } + else + let e = Printf.sprintf "invalid jump address: J(%d, %d, %d)" r1 r2 i + in raise (Invalid_argument e) | J _ -> { m with instruction_pointer = m.instruction_pointer + 1 } in Some updated @@ -53,7 +57,7 @@ let rec exec program m = | None -> { m with instruction_pointer = 0 } | Some n -> exec program n -let exec_safe program m max = +let nexec max program m = if max <= 0 then raise (Invalid_argument "the number of clock-cycles must be positive ") else @@ -66,3 +70,38 @@ let exec_safe program m max = else None in exec_safe_tail m 0 +module Parse = struct + open Genlex + + exception Syntax_error of string + + (** A simple lexer for instructions *) + let lex = make_lexes ["T"; "Z"; "S"; "J"; "("; ")"; ","] + + (** Parse a single instruction *) + let parse_instruction (i: token list) : instruction = + match i with + | [ Kwd "T"; Kwd "("; Int r1; Kwd ","; Int r2; Kwd ")" ] -> + T (r1, r2) + | [ Kwd "Z"; Kwd "("; Int r; Kwd ")" ] -> + Z r + | [ Kwd "S"; Kwd "("; Int r; Kwd ")" ] -> + S r + | [ Kwd "J"; Kwd "("; Int r1; Kwd ","; Int r2; Kwd ","; Int i; Kwd ")" ] + when i > 0 -> + J (r1, r2, i) + | [ Kwd "J"; Kwd "("; Int _; Kwd ","; Int _; Kwd ","; Int i; Kwd ")" ] -> + raise (Syntax_error (Printf.sprintf "invalid jump address: %d" i)) + | _ -> raise (Syntax_error "invalid syntax") + + let parse (s : string) : instruction array = + let f s = + if String.trim s <> "" then None + else + match Stream.npeek 8 (lex (Stream.of_string s)) with + | Some l -> + Some (parse_instruction l) + | None -> + raise (Syntax_error Prinf.sprintf "invalid syntax: %s" s) + in Array.of_list (filter_map (String.split_on_char '\n' s)) +end
diff --git a/urm.mli b/urm.mli @@ -1,18 +1,45 @@ -(** Functions and types for manipulation unlimited register machines. *) +(** Unlimited register machines + + This module provides various types and functions for manipulating unlimited + register machines (URMs). This implementation is besed on the specification + provided in Ramin Naimi's URM simulator, which is in turn based on that of + {i Computability, An introduction to recursive function theory } by Nigel J. + Cutland. For example, the instruction set is given by: + + - [T(n, m)] transfers the contents of the [n]-th register to [m]-th register. + - [Z(n)] sets the value of the [n]-th register to zero. + - [S(n)] increments the value of the [n]-th register. + - [J(n, m, i)] Jumps to the [i]-th instruction if the values of the [n]-th and + [m]-th coincide. The instruction count starts at 1, so [i = 1] jumps to the + first instruction. + + The register set, however, is a bit different. In Nigel's specification the + registers are indexed by natural numbers (starting from 1), while in this + implementation the registers are indexed by arbitrary integers. Also, in + Nigel's specification store natural numbers (starting from 0) and in this + implementation they store arbitrary integers. Good luck getting a negative + integer from a machine whose registers are initially set to natural numbers + though. + + @see <https://sites.oxy.edu/rnaimi/home/URMsim.htm> URM simulator. + @see <libgen.li/file.php?md5=147d472c8ebc58b832a0d93d02c03f3a> Computability, + An introduction to recursive function theory. *) (** The type of instructions. *) type instruction = - | T of int * int (** Transfers values between registers. *) - | Z of int (** Sets a register to zero. *) - | S of int (** Increments a register *) + | T of int * int + | Z of int + | S of int | J of int * int * int - (** Jumps to a given point if the values of two registers coincide. *) (** The type of all possible states of a URM. *) +type t + +(** An alias for {! t } *) type machine (** Returns a machine whose value of the [n]-th register is given by [f n]. *) -val from_registers : f:(int -> int) -> machine +val of_registers : f:(int -> int) -> machine (** Returns the value of the [i]-th register of [m]. *) val register : m:machine -> i:int -> int @@ -24,15 +51,36 @@ val zeros : machine [m] with updates corresponding to the execution of [program]. This function only returns when [program] actually halts. Please use {! - exec_safe } if you don't trust the [program]. *) + nexec } if you don't trust the [program]. + + @raise Invalid_argument if [program] attempts to jump to a non-positive + integer. *) val exec : program:instruction array -> m:machine -> machine (** A safe version of {! exec }, where the max number of clock-cycles allowed is controlled by [max]. Each instruction takes a single clock-cycle to execute. - Returns [Some _] if [program] halts in at most [max] clock-cycles and - [None] otherwise. Throws [Invalid_argument] if [max] is not a positive - integer. *) -val exec_safe : program: instruction array -> - machine -> max:int -> machine option + Returns [Some _] if [program] halts in at most [n] clock-cycles and + [None] otherwise. + + @raise Invalid_argument if [program] attempts to jump to a non-positive + integer. + @raise Invalid_argument if [n] is not a positive integer. *) +val nexec : n:int -> program: instruction array -> machine -> machine option + +(** Parse URM programs from strings. The syntax used by the parser is based on + the one used by Ramin Naimi's URM simulator, but again the register indexes + can be arbitrary integers. + + @see <https://sites.oxy.edu/rnaimi/home/URMsim.htm> URM simulator. *) +module Parse : sig + (** Exception raised when parsing fails. *) + exception Syntax_error of string + + (** Parse a program from a string. + + @raise Syntax_error in case any syntax error is encountered, including + invalid syntax and jumps to invalid addresses. *) + val parse : string -> instruction array +end