Module BTL

The BTL intermediate language: abstract syntax and semantics. BTL stands for "Block Transfer Language". Informally, a block is a piece of "loop-free" code, with a single entry-point, hence, such that transformation preserving locally the semantics of each block, preserve also globally the semantics of the function. a BTL function is a CFG where each node is such a block, represented by structured code. BTL gives a structured view of RTL code. It is dedicated to optimizations validated by "symbolic simulation" over blocks.

Require Import Coqlib Maps.
Require Import AST Integers Values Events Memory Globalenvs Smallstep.
Require Import RTL Op Registers OptionMonad.
Require Import BTL_Invariants.
Require Import ValueDomain.

Import ListNotations.

Abstract syntax


Definition exit := node.

Parameter inst_info: Set.
Extract Constant inst_info => "BTLtypes.inst_info".

Parameter block_info: Set.
Extract Constant block_info => "BTLtypes.block_info".

Parameter function_info: Set.
Extract Constant function_info => "BTLtypes.function_info".

final instructions (that stops block execution)
Inductive final: Type :=
  | Bgoto (succ:exit)
  | Breturn (res: option reg)
terminates the execution of the current function. It returns the value of the given register, or Vundef if none is given.
  | Bcall (sig: signature) (fn: reg + ident) (args:list reg) (dest:reg) (succ:exit)
invokes the function determined by fn (either a function pointer found in a register or a function name), giving it the values of registers args as arguments. It stores the return value in dest and branches to succ.
  | Btailcall (sig:signature) (fn: reg + ident) (args: list reg)
performs a function invocation in tail-call position (the current function terminates after the call, returning the result of the callee)
  | Bbuiltin (ef:external_function) (args:list (builtin_arg reg)) (dest:builtin_res reg) (succ:exit)
calls the built-in function identified by ef, giving it the values of args as arguments. It stores the return value in dest and branches to succ.
  | Bjumptable (arg:reg) (tbl:list exit)
Bjumptable arg tbl transitions to the node that is the n-th element of the list tbl, where n is the unsigned integer value of register arg.
.

Inductive iblock: Type :=
  | BF (fi: final) (iinfo: inst_info)
  | Bnop (oiinfo: option inst_info)
  | Bop (op:operation) (args:list reg) (dest:reg) (iinfo: inst_info)
performs the arithmetic operation op over the values of registers args, stores the result in dest
  | Bload (trap:trapping_mode) (chunk:memory_chunk) (addr:addressing) (args:list reg) (aaddr:aval)
          (dest:reg) (iinfo: inst_info)
loads a chunk quantity from the address determined by the addressing mode addr and the values of the args registers, stores the quantity just read into dest. If trap=NOTRAP, then failures lead to a default value written to dest.
  | Bstore (chunk:memory_chunk) (addr:addressing) (args:list reg) (ainfo:store_info) (src:reg) (iinfo: inst_info)
stores the value of register src in the chunk quantity at the the address determined by the addressing mode addr and the values of the args registers.
  | Bseq (b1 b2: iblock)
starts by running b1 and stops here if execution of b1 has reached a final instruction or aborted or continue with b2 otherwise
  | Bcond (cond:condition) (args:list reg) (ifso ifnot: iblock) (iinfo: inst_info)
evaluates the boolean condition cond over the values of registers args. If the condition is true, it continues on ifso. If the condition is false, it continues on ifnot. info is a ghost field there to provide information relative to branch prediction.
  .
Coercion BF: final >-> Funclass.


NB: - a RTL (Inop pc) ending a branch of block is encoded by (Bseq Bnop (Bgoto pc)). - a RTL (Inop pc) in the middle of a branch is simply encoded by Bnop. - the same trick appears for all "basic" instructions and Icond.

Record iblock_info := mk_ibinfo {
  entry: iblock;
  binfo: block_info
}.

Definition code: Type := PTree.t iblock_info.

Record function: Type := mkfunction {
  fn_sig: signature;
  fn_params: list reg;
  fn_stacksize: Z;
  fn_code: code;
  fn_entrypoint: node;
  fn_gm: gluemap;
  fn_info: function_info
}.

A function description comprises a control-flow graph (CFG) fn_code (a partial finite mapping from nodes to instructions). As in Cminor, fn_sig is the function signature and fn_stacksize the number of bytes for its stack-allocated activation record. fn_params is the list of registers that are bound to the values of arguments at call time. fn_entrypoint is the node of the first instruction of the function in the CFG.

Definition fundef := AST.fundef function.

Definition program := AST.program fundef unit.

Definition funsig (fd: fundef) :=
  match fd with
  | Internal f => fn_sig f
  | External ef => ef_sig ef
  end.

Operational semantics


Definition genv := Genv.t fundef unit.

The dynamic semantics of BTL is similar to RTL, except that the step of one instruction is generalized into the run of one iblock.

Inductive stackframe : Type :=
  | Stackframe:
      forall (res: reg) (* where to store the result *)
             (f: function) (* calling function *)
             (sp: val) (* stack pointer in calling function *)
             (succ: exit) (* program point in calling function *)
             (rs: regset), (* register state in calling function *)
      stackframe.

Inductive state : Type :=
  | State:
      forall (stack: list stackframe) (* call stack *)
             (f: function) (* current function *)
             (sp: val) (* stack pointer *)
             (pc: node) (* current program point in c *)
             (rs: regset) (* register state *)
             (m: mem) (* memory state *)
             (bc: block_classification),
      state
  | Callstate:
      forall (stack: list stackframe) (* call stack *)
             (f: fundef) (* function to call *)
             (args: list val) (* arguments to the call *)
             (m: mem) (* memory state *)
             (bc: block_classification),
      state
  | Returnstate:
      forall (stack: list stackframe) (* call stack *)
             (v: val) (* return value for the call *)
             (m: mem) (* memory state *)
             (bc: block_classification),
      state.

Definition dummy_block_classification : block_classification.
Proof.
  exists (fun _ => BCother); discriminate 1.
Qed.

outcome of a block execution
Record outcome := out {
   _rs: regset;
   _m: mem;
   _fin: option final
}.

Definition reg_builtin_res (res: builtin_res reg): option reg :=
  match res with
  | BR r => Some r
  | _ => None
  end.

Section RELSEM.

Variable strict : bool.
Variable ge: genv.

Definition find_function (ros: reg + ident) (rs: regset) : option fundef :=
  match ros with
  | inl r => Genv.find_funct ge rs#r
  | inr symb =>
      match Genv.find_symbol ge symb with
      | None => None
      | Some b => Genv.find_funct_ptr ge b
      end
  end.

Local Open Scope option_monad_scope.

Local Hint Constructors has_loaded: core.

Definition vmatch_opt (bc : option block_classification) (v : val) (a : aval) : Prop :=
  match bc with
  | Some bc => vmatch bc v a
  | None => True
  end.

Inductive has_loaded (sp : val) (bc : option block_classification) (rs : Regmap.t val) (m : mem)
    (chunk : memory_chunk) (addr : addressing) (args : list positive) (aaddr : aval) (v : val):
    trapping_mode -> Prop :=
  | has_loaded_normal (a : val) (trap : trapping_mode)
      (EVAL: eval_addressing ge sp addr rs ## args = Some a)
      (VMATCH: vmatch_opt bc a aaddr)
      (LOAD: Mem.loadv chunk m a = Some v):
      has_loaded sp bc rs m chunk addr args aaddr v trap
  | has_loaded_default
      (LOAD: forall a : val,
        eval_addressing ge sp addr rs ## args = Some a ->
        vmatch_opt bc a aaddr ->
        Mem.loadv chunk m a = None)
      (DEFAULT: v = Vundef):
      has_loaded sp bc rs m chunk addr args aaddr v NOTRAP.

Lemma has_loaded_iff_RTL sp rs m chunk addr args aaddr v trap:
  has_loaded sp None rs m chunk addr args aaddr v trap <->
  RTL.has_loaded ge sp rs m chunk addr args v trap.
Proof.
  split; (inversion 1; [econstructor 1 | econstructor 2]; simpl in *; eauto).
Qed.

internal big-step execution of one iblock
Inductive iblock_istep sp (bc : option block_classification):
      regset -> mem -> iblock -> regset -> mem -> option final -> Prop :=
  | exec_final rs m fin iinfo: iblock_istep sp bc rs m (BF fin iinfo) rs m (Some fin)
  | exec_nop rs m oiinfo: iblock_istep sp bc rs m (Bnop oiinfo) rs m None
  | exec_op rs m op args res v iinfo
     (EVAL: eval_operation ge sp op rs##args m = Some v)
     : iblock_istep sp bc rs m (Bop op args res iinfo) (rs#res <- v) m None
  | exec_load rs m trap chunk addr args aaddr dst v iinfo
     (LOAD: has_loaded sp bc rs m chunk addr args aaddr v trap)
     : iblock_istep sp bc rs m (Bload trap chunk addr args aaddr dst iinfo) (rs#dst <- v) m None
  | exec_store rs m chunk addr args sinfo src a m' iinfo
     (EVAL: eval_addressing ge sp addr rs##args = Some a)
     (VMATCH: vmatch_opt bc a (store_aaddr sinfo))
     (STORE: Mem.storev chunk m a rs#src = Some m')
     : iblock_istep sp bc rs m (Bstore chunk addr args sinfo src iinfo) rs m' None
  | exec_seq_stop rs m b1 b2 rs' m' fin
     (EXEC: iblock_istep sp bc rs m b1 rs' m' (Some fin))
     : iblock_istep sp bc rs m (Bseq b1 b2) rs' m' (Some fin)
  | exec_seq_continue rs m b1 b2 rs1 m1 rs' m' ofin
     (EXEC1: iblock_istep sp bc rs m b1 rs1 m1 None)
     (EXEC2: iblock_istep sp bc rs1 m1 b2 rs' m' ofin)
     : iblock_istep sp bc rs m (Bseq b1 b2) rs' m' ofin
  | exec_cond rs m cond args ifso ifnot b rs' m' ofin iinfo
     (EVAL: eval_condition cond rs##args m = Some b)
     (EXEC: iblock_istep sp bc rs m (if b then ifso else ifnot) rs' m' ofin)
     : iblock_istep sp bc rs m (Bcond cond args ifso ifnot iinfo) rs' m' ofin
  .
Local Hint Constructors iblock_istep: core.

A functional variant iblock_istep_run of iblock_istep. Lemma iblock_istep_run_equiv below provides a proof that "relation" iblock_istep is a "partial function".


Lemma Z_bounded_dec (lb : Z) (ub : Z) (P : Z -> Prop) (dec : forall m, {P m}+{~P m}):
  let PI := forall m, lb <= m < ub -> P m in
  {PI}+{~PI}.
Proof.
  intro PI.
  pose (l := Intv.elements_rec lb ub).
  assert (PI <-> Forall P l). {
    unfold PI, l; rewrite Forall_forall.
    setoid_rewrite Intv.In_elements_rec.
    reflexivity.
  }
  case (Forall_dec P dec l) as [|]; [left|right]; rewrite H; assumption.
Qed.

Lemma is_uns_dec (n : Z) (i : int):
  {is_uns n i}+{~is_uns n i}.
Proof.
  apply Z_bounded_dec; intro m.
  case (Z_ge_dec m n) as [|]. case (Int.testbit i m) as [|].
  1:right; discriminate 1; auto.
  all:left; intros; try reflexivity; exfalso; auto.
Qed.

Lemma is_sgn_dec (n : Z) (i : int):
  {is_sgn n i}+{~is_sgn n i}.
Proof.
  apply Z_bounded_dec; intro m.
  case (Z_ge_dec m (n - 1)) as [|].
  case (bool_dec (Int.testbit i m) (Int.testbit i (Int.zwordsize - 1))) as [|NEQ].
  2:right; intro; apply NEQ; auto.
  all:left; intros; auto; exfalso; auto.
Qed.

Lemma pmatch_dec bc b ofs a:
  {pmatch bc b ofs a}+{~pmatch bc b ofs a}.
Proof.
  case (bc b) eqn:BC, a;
    try solve [right; inversion 1; congruence | left; constructor; auto];
    try (case (Ptrofs.eq_dec ofs ofs0) as [|]; [subst ofs0|]);
    try (case (Pos.eq_dec id id0) as [|]; [subst id0|]);
    solve [right; inversion 1; congruence | left; econstructor; eauto; congruence].
Qed.

Lemma vmatch_dec bc v a:
  {vmatch bc v a}+{~vmatch bc v a}.
Proof.
  case v, a; try solve [right; inversion 1 | left; constructor].
  - case (Int.eq_dec i n) as [<-|]; [left; constructor | right; inversion 1; congruence].
  - case (Z_le_dec 0 n) as [|].
    case (is_uns_dec n i) as [|].
    1:left; constructor; auto. all:right; inversion 1; auto.
  - case (Z_lt_dec 0 n) as [|].
    case (is_sgn_dec n i) as [|].
    1:left; constructor; auto. all:right; inversion 1; auto.
  - case (Int64.eq_dec i n) as [<-|]; [left; constructor | right; inversion 1; congruence].
  - case (Floats.Float.eq_dec f f0) as [<-|]; [left; constructor | right; inversion 1; congruence].
  - case (Floats.Float32.eq_dec f f0) as [<-|]; [left; constructor | right; inversion 1; congruence].
  - case (pmatch_dec bc b i p) as [|]; [left; constructor | right; inversion 1]; auto.
  - case (pmatch_dec bc b i p) as [|]; [left; constructor | right; inversion 1]; auto.
Qed.

Definition vmatch_opt_dec bc v a:
  {vmatch_opt bc v a}+{~vmatch_opt bc v a}.
Proof.
  case bc as [bc|]; simpl.
  - apply vmatch_dec.
  - left; constructor.
Defined.

Fixpoint iblock_istep_run sp bc ib rs m: option outcome :=
  match ib with
  | BF fin _ =>
      Some {| _rs := rs; _m := m; _fin := Some fin |}
  | Bnop _ =>
      Some {| _rs := rs; _m:= m; _fin := None |}
  | Bop op args res _ =>
      SOME v <- eval_operation ge sp op rs##args m IN
      Some {| _rs := rs#res <- v; _m:= m; _fin := None |}
  | Bload TRAP chunk addr args aaddr dst _ =>
      SOME a <- eval_addressing ge sp addr rs##args IN
      ASSERT (vmatch_opt_dec bc a aaddr) IN
      SOME v <- Mem.loadv chunk m a IN
      Some {| _rs := rs#dst <- v; _m:= m; _fin := None |}
  | Bload NOTRAP chunk addr args aaddr dst _ =>
      match eval_addressing ge sp addr rs##args with
      | Some a =>
          match vmatch_opt_dec bc a aaddr, Mem.loadv chunk m a with
          | left _, Some v => Some {| _rs := rs#dst <- v; _m:= m; _fin := None |}
          | _, _ =>
              Some {| _rs := rs#dst <- Vundef; _m:= m; _fin := None |}
          end
      | None =>
          Some {| _rs := rs#dst <- Vundef; _m:= m; _fin := None |}
      end
  | Bstore chunk addr args sinfo src _ =>
      SOME a <- eval_addressing ge sp addr rs##args IN
      ASSERT (vmatch_opt_dec bc a (store_aaddr sinfo)) IN
      SOME m' <- Mem.storev chunk m a rs#src IN
      Some {| _rs := rs; _m:= m'; _fin := None |}
  | Bseq b1 b2 =>
      SOME out1 <- iblock_istep_run sp bc b1 rs m IN
      match out1.(_fin) with
      | None => iblock_istep_run sp bc b2 out1.(_rs) out1.(_m)
      | _ => Some out1
      end
  | Bcond cond args ifso ifnot _ =>
      SOME b <- eval_condition cond rs##args m IN
      iblock_istep_run sp bc (if b then ifso else ifnot) rs m
  end.

Lemma iblock_istep_run_equiv_load sp bc ib v rs rs' m trap chunk addr args aaddr dst iinfo ofin
  (IB: ib = (Bload trap chunk addr args aaddr dst iinfo))
  (RS': rs' = rs # dst <- v)
  (LOAD: has_loaded sp bc rs m chunk addr args aaddr v trap):
  iblock_istep sp bc rs m ib rs' m ofin <->
  iblock_istep_run sp bc ib rs m = Some {| _rs := rs'; _m := m; _fin := ofin |}.
Proof.
  subst; split; intro H.
  - inv H; clear LOAD0.
    simpl; inv LOAD.
    + rewrite EVAL, LOAD0; repeat autodestruct.
    + repeat autodestruct. intro LOAD; exfalso.
      rewrite (LOAD0 _ eq_refl) in LOAD; congruence.
  - replace ofin with (@None final); auto.
    revert H; simpl; repeat autodestruct.
Qed.
Local Hint Resolve iblock_istep_run_equiv_load: core.

Lemma iblock_istep_run_equiv sp bc rs m ib rs' m' ofin:
  iblock_istep sp bc rs m ib rs' m' ofin <->
  iblock_istep_run sp bc ib rs m = Some {| _rs := rs'; _m:= m'; _fin := ofin |}.
Proof.
  constructor.
  - induction 1; try solve [eapply iblock_istep_run_equiv_load; eauto];
    simpl; repeat autodestruct; try_simplify_someHyps.
  - revert rs m rs' m' ofin.
    induction ib; intros until ofin; intro R; generalize R;
    simpl; repeat autodestruct; try solve [try_simplify_someHyps]; intros;
    try (injection R0 as ?; subst).
    all:try solve [apply exec_load; eapply has_loaded_normal; eauto
                  |eapply iblock_istep_run_equiv_load; eauto; apply has_loaded_default; congruence].
    + (* Bseq *) destruct o; try_simplify_someHyps; subst; eauto.
Qed.

Local Open Scope list_scope.

Inductive final_step stack f sp rs m bc: final -> trace -> state -> Prop :=
  | exec_Bgoto pc:
      final_step stack f sp rs m bc (Bgoto pc) E0
                 (State stack f sp pc rs m bc)
  | exec_Breturn or stk m' bc':
      sp = (Vptr stk Ptrofs.zero) ->
      Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
      final_step stack f sp rs m bc (Breturn or)
        E0 (Returnstate stack (regmap_optget or Vundef rs) m' bc')
  | exec_Bcall sig ros args res pc' fd bc':
      find_function ros rs = Some fd ->
      funsig fd = sig ->
      final_step stack f sp rs m bc (Bcall sig ros args res pc')
        E0 (Callstate (Stackframe res f sp pc' rs :: stack) fd rs##args m bc')
  | exec_Btailcall sig ros args stk m' bc' fd:
      find_function ros rs = Some fd ->
      funsig fd = sig ->
      sp = (Vptr stk Ptrofs.zero) ->
      Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
      final_step stack f sp rs m bc (Btailcall sig ros args)
        E0 (Callstate stack fd rs##args m' bc')
  | exec_Bbuiltin ef args res pc' vargs t vres m' bc':
      eval_builtin_args ge (fun r => rs#r) sp m args vargs ->
      external_call ef ge vargs m t vres m' ->
      final_step stack f sp rs m bc (Bbuiltin ef args res pc')
         t (State stack f sp pc' (regmap_setres res vres rs) m' bc')
  | exec_Bjumptable arg tbl n pc':
      rs#arg = Vint n ->
      list_nth_z tbl (Int.unsigned n) = Some pc' ->
      final_step stack f sp rs m bc (Bjumptable arg tbl)
        E0 (State stack f sp pc' rs m bc)
.

big-step execution of one iblock
Definition iblock_step stack f sp rs m bc ib t s: Prop :=
  exists rs' m' fin,
    iblock_istep sp (ASSERT strict IN Some bc) rs m ib rs' m' (Some fin) /\
    final_step stack f sp rs' m' bc fin t s.

The transitions are presented as an inductive predicate step ge st1 t st2, where ge is the global environment, st1 the initial state, st2 the final state, and t the trace of system calls performed during this transition.

Inductive step: state -> trace -> state -> Prop :=
  | exec_iblock stack ib f sp pc rs m bc t s
      (PC: (fn_code f)!pc = Some ib)
      (STEP: iblock_step stack f sp rs m bc ib.(entry) t s)
      :step (State stack f sp pc rs m bc) t s
  | exec_function_internal stack f args m bc m' bc' stk
      (ALLOC: Mem.alloc m 0 f.(fn_stacksize) = (m', stk))
      :step (Callstate stack (Internal f) args m bc)
         E0 (State stack
                  f
                  (Vptr stk Ptrofs.zero)
                  f.(fn_entrypoint)
                  (init_regs args f.(fn_params))
                  m' bc')
  | exec_function_external stack ef args res t m bc m' bc'
      (EXTCALL: external_call ef ge args m t res m')
      :step (Callstate stack (External ef) args m bc)
          t (Returnstate stack res m' bc')
  | exec_return stack res f sp pc rs vres m bc bc'
      :step (Returnstate (Stackframe res f sp pc rs :: stack) vres m bc)
         E0 (State stack f sp pc (rs#res <- vres) m bc')
.

End RELSEM.

Execution of whole programs are described as sequences of transitions from an initial state to a final state. An initial state is a Callstate corresponding to the invocation of the ``main'' function of the program without arguments and with an empty call stack.

Inductive initial_state (p: program): state -> Prop :=
  | initial_state_intro: forall b f m0 bc0,
      let ge := Genv.globalenv p in
      Genv.init_mem p = Some m0 ->
      Genv.find_symbol ge p.(prog_main) = Some b ->
      Genv.find_funct_ptr ge b = Some f ->
      funsig f = signature_main ->
      initial_state p (Callstate nil f nil m0 bc0).

A final state is a Returnstate with an empty call stack.

Inductive final_state: state -> int -> Prop :=
  | final_state_intro: forall r m bc,
      final_state (Returnstate nil (Vint r) m bc) r.

Definition sem (strict : bool) (p: program) :=
  Semantics (step strict) (initial_state p) final_state (Genv.globalenv p).

Auxiliary general purpose functions on BTL


Definition is_goto (ib: iblock): bool :=
  match ib with
  | BF (Bgoto _) _ => true
  | _ => false
  end.

Simulation of the strict semantics by the lax one


Additionnal lessdef lemmas

Inductive opt_lessdef [A] : option A -> option A -> Prop :=
  | opt_lessdef_refl x: opt_lessdef x x
  | opt_lessdef_None x: opt_lessdef None x.

Lemma has_loaded_lessdef ge sp bc bc' rs rs' m m' chk addr args aaddr v trap
  (LD_BC: opt_lessdef bc' bc)
  (LD_RS: regs_lessdef rs rs')
  (LD_M: Mem.extends m m')
  (LOAD: has_loaded ge sp bc rs m chk addr args aaddr v trap):
  exists v',
    has_loaded ge sp bc' rs' m' chk addr args aaddr v' trap /\
    Val.lessdef v v'.
Proof.
  inv LOAD.
  - eapply eval_addressing_lessdef in EVAL as (a' & EVAL' & LD_A); eauto using regs_lessdef_regs.
    assert (a' = a) by (inv LD_A; [reflexivity|discriminate LOAD0]); subst a'.
    eapply Mem.loadv_extends in LOAD0 as (v' & LOAD0' & LD_V); eauto.
    exists v'; split; auto.
    apply has_loaded_normal with a; auto.
    destruct bc' as [bc'|]; [|constructor]; inv LD_BC; auto.
  - assert (exists v', has_loaded ge sp bc' rs' m' chk addr args aaddr v' NOTRAP) as [v']. {
      clear.
      case (eval_addressing ge sp addr (rs' ## args)) as [a'|] eqn:EVAL.
      case (vmatch_opt_dec bc' a' aaddr) as [|].
      case (Mem.loadv chk m' a') as [v'|] eqn:LOAD.
      2-4:exists Vundef; apply has_loaded_default; congruence.
      exists v'; apply has_loaded_normal with a'; auto.
    }
    exists v'; auto.
Qed.

Lemma regmap_optget_lessdef r d d' rs rs'
  (LD_D : Val.lessdef d d')
  (LD_RS : regs_lessdef rs rs'):
  Val.lessdef (regmap_optget r d rs) (regmap_optget r d' rs').
Proof.
  case r as [r|]; simpl; eauto.
Qed.

Lemma find_function_lessdef ge ros rs rs' fd
  (LD_RS : regs_lessdef rs rs'):
  find_function ge ros rs = Some fd ->
  find_function ge ros rs' = Some fd.
Proof.
  unfold find_function.
  case ros as [r|symb]; auto.
  specialize (LD_RS r).
  unfold Genv.find_funct; autodestruct.
  inv LD_RS; auto.
Qed.

Lemma init_regs_lessdef vs vs' rs
  (LD_VS : Val.lessdef_list vs vs'):
  regs_lessdef (init_regs vs rs) (init_regs vs' rs).
Proof.
  revert vs vs' LD_VS; induction rs; intros ? ?; simpl.
  - constructor.
  - case vs; inversion 1; [constructor|].
    intro r; rewrite !Regmap.gsspec; case peq as [|]; auto.
    apply IHrs; assumption.
Qed.


Module Strict_to_Lax.
  
  Lemma transf_iblock_istep ge sp bc rs0 rs0' m0 m0' ib rs1 m1 fin
    (STEP : iblock_istep ge sp bc rs0 m0 ib rs1 m1 fin)
    (M_RS : regs_lessdef rs0 rs0')
    (M_M : Mem.extends m0 m0'):
    exists rs1' m1',
      iblock_istep ge sp None rs0' m0' ib rs1' m1' fin /\
      regs_lessdef rs1 rs1' /\ Mem.extends m1 m1'.
  Proof.
    Local Ltac slv :=
      solve [eexists _, _; split; [econstructor|split]; simpl; eauto using set_reg_lessdef].
    revert rs0' m0' M_RS M_M; induction STEP; intros ? ? ? ?.
    1,2:slv.
    - eapply eval_operation_lessdef in EVAL as (v' & ? & ?); eauto using regs_lessdef_regs. slv.
    - eapply has_loaded_lessdef with (bc':=None) in LOAD as (v' & ? & ?); eauto using opt_lessdef_None. slv.
    - eapply eval_addressing_lessdef in EVAL as (a' & ? & ?); eauto using regs_lessdef_regs.
      eapply Mem.storev_extends in STORE as (m1' & ? & ?); eauto. slv.
    - ecase IHSTEP as (rs1' & m1' & ? & ? & ?); eauto. slv.
    - ecase IHSTEP1 as (rs1' & m1' & ? & ? & ?); eauto.
      ecase IHSTEP2 as (rs2' & m2' & ? & ? & ?); eauto. slv.
    - eapply eval_condition_lessdef in EVAL; eauto using regs_lessdef_regs.
      ecase IHSTEP as (rs1' & m1' & ? & ? & ?); eauto. slv.
  Qed.

  Inductive match_stackframe: stackframe -> stackframe -> Prop :=
    | match_stackframe_intro
        sp res f pc rs rs'
        (M_RS: regs_lessdef rs rs')
        : match_stackframe (Stackframe res f sp pc rs)
                           (Stackframe res f sp pc rs').

  Definition match_stackframes : list stackframe -> list stackframe -> Prop :=
    list_forall2 match_stackframe.

  Inductive match_states: state -> state -> Prop :=
    | match_states_norm stk stk' f pc sp rs rs' m m' bc
        (STACK : match_stackframes stk stk')
        (M_RS : regs_lessdef rs rs')
        (M_M : Mem.extends m m')
        : match_states (State stk f sp pc rs m bc)
                       (State stk' f sp pc rs' m' bc)
    | match_states_call stk stk' f args args' m m' bc
        (STACK : match_stackframes stk stk')
        (M_ARGS : Val.lessdef_list args args')
        (M_M : Mem.extends m m')
        : match_states (Callstate stk f args m bc)
                       (Callstate stk' f args' m' bc)
    | match_states_return stk stk' v v' m m' bc
        (STACK : match_stackframes stk stk')
        (M_V : Val.lessdef v v')
        (M_M : Mem.extends m m')
        : match_states (Returnstate stk v m bc)
                       (Returnstate stk' v' m' bc).
  
  Lemma transf_final_step ge stk stk' f sp rs rs' m m' bc fin t s
    (STEP : final_step ge stk f sp rs m bc fin t s)
    (M_STK : match_stackframes stk stk')
    (M_RS : regs_lessdef rs rs')
    (M_M : Mem.extends m m'):
    exists s',
      final_step ge stk' f sp rs' m' bc fin t s' /\
      match_states s s'.
  Proof.
    Local Ltac slv ::=
      solve [eexists _; split; econstructor;
             repeat first [apply list_forall2_cons | apply match_stackframe_intro];
             eauto using set_res_lessdef, regs_lessdef_regs, regmap_optget_lessdef].
    inv STEP.
    - slv.
    - exploit Mem.free_parallel_extends; eauto. intros (m1' & ? & ?); slv.
    - exploit find_function_lessdef; eauto. slv.
    - exploit find_function_lessdef; eauto. intro.
      exploit Mem.free_parallel_extends; eauto. intros (m1' & ? & ?); slv.
    - exploit eval_builtin_args_lessdef. 1:apply M_RS. all:eauto. intros (vargs' & ? & ?).
      exploit external_call_mem_extends; eauto using regs_lessdef_regs. intros (vres' & m1' & ? & ? & ? & _).
      slv.
    - specialize (M_RS arg) as LD_ARG; rewrite H in LD_ARG; inv LD_ARG.
      slv.
  Qed.

  Lemma transf_step strict ge s0 s0' t s1
    (STEP : step strict ge s0 t s1)
    (MATCH : match_states s0 s0'):
    exists s1', step false ge s0' t s1' /\ match_states s1 s1'.
  Proof.
    inv STEP; inv MATCH.
    - case STEP0 as (rs1 & m1 & fin & ISTEP & FIN).
      eapply transf_iblock_istep in ISTEP as (rs1' & m1' & ISTEP & ? & ?); eauto.
      eapply transf_final_step in FIN as (s1' & ? & ?); eauto.
      exists s1'; split; auto.
      eapply exec_iblock; eauto.
      exists rs1', m1', fin; tauto.
    - eapply Mem.alloc_extends in ALLOC as (m1' & ? & ?); try apply Z.le_refl; eauto.
      eexists _; split; econstructor; eauto using init_regs_lessdef.
    - eapply external_call_mem_extends in EXTCALL as (vres' & m1' & ? & ? & ? & _); eauto.
      eexists _; split; econstructor; eauto.
    - inv STACK; inv H1.
      eexists _; split; econstructor; eauto using set_reg_lessdef.
  Qed.

  Lemma transf_program strict p:
    forward_simulation (sem strict p) (sem false p).
  Proof.
    apply forward_simulation_step with match_states; simpl; eauto using transf_step.
    - (* initial_state *)
      intros s INI; exists s; split; auto.
      inv INI; constructor; eauto using Mem.extends_refl; constructor.
    - (* final_state *)
      intros s s' r MATCH FINAL; inv FINAL; inv MATCH; inv STACK; inv M_V.
      constructor.
  Qed.

End Strict_to_Lax.


Other lemmas


Lemma find_function_match [match_fundef match_varinfo ctx p tp]
  (TRANSL: @Linking.match_program_gen program fundef unit fundef unit _ match_fundef match_varinfo ctx p tp)
  (fp : reg + ident) rs f:
  find_function (Genv.globalenv p) fp rs = Some f ->
  exists (cunit : program) (tf : fundef),
    find_function (Genv.globalenv tp) fp rs = Some tf /\
    match_fundef cunit f tf /\
    Linking.linkorder cunit ctx.
Proof.
  unfold find_function; case fp as [|].
  + intro F0. apply (Genv.find_funct_match TRANSL) in F0 as (cu & tf & ?).
    exists cu, tf; tauto.
  + rewrite (Genv.find_symbol_match TRANSL); case Genv.find_symbol as [|]; [|discriminate 1].
    intro F0. apply (Genv.find_funct_ptr_match TRANSL) in F0 as (cu & tf & ?).
    exists cu, tf; tauto.
Qed.