Module RTLtoBTLproof

Require Import Coqlib Maps.
Require Import AST Values Events Globalenvs Smallstep.
Require Import RTL Op OptionMonad.

Require Import Errors Linking RTLtoBTL.


Module RTLtoBTL_Translationproof (B: RTLtoBTL_TranslationOracle).

Include RTLtoBTL_Translation B.

Normalization of BTL iblock for simulation of RTL Below normRTL normalizes the representation of BTL blocks, in order to represent them as sequences of RTL instructions. This eases the expression of properties in the below proof.


Definition is_RTLatom (ib: iblock): bool :=
 match ib with
 | Bseq _ _ | Bcond _ _ _ _ _ | Bnop None => false
 | _ => true
 end.

Definition is_RTLbasic (ib: iblock): bool :=
 match ib with
 | Bseq _ _ | Bcond _ _ _ _ _ | Bnop None | BF _ _ => false
 | _ => true
 end.

The strict is_normRTL property specifying the ouput of normRTL below
Inductive is_normRTL: iblock -> Prop :=
  | norm_Bseq ib1 ib2:
     is_RTLbasic ib1 = true ->
     is_normRTL ib2 ->
     is_normRTL (Bseq ib1 ib2)
  | norm_Bcond cond args ib1 ib2 i:
     is_normRTL ib1 ->
     is_normRTL ib2 ->
     is_normRTL (Bcond cond args ib1 ib2 i)
  | norm_others ib:
     is_RTLatom ib = true ->
     is_normRTL ib
     .
Local Hint Constructors is_normRTL: core.

Weaker version allowing for trailing Bnop None.
Inductive is_wnormRTL: iblock -> Prop :=
  | wnorm_Bseq ib1 ib2:
     is_RTLbasic ib1 = true ->
     (ib2 <> Bnop None -> is_wnormRTL ib2) ->
     is_wnormRTL (Bseq ib1 ib2)
  | wnorm_Bcond cond args ib1 ib2 iinfo:
     (ib1 <> Bnop None -> is_wnormRTL ib1) ->
     (ib2 <> Bnop None -> is_wnormRTL ib2) ->
     is_wnormRTL (Bcond cond args ib1 ib2 iinfo)
  | wnorm_others ib:
     is_RTLatom ib = true ->
     is_wnormRTL ib
     .
Local Hint Constructors is_wnormRTL: core.

Fixpoint normRTLrec (ib: iblock) (k: iblock): iblock :=
  match ib with
  | Bseq ib1 ib2 => normRTLrec ib1 (normRTLrec ib2 k)
  | Bcond cond args ib1 ib2 iinfo =>
     Bcond cond args (normRTLrec ib1 k) (normRTLrec ib2 k) iinfo
  | BF fin iinfo => BF fin iinfo
  | Bnop None => k
  | ib => Bseq ib k
  end.

Definition normRTL ib := normRTLrec ib (Bnop None).

Lemma normRTLrec_wcorrect ib: forall k,
  (k <> (Bnop None) -> is_wnormRTL k) ->
  (normRTLrec ib k) <> Bnop None ->
  is_wnormRTL (normRTLrec ib k).
Proof.
  induction ib; simpl; intros; repeat autodestruct; auto.
Qed.

Lemma normRTL_wcorrect ib:
 (normRTL ib) <> Bnop None ->
 is_wnormRTL (normRTL ib).
Proof.
 intros; eapply normRTLrec_wcorrect; eauto.
Qed.

Lemma is_join_opt_None {A} (opc1 opc2: option A):
  is_join_opt opc1 opc2 None -> opc1 = None /\ opc2 = None.
Proof.
  intros X. inv X; auto.
Qed.

Lemma match_iblock_None_not_Bnop dupmap cfg cfg' indirect isfst pc ib:
  match_iblock dupmap cfg cfg' indirect isfst pc ib None -> ib <> Bnop None.
Proof.
  intros X; inv X; try congruence.
Qed.
Local Hint Resolve match_iblock_None_not_Bnop: core.

Lemma is_wnormRTL_normRTL dupmap cfg cfg' indirect ib:
  is_wnormRTL ib ->
  forall isfst pc
  (MIB: match_iblock dupmap cfg cfg' indirect isfst pc ib None),
  is_normRTL ib.
Proof.
  induction 1; simpl; intros; auto; try (inv MIB); eauto.
  (* Bcond *)
  destruct (is_join_opt_None opc1 opc2); subst; eauto.
  econstructor; eauto.
Qed.

Local Hint Constructors iblock_istep: core.
Lemma normRTLrec_iblock_istep_correct tge sp ib rs0 m0 rs1 m1 ofin1:
  forall (ISTEP: iblock_istep tge sp None rs0 m0 ib rs1 m1 ofin1)
  k ofin2 rs2 m2
  (CONT: match ofin1 with
         | None => iblock_istep tge sp None rs1 m1 k rs2 m2 ofin2
         | Some fin1 => rs2=rs1 /\ m2=m1 /\ ofin2=Some fin1
         end),
  iblock_istep tge sp None rs0 m0 (normRTLrec ib k) rs2 m2 ofin2.
Proof.
  induction 1; simpl; intuition subst; eauto.
  - (* Bnop *) autodestruct; eauto.
  - (* Bop *) repeat econstructor; eauto.
  - (* Bload *) inv LOAD.
    + repeat econstructor; eauto.
    + do 2 (econstructor; eauto).
      eapply has_loaded_default; eauto.
  - (* Bcond *) repeat econstructor; eauto.
    destruct ofin; intuition subst;
    destruct b; eapply IHISTEP; eauto.
Qed.

Lemma normRTL_iblock_istep_correct tge sp ib rs0 m0 rs1 m1 ofin:
  iblock_istep tge sp None rs0 m0 ib rs1 m1 ofin ->
  iblock_istep tge sp None rs0 m0 (normRTL ib) rs1 m1 ofin.
Proof.
  intros; eapply normRTLrec_iblock_istep_correct; eauto.
  destruct ofin; simpl; auto.
Qed.

Lemma normRTLrec_iblock_istep_run_None tge sp ib:
  forall rs0 m0 k
  (CONT: match iblock_istep_run tge sp None ib rs0 m0 with
         | Some (out rs1 m1 ofin) =>
             ofin = None /\
             iblock_istep_run tge sp None k rs1 m1 = None
         | _ => True
     end),
  iblock_istep_run tge sp None (normRTLrec ib k) rs0 m0 = None.
Proof.
  induction ib; simpl; intros; subst; intuition (try discriminate).
  - (* Bnop *)
    intros. autodestruct; auto.
  - (* Bop *)
    intros; repeat autodestruct; simpl; intuition congruence.
  - (* Bload *)
    intros; repeat autodestruct; simpl; intuition congruence.
  - (* Bstore *)
    intros; repeat autodestruct; simpl; intuition congruence.
  - (* Bseq *)
    intros.
    eapply IHib1; eauto.
    autodestruct; simpl in *; destruct o; simpl in *; intuition eauto.
    + destruct _fin; intuition eauto.
    + destruct _fin; intuition congruence || eauto.
  - (* Bcond *)
    intros; repeat autodestruct; simpl; intuition congruence || eauto.
Qed.

Lemma normRTL_preserves_iblock_istep_run_None tge sp ib:
  forall rs m, iblock_istep_run tge sp None ib rs m = None
  -> iblock_istep_run tge sp None (normRTL ib) rs m = None.
Proof.
  intros; eapply normRTLrec_iblock_istep_run_None; eauto.
  rewrite H; simpl; auto.
Qed.

Lemma normRTL_preserves_iblock_istep_run tge sp ib:
  forall rs m, iblock_istep_run tge sp None ib rs m =
  iblock_istep_run tge sp None (normRTL ib) rs m.
Proof.
  intros.
  destruct (iblock_istep_run tge sp None ib rs m) eqn:ISTEP.
  - destruct o. symmetry.
    rewrite <- iblock_istep_run_equiv in *.
    apply normRTL_iblock_istep_correct; auto.
  - symmetry.
    apply normRTL_preserves_iblock_istep_run_None; auto.
Qed.

Local Hint Constructors match_iblock: core.
Lemma normRTLrec_matchiblock_correct dupmap cfg cfg' indirect ib pc isfst:
  forall opc1
  (MIB: match_iblock dupmap cfg cfg' indirect isfst pc ib opc1) k opc2
  (CONT: match opc1 with
         | Some pc' =>
             match_iblock dupmap cfg cfg' indirect false pc' k opc2
         | None => opc2=opc1
         end),
  match_iblock dupmap cfg cfg' indirect isfst pc (normRTLrec ib k) opc2.
Proof.
  induction 1; simpl; intros; subst; eauto.
  (* Bcond *)
  intros. inv H0;
  econstructor; eauto; try econstructor.
  destruct opc0; econstructor.
Qed.

Lemma normRTL_matchiblock_correct dupmap cfg cfg' indirect ib pc isfst opc:
  match_iblock dupmap cfg cfg' indirect isfst pc ib opc ->
  match_iblock dupmap cfg cfg' indirect isfst pc (normRTL ib) opc.
Proof.
  intros.
  eapply normRTLrec_matchiblock_correct; eauto.
  destruct opc; simpl; auto.
Qed.

Lemma is_normRTL_correct dupmap cfg cfg' indirect ib pc
  (MI : match_iblock dupmap cfg cfg' indirect true pc ib None):
  is_normRTL (normRTL ib).
Proof.
  exploit normRTL_matchiblock_correct; eauto.
  intros MI2.
  eapply is_wnormRTL_normRTL; eauto.
  apply normRTL_wcorrect; try congruence.
  inv MI2; discriminate.
Qed.
Local Hint Resolve normRTL_matchiblock_correct is_normRTL_correct: core.

Matching relation on functions


Definition match_function dupmap (f:RTL.function) (tf: BTL.function): Prop :=
  BTLmatchRTL.match_function dupmap tf f true.

Inductive match_fundef: RTL.fundef -> BTL.fundef -> Prop :=
  | match_Internal dupmap f f': match_function dupmap f f' -> match_fundef (Internal f) (Internal f')
  | match_External ef: match_fundef (External ef) (External ef).

Inductive match_stackframes: RTL.stackframe -> BTL.stackframe -> Prop :=
  | match_stackframe_intro
      dupmap indirect res f sp pc rs f' pc'
      (TRANSF: match_function dupmap f f')
      (MN: match_nodes dupmap (fn_code f') indirect pc' pc)
      : match_stackframes (RTL.Stackframe res f sp pc rs) (BTL.Stackframe res f' sp pc' rs).

Lemma transf_function_correct f f':
  transf_function f = OK f' -> exists dupmap, match_function dupmap f f'.
Proof.
  unfold transf_function; unfold bind. repeat autodestruct.
  intros VER _ _ _ X. inv X. eexists.
  eapply verify_function_correct; simpl; eauto.
Qed.

Lemma transf_fundef_correct f f':
  transf_fundef f = OK f' -> match_fundef f f'.
Proof.
  intros TRANSF; destruct f; simpl; monadInv TRANSF.
  + exploit transf_function_correct; eauto.
    intros (dupmap & MATCH_F).
    eapply match_Internal; eauto.
  + eapply match_External.
Qed.

Definition match_prog (p: RTL.program) (tp: program) :=
  match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp.

Lemma transf_program_match:
  forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog.
Proof.
  intros. eapply match_transform_partial_program_contextual; eauto.
Qed.

Section BTL_SIMULATES_RTL.

Variable prog: RTL.program.
Variable tprog: program.

Hypothesis TRANSL: match_prog prog tprog.

Let ge := Genv.globalenv prog.
Let tge := Genv.globalenv tprog.

Local Open Scope nat_scope.

Match relation from a RTL state to a BTL state The "option iblock" parameter represents the current BTL execution state. Thus, each RTL single step is symbolized by a new BTL "option iblock" starting at the equivalent PC, which is always pcB below. The entry of this block is obtained by the ATpcB hypothesis. The simulation diagram for match_states_intro is as follows:

      If there is no synthetic node, then pcX = pcB
      else, pcX is pointing to the synthetic node and pcB to the original one.

        RTL state       match_states_intro        BTL state
      [pcR0,rs0,m0] --------------------------- [pcX,rs0,m0]
           |                                         |
           |                                         | BTL_RUN of the synth node
           |                                         | if it exists
           |                                         |
           |                                    [pcB,rs0,m0]
           |                                         |
   RTL_RUN | *E0                                     | BTL_RUN of the orig node
           |                                         |
           |                   MIB                   |
      [pcR1,rs1,m1] ------------------------------- [ib]


Inductive match_strong_state dupmap st st' f f' sp rs1 m1 rs0 m0 pcX pcB pcR0 pcR1 ib ib0 isfst: Prop :=
  | match_strong_state_intro indirect
      (STACKS: list_forall2 match_stackframes st st')
      (TRANSF: match_function dupmap f f')
      (MSN: match_synthetic_node dupmap (fn_code f') indirect pcX pcB pcR0)
      (ATpcB: (fn_code f')!pcB = Some ib0)
      (MIB: match_iblock dupmap (fn_code f') (RTL.fn_code f) indirect isfst pcR1 ib None)
      (IS_EXPD: is_normRTL ib)
      (RTL_RUN: star RTL.step ge (RTL.State st f sp pcR0 rs0 m0) E0 (RTL.State st f sp pcR1 rs1 m1))
      (BTL_RUN: iblock_istep_run tge sp None ib0.(entry) rs0 m0 = iblock_istep_run tge sp None ib rs1 m1)
      : match_strong_state dupmap st st' f f' sp rs1 m1 rs0 m0 pcX pcB pcR0 pcR1 ib ib0 isfst
  .

Inductive match_states: (option iblock) -> RTL.state -> state -> Prop :=
  | match_states_intro
      dupmap st st' f f' sp rs1 m1 rs0 m0 bc pcX pcB pcR0 pcR1 ib ib0 isfst
      (MSTRONG: match_strong_state dupmap st st' f f' sp rs1 m1 rs0 m0 pcX pcB pcR0 pcR1 ib ib0 isfst)
      (NGOTO: is_goto ib = false)
      : match_states (Some ib) (RTL.State st f sp pcR1 rs1 m1) (State st' f' sp pcX rs0 m0 bc)
  | match_states_call
      st st' f f' args m bc
      (STACKS: list_forall2 match_stackframes st st')
      (TRANSF: match_fundef f f')
      : match_states None (RTL.Callstate st f args m) (Callstate st' f' args m bc)
  | match_states_return
      st st' v m bc
      (STACKS: list_forall2 match_stackframes st st')
      : match_states None (RTL.Returnstate st v m) (Returnstate st' v m bc)
  .

Lemma symbols_preserved s: Genv.find_symbol tge s = Genv.find_symbol ge s.
Proof.
  rewrite <- (Genv.find_symbol_match TRANSL). reflexivity.
Qed.

Lemma senv_preserved: Senv.equiv ge tge.
Proof.
  eapply (Genv.senv_match TRANSL).
Qed.

Lemma functions_translated (v: val) (f: RTL.fundef):
  Genv.find_funct ge v = Some f ->
  exists tf cunit, transf_fundef f = OK tf /\ Genv.find_funct tge v = Some tf /\ linkorder cunit prog.
Proof.
  intros. exploit (Genv.find_funct_match TRANSL); eauto.
  intros (cu & tf & A & B & C).
  repeat eexists; intuition eauto.
  + unfold incl; auto.
  + eapply linkorder_refl.
Qed.

Lemma function_ptr_translated v f:
  Genv.find_funct_ptr ge v = Some f ->
  exists tf,
  Genv.find_funct_ptr tge v = Some tf /\ transf_fundef f = OK tf.
Proof.
  intros.
  exploit (Genv.find_funct_ptr_transf_partial TRANSL); eauto.
Qed.

Lemma function_sig_translated f tf: transf_fundef f = OK tf -> funsig tf = RTL.funsig f.
Proof.
  intros H; apply transf_fundef_correct in H; destruct H; simpl; eauto.
  erewrite preserv_fnsig; eauto.
Qed.

Lemma transf_initial_states s1:
  RTL.initial_state prog s1 ->
  exists ib s2, initial_state tprog s2 /\ match_states ib s1 s2.
Proof.
  intros. inv H.
  exploit function_ptr_translated; eauto. intros (tf & FIND & TRANSF).
  eexists. eexists. split.
  - econstructor; eauto.
    + eapply (Genv.init_mem_transf_partial TRANSL); eauto.
    + replace (prog_main tprog) with (prog_main prog). rewrite symbols_preserved. eauto.
      symmetry. eapply match_program_main. eauto.
    + erewrite function_sig_translated; eauto.
  - constructor; eauto.
    constructor.
    apply transf_fundef_correct; auto.
Unshelve. exact dummy_block_classification.
Qed.

Lemma transf_final_states ib s1 s2 r:
  match_states ib s1 s2 -> RTL.final_state s1 r -> final_state s2 r.
Proof.
  intros. inv H0. inv H. inv STACKS. constructor.
Qed.

Lemma find_function_preserved ri rs0 fd
  (FIND : RTL.find_function ge ri rs0 = Some fd)
  : exists fd', find_function tge ri rs0 = Some fd'
                /\ transf_fundef fd = OK fd'.
Proof.
  pose symbols_preserved as SYMPRES.
  destruct ri.
  + simpl in FIND; apply functions_translated in FIND.
    destruct FIND as (tf & cunit & TFUN & GFIND & LO).
    eexists; split. eauto. assumption.
  + simpl in FIND. destruct (Genv.find_symbol _ _) eqn:GFS; try discriminate.
    apply function_ptr_translated in FIND. destruct FIND as (tf & GFF & TF).
    eexists; split. simpl. rewrite symbols_preserved.
    rewrite GFS. eassumption. assumption.
Qed.

Representing an intermediate BTL state We keep a measure of code that remains to be executed with the omeasure type defined below. Intuitively, each RTL step corresponds to either - a single BTL step if we are on the last instruction of the block - no BTL step (as we use a "big step" semantics) but a change in the measure which represents the new intermediate state of the BTL code
Fixpoint measure ib: nat :=
  match ib with
  | Bseq ib1 ib2
  | Bcond _ _ ib1 ib2 _ => measure ib1 + measure ib2
  | ib => 1
  end.

Definition omeasure (oib: option iblock): nat :=
 match oib with
 | None => 0
 | Some ib => measure ib
 end.

Remark measure_pos: forall ib,
  measure ib > 0.
Proof.
  induction ib; simpl; auto; lia.
Qed.

Lemma match_iblock_true_isnt_goto dupmap cfg cfg' indirect pc ib opc:
  match_iblock dupmap cfg cfg' indirect true pc ib opc ->
  is_goto ib = false.
Proof.
  intros MIB; inversion MIB as [d1 d2 d3 d4 d5 H H0| | | | | | | |]; subst; simpl; try congruence.
  inv H0; congruence.
Qed.

Local Hint Resolve match_iblock_true_isnt_goto normRTL_preserves_iblock_istep_run star_refl star_right: core.
Local Hint Constructors match_strong_state RTL.step: core.

At entry in a block: we init match_states on normRTL to normalize the block. The "direct" lemma match the RTL state on pcR to ib, the corresponding iblock starting at pcB, and the "indirect" one match pcR to the synthetic node at pcX whose successor is ib at pcB.

Lemma match_states_entry_direct dupmap st f sp pcR ib rs m bc st' f' pcB indirect
  (STACKS : list_forall2 match_stackframes st st')
  (TRANSF : match_function dupmap f f')
  (FN : (fn_code f') ! pcB = Some ib)
  (MI : match_iblock dupmap (fn_code f') (RTL.fn_code f) indirect true pcR (entry ib) None)
  (DUP: dupmap ! pcB = Some pcR)
  :match_states (Some (normRTL (entry ib))) (RTL.State st f sp pcR rs m) (State st' f' sp pcB rs m bc).
Proof.
  exploit is_normRTL_correct; eauto.
  econstructor; eauto.
Qed.

Lemma match_states_entry_indirect dupmap st f sp pcX pcB pcR ib rs m bc st' f' indirect
  (STACKS : list_forall2 match_stackframes st st')
  (TRANSF : match_function dupmap f f')
  (FN : (fn_code f') ! pcB = Some ib)
  (MI : match_iblock dupmap (fn_code f') (RTL.fn_code f) indirect true pcR (entry ib) None)
  (MSN: match_synthetic_node dupmap (fn_code f') indirect pcX pcB pcR)
  :match_states (Some (normRTL (entry ib))) (RTL.State st f sp pcR rs m) (State st' f' sp pcX rs m bc).
Proof.
  inversion MSN; subst.
  - eapply match_states_entry_direct; eauto.
  - repeat (econstructor; eauto).
Qed.

Lemma list_nth_z_rev_dupmap:
  forall dupmap code indirect ln ln' (pc pc': node) val,
  list_nth_z ln val = Some pc ->
  list_forall2 (fun n' n => match_nodes dupmap code indirect n' n) ln' ln ->
  exists (pc': node),
     list_nth_z ln' val = Some pc'
  /\ match_nodes dupmap code indirect pc' pc.
Proof.
  induction ln; intros until val; intros LNZ LFA.
  - inv LNZ.
  - inv LNZ. destruct (zeq val 0) eqn:ZEQ.
    + inv H0. destruct ln'; inv LFA.
      simpl. exists n. split; auto.
    + inv LFA. simpl. rewrite ZEQ. exploit IHln. 2: eapply H0. all: eauto.
Qed.

Match strong state property Used when executing non-atomic instructions such as Bseq/Bcond(ib1,ib2). Two possible executions:

 **ib2 is a Bgoto (left side):**

      RTL state                MSS1             BTL state
     [pcR1,rs1,m1] -------------------------- [ib1,pcX,rs0,m0]
           |                                         |
           |                                         | Either one or two
           |                                         | BTL_STEP for direct and
           |                                         | indirect cases, resp.
           |                                         |
  RTL_STEP | *E0                       [ib2,pc=(Bgoto succ),rs2,m2]
           |                          /              |
           |             MSS2        /               |
           |       _________________/                | BTL_GOTO
           |      /                                  |
           |     /   GOAL: match_states              |
    [pcR2,rs2,m2] ------------------------ [ib?,pc=succ,rs2,m2]


 **ib2 is any other instruction (right side):**

See explanations of opt_simu below.


pcB is always defined in the reverse mapping.
Remark match_synthetic_node_dupmap dupmap f' indirect pcX pcB pcR0
  (MSN: match_synthetic_node dupmap (fn_code f') indirect pcX pcB pcR0)
  :dupmap ! pcB = Some pcR0.
Proof.
  inv MSN; auto.
Qed.
Hint Resolve match_synthetic_node_dupmap: core.

It always exists an iblock at pcX (which is either the real or the synthetic node).
Remark match_synthetic_node_code dupmap f f' indirect pcX pcB pcR0
  (TRANSF: match_function dupmap f f')
  (MSN: match_synthetic_node dupmap (fn_code f') indirect pcX pcB pcR0)
  :exists ib, (fn_code f') ! pcX = Some ib.
Proof.
  inv MSN; eauto.
  exploit dupmap_correct; eauto.
  intros (ib & FNC & MI); eauto.
Qed.
Hint Resolve match_synthetic_node_dupmap: core.

Main lemma for match_strong_state with both execution cases represented: - left (last instruction): one or two BTL (big-)step(s) to execute the BTL code that may start with a synthetic node; - right (internal instruction): no step but a decrease in the measure.
Lemma match_strong_state_simu
  dupmap st st' f f' sp rs2 m2 rs1 m1 rs0 m0 bc0 pcX pcB pcR0 pcR1 pcR2 isfst ib1 ib2 ib0 n t s1'
  (EQt: t=E0)
  (EQs1': s1'=(RTL.State st f sp pcR2 rs2 m2))
  (STEP : RTL.step ge (RTL.State st f sp pcR1 rs1 m1) t s1')
  (MSS1 : match_strong_state dupmap st st' f f' sp rs1 m1 rs0 m0 pcX pcB pcR0 pcR1 ib1 ib0 isfst)
  (MSS2 : match_strong_state dupmap st st' f f' sp rs2 m2 rs0 m0 pcX pcB pcR0 pcR2 ib2 ib0 false)
  (MES : measure ib2 < n)
  : exists (oib' : option iblock),
      (exists s2', plus (step false) tge (State st' f' sp pcX rs0 m0 bc0) E0 s2'
          /\ match_states oib' s1' s2')
          \/ (omeasure oib' < n /\ t=E0
          /\ match_states oib' s1' (State st' f' sp pcX rs0 m0 bc0)).
Proof.
  subst.
  destruct (is_goto ib2) eqn:GT.
  destruct ib2; try destruct fi; try discriminate.
  - (* Bgoto *)
    inv MSS2.
    inversion MIB; subst; try inv H4.
    inversion MSN; subst.
    + destruct H2 as (pcS & MSN'). inversion MSN'; subst.
      * exploit dupmap_correct; eauto;
        intros [ib [FNC MI]].
        eexists; left; eexists; split;
        [| eapply match_states_entry_direct; eauto ].
        apply plus_one.
        repeat econstructor; eauto;
        apply iblock_istep_run_equiv in BTL_RUN; eauto.
      * exploit dupmap_correct; eauto;
        intros [ib [FNC MI]].
        eexists; left; eexists; split;
        [| eapply match_states_entry_indirect; eauto ].
        apply plus_one.
        repeat econstructor; eauto;
        apply iblock_istep_run_equiv in BTL_RUN; eauto.
    + destruct H2 as (pcS & MSN'). inversion MSN'; subst.
      * exploit dupmap_correct; eauto;
        intros [ib [FNC MI]].
        eexists; left; eexists; split;
        [| eapply match_states_entry_direct; eauto ].
        eapply plus_two. econstructor; eauto.
        rewrite H1. repeat econstructor; eauto.
        repeat econstructor; eauto;
        apply iblock_istep_run_equiv in BTL_RUN; eauto.
        eauto.
      * exploit dupmap_correct; eauto;
        intros [ib [FNC MI]].
        eexists; left; eexists; split;
        [| eapply match_states_entry_indirect; eauto ].
        eapply plus_two. econstructor; eauto.
        rewrite H1. repeat econstructor; eauto.
        repeat econstructor; eauto;
        apply iblock_istep_run_equiv in BTL_RUN; eauto.
        eauto.
  - (* Others *)
    exists (Some ib2); right; split.
    simpl; auto.
    split; auto. econstructor; eauto.
Qed.

Executing the BTL step corresponding to a synthetic node.
Lemma opt_simu_indirect
  dupmap indirect st' f' sp rs0 m0 bc0 pcX pcB ibf (fin: final) inst iinfo0
  (PC: (fn_code f') ! pcX = Some ibf)
  (BLK: entry ibf = Bgoto pcB iinfo0)
  (MFI: match_final_inst dupmap (fn_code f') indirect fin inst)
  :step false tge (State st' f' sp pcX rs0 m0 bc0) E0 (State st' f' sp pcB rs0 m0 bc0).
Proof.
  inv MFI; econstructor; eauto.
  all: rewrite BLK; repeat econstructor; eauto.
Qed.
Hint Resolve opt_simu_indirect: core.

Lemma opt_simu_direct
  dupmap indirect st st' f f' sp rs m rs0 m0 bc0 pcB pcR0 pcR1 ib0 s1' t (fin: final) inst iinfo
  (TRANSF: match_function dupmap f f')
  (STACKS: list_forall2 match_stackframes st st')
  (STEP: RTL.step ge (RTL.State st f sp pcR1 rs m) t s1')
  (NGOTO: is_goto (fin iinfo) = false)
  (DUP: dupmap ! pcB = Some pcR0)
  (MFI: match_final_inst dupmap (fn_code f') indirect fin inst)
  (ATpcB : (fn_code f') ! pcB = Some ib0)
  (RTL_RUN : star RTL.step ge (RTL.State st f sp pcR0 rs0 m0) E0
             (RTL.State st f sp pcR1 rs m))
  (BTL_RUN : iblock_istep_run tge sp None (entry ib0) rs0 m0 =
             iblock_istep_run tge sp None (fin iinfo) rs m)
  (PC: (RTL.fn_code f) ! pcR1 = Some inst)
  :exists oib' s2', step false tge (State st' f' sp pcB rs0 m0 bc0) t s2' /\ match_states oib' s1' s2'.
Proof.
  inv MFI; inversion STEP; subst; try_simplify_someHyps; intros.
  - do 2 eexists; split.
    econstructor; eauto; do 3 econstructor.
    split. eapply iblock_istep_run_equiv in BTL_RUN.
    eapply BTL_RUN. econstructor; eauto.
    erewrite preserv_fnstacksize; eauto.
    econstructor; eauto.
  - rename H9 into FIND.
    eapply find_function_preserved in FIND.
    destruct FIND as (fd' & FF & TRANSFUN).
    do 2 eexists; split.
    econstructor; eauto; do 3 econstructor.
    split. eapply iblock_istep_run_equiv in BTL_RUN.
    eapply BTL_RUN. econstructor; eauto.
    eapply function_sig_translated; eauto.
    repeat (econstructor; eauto).
    apply transf_fundef_correct; auto.
  - rename H8 into FIND.
    eapply find_function_preserved in FIND.
    destruct FIND as (fd' & FF & TRANSFUN).
    do 2 eexists; split.
    econstructor; eauto; do 3 econstructor.
    split. eapply iblock_istep_run_equiv in BTL_RUN.
    eapply BTL_RUN. econstructor; eauto.
    eapply function_sig_translated; eauto.
    erewrite preserv_fnstacksize; eauto.
    econstructor; eauto.
    apply transf_fundef_correct; auto.
  - pose symbols_preserved as SYMPRES.
    destruct H as (pcS & MSN); inv MSN;
    exploit dupmap_correct; eauto;
    intros [ib [FNC MI]];
    (*rewrite ATpcB in FNC; inv FNC.*)
    do 2 eexists; split.
    1,3:
      econstructor; eauto; do 3 econstructor;
      split; eapply iblock_istep_run_equiv in BTL_RUN; eauto.
    1,2: econstructor; eauto.
    1,3: eapply eval_builtin_args_preserved; eauto.
    1,2: eapply external_call_symbols_preserved; eauto; eapply senv_preserved.
    all: econstructor; eauto.
  - exploit list_nth_z_rev_dupmap; eauto.
    intros (pc'0 & LNZ & MN).
    destruct MN as (pcS & MSN); inv MSN;
    exploit dupmap_correct; eauto;
    intros [ib [FNC MI]];
    do 2 eexists; split.
    1,3:
      econstructor; eauto; do 3 econstructor;
      split; eapply iblock_istep_run_equiv in BTL_RUN; eauto.
    all: econstructor; eauto.
Unshelve. all:exact dummy_block_classification.
Qed.
 
opt_simu_intro combines the two lemmas above.
Lemma opt_simu_intro
  dupmap st st' f f' sp rs m rs0 m0 bc0 pcX pcB pcR0 pcR1 ib ib0 isfst s1' t
  (STEP: RTL.step ge (RTL.State st f sp pcR1 rs m) t s1')
  (MSTRONG: match_strong_state dupmap st st' f f' sp rs m rs0 m0 pcX pcB pcR0 pcR1 ib ib0 isfst)
  (NGOTO: is_goto ib = false)
  : exists (oib' : option iblock),
     (exists s2', plus (step false) tge (State st' f' sp pcX rs0 m0 bc0) t s2' /\ match_states oib' s1' s2')
  \/ (omeasure oib' < omeasure (Some ib) /\ t=E0 /\ match_states oib' s1' (State st' f' sp pcX rs0 m0 bc0)).
Proof.
  inv MSTRONG; subst. inv MIB.
  - (* mib_BF *)
    exploit opt_simu_direct; eauto; intros (oib' & s2' & BSTEP & MS).
    inversion H0;
    inversion STEP; subst; try_simplify_someHyps; intros.
    all:
      inv MSN;
      eexists; left; exists s2'; split;
      [ apply plus_one | idtac | eapply plus_two | idtac ]; eauto.
  - (* mib_exit *)
    discriminate.
  - (* mib_seq *)
    inv IS_EXPD; try discriminate.
    inv H; simpl in *; try congruence;
    inv STEP; try_simplify_someHyps;
    intros; eapply match_strong_state_simu; eauto;
    econstructor; eauto.
    { (* Bop *)
      erewrite eval_operation_preserved in H12.
      erewrite H12 in BTL_RUN; simpl in BTL_RUN; auto.
      intros; rewrite <- symbols_preserved; trivial. }
    (* Bload/Bstore *)
    inv H12; [ idtac | destruct (eval_addressing) eqn:EVAL in LOAD;[ specialize (LOAD v) |] ];
    rename LOAD into MEMT.
    4: rename H12 into EVAL; rename H13 into MEMT.
    all:
      erewrite eval_addressing_preserved in EVAL;
      try erewrite EVAL in BTL_RUN; try erewrite MEMT in BTL_RUN;
      simpl in BTL_RUN; try destruct trap; auto;
      intros; rewrite <- symbols_preserved; trivial.
  - (* mib_cond *)
    inv IS_EXPD; try discriminate.
    inversion STEP; subst; try_simplify_someHyps; intros.
    destruct (is_join_opt_None opc1 opc2); eauto. subst.
    eapply match_strong_state_simu with (ib1:=Bcond c lr bso bnot iinfo) (ib2:=(if b then bso else bnot)); eauto.
    + intros; rewrite H14 in BTL_RUN; destruct b; econstructor; eauto.
    + assert (measure (if b then bnot else bso) > 0) by apply measure_pos; destruct b; simpl; lia.
  Unshelve.
     all: eauto.
Qed.

Main RTL to BTL simulation theorem Two possible executions:

 **Last instruction (left side):**

    RTL state         match_states          BTL state
       s1 ------------------------------------ s2
       |                                       |
  STEP |    Classical (right) plus simu        | +
       |                                       |
       s1' ----------------------------------- s2'


 **Middle instruction (right side):**

    RTL state         match_states [oib]    BTL state
       s1 ------------------------------------ s2
       |                               _______/
  STEP | *E0       ___________________/
       |          / match_states [oib']
       s1' ______/
   Where omeasure oib' < omeasure oib


Theorem opt_simu s1 t s1' oib s2:
 RTL.step ge s1 t s1' ->
 match_states oib s1 s2 ->
 exists (oib' : option iblock),
     (exists s2', plus (step false) tge s2 t s2' /\ match_states oib' s1' s2')
  \/ (omeasure oib' < omeasure oib /\ t=E0 /\ match_states oib' s1' s2)
 .
Proof.
  inversion 2; subst; clear H0.
  - (* State *)
    exploit opt_simu_intro; eauto.
  - (* Callstate *)
    inv H.
    + (* Internal function *)
      inv TRANSF.
      rename H0 into TRANSF.
      exploit dupmap_entrypoint; eauto; intros MN; inv MN; inv H;
      exploit dupmap_correct; eauto; intros [ib [CENTRY MI]];
      exists (Some (normRTL (entry ib))); left; eexists; split.
      * apply plus_one.
        eapply exec_function_internal.
        erewrite preserv_fnstacksize; eauto.
      * erewrite preserv_fnparams; eauto.
        eapply match_states_entry_direct; eauto.
      * eapply plus_two; eauto. eapply exec_function_internal.
        erewrite preserv_fnstacksize; eauto.
        econstructor; eauto. rewrite H2.
        repeat econstructor; eauto. eauto.
      * erewrite preserv_fnparams; eauto.
        eapply match_states_entry_direct; eauto.
    + (* External function *)
      inv TRANSF.
      eexists; left; eexists; split.
      * apply plus_one.
        eapply exec_function_external.
        eapply external_call_symbols_preserved.
        eapply senv_preserved. eauto.
      * econstructor; eauto.
  - (* Returnstate *)
    inv H. inv STACKS. inv H1.
    destruct MN as (pcS & MSN); inv MSN;
    exploit dupmap_correct; eauto;
    intros [ib [FNC MI]];
    eexists; left; eexists; split; eauto.
    + apply plus_one; eapply exec_return.
    + eapply match_states_entry_direct; eauto.
    + eapply plus_two. eapply exec_return.
      econstructor; eauto. rewrite H1.
      repeat econstructor; eauto. eauto.
    + eapply match_states_entry_indirect; eauto.
Unshelve. all:exact dummy_block_classification.
Qed.

Local Hint Resolve plus_one star_refl: core.

Theorem transf_program_correct:
  forward_simulation (RTL.semantics prog) (BTL.sem false tprog).
Proof.
  eapply (Forward_simulation (L1:=RTL.semantics prog) (L2:=BTL.sem false tprog) (ltof _ omeasure) match_states).
  constructor 1; simpl.
  - apply well_founded_ltof.
  - eapply transf_initial_states.
  - eapply transf_final_states.
  - intros s1 t s1' STEP i s2 MATCH. exploit opt_simu; eauto. clear MATCH STEP.
    destruct 1 as (oib' & [ (s2' & STEP & MATCH) | (MEASURE & TRACE & MATCH) ]).
    + repeat eexists; eauto.
    + subst. repeat eexists; eauto.
  - eapply senv_preserved.
Qed.

End BTL_SIMULATES_RTL.

End RTLtoBTL_Translationproof.