Module Asmblockgenproof


Require Import Coqlib Errors.
Require Import Integers Floats AST Linking.
Require Import Values Memory Events Globalenvs Smallstep.
Require Import Op Locations Machblock Conventions Asmblock.
Require Import Asmblockgen Asmblockgenproof0 Asmblockgenproof1 Asmblockprops.
Require Import Lia.
Require Import Memcpy.

Local Transparent Archi.ptr64.

Module MB := Machblock.
Module AB := Asmblock.

Definition match_prog (p: MB.program) (tp: AB.program) :=
  match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp.

Lemma transf_program_match:
  forall p tp, transf_program p = OK tp -> match_prog p tp.
Proof.
  intros. eapply match_transform_partial_program; eauto.
Qed.

Section PRESERVATION.

Variable prog: Machblock.program.
Variable tprog: Asmblock.program.
Hypothesis TRANSF: match_prog prog tprog.
Let ge := Genv.globalenv prog.
Let tge := Genv.globalenv tprog.

Lemma symbols_preserved:
  forall (s: qualident), Genv.find_symbol tge s = Genv.find_symbol ge s.
Proof (Genv.find_symbol_match TRANSF).

Lemma senv_preserved:
  Senv.equiv ge tge.
Proof (Genv.senv_match TRANSF).

Lemma functions_translated:
  forall b f,
  Genv.find_funct_ptr ge b = Some f ->
  exists tf,
  Genv.find_funct_ptr tge b = Some tf /\ transf_fundef f = OK tf.
Proof (Genv.find_funct_ptr_transf_partial TRANSF).

Lemma functions_transl:
  forall fb f tf,
  Genv.find_funct_ptr ge fb = Some (Internal f) ->
  transf_function f = OK tf ->
  Genv.find_funct_ptr tge fb = Some (Internal tf).
Proof.
  intros. exploit functions_translated; eauto. intros [tf' [A B]].
  monadInv B. rewrite H0 in EQ; inv EQ; auto.
Qed.

Lemma transf_function_no_overflow:
  forall f tf,
  transf_function f = OK tf -> size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned.
Proof.
  intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (size_blocks x.(fn_blocks))); inv EQ0.
  lia.
Qed.

Proof of semantic preservation


Semantic preservation is proved using a complex simulation diagram of the following form.
                                     MB.step
                      ---------------------------------------->
                      header      body          exit
                  st1 -----> st2 -----> st3 ------------------> st4
                   |          |          |                       |
                   |   (A)    |   (B)    |         (C)           |
   match_codestate |          |          |                       |
                   |  header  |   body1  |  body2                |  match_states
                  cs1 -----> cs2 -----> cs3 ------> cs4          |
                   |                  /                \  exit   |
   match_asmstate  |   ---------------                  --->---  |
                   |  /   match_asmstate                       \ |
                  st'1 ---------------------------------------> st'2
                                     AB.step                  *
The invariant between each MB.step/AB.step is the match_states predicate below. However, we also need to introduce an intermediary state Codestate which allows us to reason on a finer grain, executing header, body and exit separately. This Codestate consists in a state like Asmblock.State, except that the code is directly stored in the state, much like Machblock.State. It also features additional useful elements to keep track of while executing a bblock.

Inductive match_states: Machblock.state -> Asm.state -> Prop :=
  | match_states_intro:
      forall s fb sp c ep ms m m' rs f tf tc
        (STACKS: match_stack ge s)
        (FIND: Genv.find_funct_ptr ge fb = Some (Internal f))
        (MEXT: Mem.extends m m')
        (AT: transl_code_at_pc ge (rs PC) fb f c ep tf tc)
        (AG: agree ms sp rs)
        (DXP: ep = true -> rs#IR12 = parent_sp s)
        (RAIR14: function_preserves_IR14 f = true -> rs#IR14 = parent_ra s)
        (TAIL: is_tail c (MB.fn_code f)),
      match_states (Machblock.State s fb sp c ms m)
                   (Asm.State rs m')
  | match_states_call:
      forall s fb ms m m' rs
        (STACKS: match_stack ge s)
        (MEXT: Mem.extends m m')
        (AG: agree ms (parent_sp s) rs)
        (ATPC: rs PC = Vptr fb Ptrofs.zero)
        (ATLR: rs RA = parent_ra s),
      match_states (Machblock.Callstate s fb ms m)
                   (Asm.State rs m')
  | match_states_return:
      forall s ms m m' rs
        (STACKS: match_stack ge s)
        (MEXT: Mem.extends m m')
        (AG: agree ms (parent_sp s) rs)
        (ATPC: rs PC = parent_ra s),
      match_states (Machblock.Returnstate s ms m)
                   (Asm.State rs m').

Section TRANSL_LABEL.

Lemma cons_bblocks_label:
  forall hd bdy ex tbb tc,
  cons_bblocks hd bdy ex = tbb::tc ->
  header tbb = hd.
Proof.
  intros until tc. intros CONSB. unfold cons_bblocks in CONSB.
  destruct ex; try destruct bdy; try destruct c; try destruct i.
  all: inv CONSB; simpl; auto.
Qed.

Lemma cons_bblocks_label2:
  forall hd bdy ex tbb1 tbb2,
  cons_bblocks hd bdy ex = tbb1::tbb2::nil ->
  header tbb2 = nil.
Proof.
  intros until tbb2. intros CONSB. unfold cons_bblocks in CONSB.
  destruct ex; try destruct bdy; try destruct c; try destruct i.
  all: inv CONSB; simpl; auto.
Qed.

Remark in_dec_transl:
  forall lbl hd,
  (if in_dec lbl hd then true else false) = (if MB.in_dec lbl hd then true else false).
Proof.
  intros. destruct (in_dec lbl hd), (MB.in_dec lbl hd). all: tauto.
Qed.

Lemma transl_is_label:
  forall lbl bb tbb f ep tc near_labels,
  transl_block f bb ep near_labels = OK (tbb::tc) ->
  is_label lbl tbb = MB.is_label lbl bb.
Proof.
  intros until near_labels. intros TLB.
  destruct tbb as [thd tbdy tex]; simpl in *.
  monadInv TLB.
  unfold is_label. simpl.
  apply cons_bblocks_label in H0. simpl in H0. subst.
  rewrite in_dec_transl. auto.
Qed.

Lemma transl_is_label_false2:
  forall lbl bb f ep near_labels tbb1 tbb2,
  transl_block f bb ep near_labels = OK (tbb1::tbb2::nil) ->
  is_label lbl tbb2 = false.
Proof.
  intros until tbb2. intros TLB.
  destruct tbb2 as [thd tbdy tex]; simpl in *.
  monadInv TLB. apply cons_bblocks_label2 in H0. simpl in H0. subst.
  apply is_label_correct_false. simpl. auto.
Qed.

Lemma transl_block_nonil:
  forall f c ep near_labels tc,
  transl_block f c ep near_labels = OK tc ->
  tc <> nil.
Proof.
  intros. monadInv H. unfold cons_bblocks.
  destruct x0; try destruct (x1 @@ x); try destruct c0; try destruct i.
  all: discriminate.
Qed.

Lemma transl_block_limit: forall f bb ep near_labels tbb1 tbb2 tbb3 tc,
  ~transl_block f bb ep near_labels = OK (tbb1 :: tbb2 :: tbb3 :: tc).
Proof.
  intros. intro. monadInv H.
  unfold cons_bblocks in H0.
  destruct x0; try destruct (x1 @@ x); try destruct c0; try destruct i.
  all: discriminate.
Qed.

Lemma find_label_transl_false:
  forall x f lbl bb ep near_labels x',
  transl_block f bb ep near_labels = OK x ->
  MB.is_label lbl bb = false ->
  find_label lbl (x @@ x') = find_label lbl x'.
Proof.
  intros until x'. intros TLB MBis; simpl; auto.
  destruct x as [|x0 x1]; simpl; auto.
  destruct x1 as [|x1 x2]; simpl; auto.
  - erewrite <- transl_is_label in MBis; eauto. rewrite MBis. auto.
  - destruct x2 as [|x2 x3]; simpl; auto.
    + erewrite <- transl_is_label in MBis; eauto. rewrite MBis.
      erewrite transl_is_label_false2; eauto.
    + apply transl_block_limit in TLB. destruct TLB.
Qed.

Lemma cons_bblocks_singleton:
  forall hd bdy ex,
  exists tbb, cons_bblocks hd bdy ex = tbb :: nil.
Proof.
  intros. unfold cons_bblocks.
  destruct ex; destruct bdy; eauto.
Qed.

Lemma transl_block_singleton:
  forall f bb ep nl lb,
  transl_block f bb ep nl = OK lb ->
  exists tbb, lb = tbb :: nil.
Proof.
  intros. monadInv H. apply cons_bblocks_singleton.
Qed.

Lemma transl_blocks_cons:
  forall f bb c ep tc,
  transl_blocks f (bb :: c) ep = OK tc ->
  exists lb nl tc_tail,
    transl_blocks0 f (pool_cap f) c false nil = OK (tc_tail, nl) /\
    transl_blocks f c false = OK tc_tail /\
    transl_block f bb (if MB.header bb then ep else false) nl = OK lb /\
    tc = lb @@ tc_tail.
Proof.
  intros. unfold transl_blocks in H. monadInv H.
  destruct x as [lb_all nl_all]. inv EQ0.
  simpl in EQ. monadInv EQ.
  destruct x as [tc_tail nl].
  monadInv EQ1.
  eexists _, _, _. repeat split; eauto.
  unfold transl_blocks. rewrite EQ0. simpl. auto.
Qed.

Lemma list_same: forall {A T : Type} (l : list A) (x : T),
  (if l then x else x) = x.
Proof.
destruct l; trivial. Qed.

Lemma indexed_memory_access_success:
  forall mk_instr mk_immed base ofs k1 k2,
  (exists c1, indexed_memory_access mk_instr mk_immed base ofs k1 = c1) ->
  exists c2, indexed_memory_access mk_instr mk_immed base ofs k2 = c2.
Proof.
unfold indexed_memory_access. intros. destruct (Int.eq _ _); eauto. Qed.

Lemma storeind_success:
  forall src base ofs ty k1 k2 c1,
  storeind src base ofs ty k1 = OK c1 ->
  exists c2, storeind src base ofs ty k2 = OK c2.
Proof.
  unfold storeind. intros.
  destruct ty; destruct (preg_of src) as [[?|?]|?|]; try discriminate; exact (ex_intro _ _ eq_refl).
Qed.

Lemma loadind_success:
  forall base ofs ty dst k1 k2 c1,
  loadind base ofs ty dst k1 = OK c1 ->
  exists c2, loadind base ofs ty dst k2 = OK c2.
Proof.
  unfold loadind. intros.
  destruct ty; destruct (preg_of dst) as [[?|?]|?|]; try discriminate; exact (ex_intro _ _ eq_refl).
Qed.

Ltac solve_transl_success_core H :=
  repeat (match goal with H: (match ?x with _ => _ end) = OK _ |- _ => destruct x; try discriminate end);
  repeat (match goal with H: bind _ _ = OK _ |- _ =>
    apply bind_inversion in H; destruct H as (?x & ?Hx & ?H) end);
  try (match goal with H: OK _ = OK _ |- _ => inv H end);
  repeat (match goal with Hx: ?f = OK ?v |- context[?f] => rewrite Hx; simpl end);
  try (exact (ex_intro _ _ eq_refl));
  repeat (match goal with H: (match ?x with _ => _ end) = OK _ |- _ => destruct x; try discriminate end);
  repeat (match goal with H: bind _ _ = OK _ |- _ =>
    apply bind_inversion in H; destruct H as (?x & ?Hx & ?H) end);
  try (match goal with H: OK _ = OK _ |- _ => inv H end);
  repeat (match goal with Hx: ?f = OK ?v |- context[?f] => rewrite Hx; simpl end);
  try exact (ex_intro _ _ eq_refl).

Ltac solve_transl_success := let H := fresh "H" in intro H; solve_transl_success_core H.

Lemma transl_cond_success:
  forall cond args k1 k2 c1,
  transl_cond cond args k1 = OK c1 ->
  exists c2, transl_cond cond args k2 = OK c2.
Proof.
  unfold transl_cond. intros cond args k1 k2 c1.
  destruct cond; solve_transl_success.
Qed.

Helper: the three lowering paths preserve OK-ness across continuations. Proved by inversion on each special-case definition.
Lemma transl_op_cmp_cne_success:
  forall cmp args r k1 k2 c1,
  transl_op_cmp_cne cmp args r k1 = Some (OK c1) ->
  exists c2, transl_op_cmp_cne cmp args r k2 = Some (OK c2).
Proof.
  unfold transl_op_cmp_cne. intros cmp args r k1 k2 c1.
  destruct cmp; try discriminate.
  all: destruct c; try discriminate.
  all: intro H.
  all: destruct args as [|? [|? [|? ?]]]; try discriminate.
  2: destruct (is_immed_arith OTHER i);
       [|destruct (is_immed_arith OTHER (Int.neg i))]; try discriminate.
  all: inv H; solve_transl_success_core H1.
Qed.

Lemma transl_op_cmp_ceq_clz_success:
  forall cmp args r k1 k2 c1,
  transl_op_cmp_ceq_clz cmp args r k1 = Some (OK c1) ->
  exists c2, transl_op_cmp_ceq_clz cmp args r k2 = Some (OK c2).
Proof.
  unfold transl_op_cmp_ceq_clz. intros cmp args r k1 k2 c1.
  destruct cmp; try discriminate.
  all: destruct c; try discriminate.
  all: intro H.
  all: destruct args as [|? [|? [|? ?]]]; try discriminate.
  3: destruct (Int.eq i Int.zero);
       [|destruct (is_immed_arith OTHER i);
         [|destruct (is_immed_arith OTHER (Int.neg i))]]; try discriminate.
  all: inv H; solve_transl_success_core H1.
Qed.

Lemma transl_op_cmp_default_success:
  forall cmp args r k1 k2 c1,
  transl_op_cmp_default cmp args r k1 = OK c1 ->
  exists c2, transl_op_cmp_default cmp args r k2 = OK c2.
Proof.
  unfold transl_op_cmp_default. intros cmp args r k1 k2 c1.
  unfold transl_cond.
  destruct cmp; solve_transl_success;
    try (destruct c; solve_transl_success_core H).
Qed.

Lemma transl_op_cmp_success:
  forall cmp args r k1 k2 c1,
  transl_op_cmp cmp args r k1 = OK c1 ->
  exists c2, transl_op_cmp cmp args r k2 = OK c2.
Proof.
  intros cmp args r k1 k2 c1 H.
  unfold transl_op_cmp in H |- *.
  destruct (transl_op_cmp_cne cmp args r k1) as [res1|] eqn:CNE1.
  - (* k1 hit Cne; show k2 also does *)
    subst res1.
    edestruct transl_op_cmp_cne_success as [c2 EQ]; [exact CNE1|].
    rewrite EQ. eauto.
  - (* k1 didn't hit Cne path; show k2 also doesn't *)
    assert (transl_op_cmp_cne cmp args r k2 = None) as CNE2.
    { revert CNE1. unfold transl_op_cmp_cne.
      destruct cmp; try discriminate; auto.
      all: destruct c; try discriminate; auto.
      all: destruct args as [|? [|? [|? ?]]]; try discriminate; auto.
      destruct (is_immed_arith OTHER i); [discriminate|].
      destruct (is_immed_arith OTHER (Int.neg i)); [discriminate|]; auto. }
    rewrite CNE2.
    destruct (Compopts.cmp_via_clz tt).
    + destruct (transl_op_cmp_ceq_clz cmp args r k1) as [res1|] eqn:CEQ1.
      * destruct res1 as [bc1|]; [|discriminate]. inv H.
        edestruct transl_op_cmp_ceq_clz_success as [c2 EQ]; [exact CEQ1|].
        rewrite EQ. eauto.
      * assert (transl_op_cmp_ceq_clz cmp args r k2 = None) as CEQ2.
        { revert CEQ1. unfold transl_op_cmp_ceq_clz.
          destruct cmp; try discriminate; auto.
          all: destruct c; try discriminate; auto.
          all: destruct args as [|? [|? [|? ?]]]; try discriminate; auto.
          destruct (Int.eq i Int.zero); [discriminate|].
          destruct (is_immed_arith OTHER i); [discriminate|].
          destruct (is_immed_arith OTHER (Int.neg i)); [discriminate|]; auto. }
        rewrite CEQ2.
        eapply transl_op_cmp_default_success; exact H.
    + eapply transl_op_cmp_default_success; exact H.
Qed.

Lemma transl_op_success:
  forall (f: Machblock.function) op args res k1 k2 c1,
  transl_op op args res k1 = OK c1 ->
  exists c2, transl_op op args res k2 = OK c2.
Proof.
  unfold transl_op. intros f op args res k1 k2 c1.
  destruct op; try (solve_transl_success; fail).
  (* Ocmp: dispatch to transl_op_cmp_success *)
  all: try (intro H;
            apply bind_inversion in H; destruct H as (x & Hx & H);
            rewrite Hx; simpl;
            edestruct transl_op_cmp_success as [c2 EQ]; [exact H|];
            rewrite EQ; eauto; fail).
  (* Osel/Oselimm/Oselimm2: more aggressive destructuring *)
  all: intro H; unfold transl_op_cmp in H |- *;
       unfold transl_op_cmp_cne, transl_op_cmp_ceq_clz, transl_op_cmp_default in H |- *;
       destruct (Compopts.cmp_via_clz tt);
       unfold transl_cond in H |- *;
       solve_transl_success_core H;
       try (destruct c; solve_transl_success_core H).
Qed.

Lemma transl_load_success:
  forall trap chunk addr args dst k1 k2 c1,
  transl_load trap chunk addr args dst k1 = OK c1 ->
  exists c2, transl_load trap chunk addr args dst k2 = OK c2.
Proof.
intros trap chunk addr args dst k1 k2 c1 H.
  unfold transl_load, transl_memory_access_int, transl_memory_access_float, transl_memory_access in H |- *.
  destruct trap; [| discriminate].
  destruct chunk; destruct addr; solve_transl_success_core H.
Qed.
Lemma transl_store_success:
  forall chunk addr args src k1 k2 c1,
  transl_store chunk addr args src k1 = OK c1 ->
  exists c2, transl_store chunk addr args src k2 = OK c2.
Proof.
intros chunk addr args src k1 k2 c1 H.
  unfold transl_store, transl_memory_access_int, transl_memory_access_float, transl_memory_access in H |- *.
  destruct chunk; destruct addr; solve_transl_success_core H.
Qed.

Lemma transl_memcpy_success:
  forall sz bargs k1 k2 c1,
  transl_memcpy sz bargs k1 = OK c1 ->
  exists c2, transl_memcpy sz bargs k2 = OK c2.
Proof.
intros sz bargs k1 k2 c1 H.
  unfold transl_memcpy in H |- *.
  repeat (match goal with H: (match ?x with _ => _ end) = OK _ |- _ => destruct x; try discriminate end).
  apply bind_inversion in H. destruct H as ([? ?] & Hx1 & H).
  apply bind_inversion in H. destruct H as ([? ?] & Hx2 & H). inv H.
  rewrite Hx1. simpl. rewrite Hx2. simpl. exact (ex_intro _ _ eq_refl).
Qed.

Lemma transl_savecallee_success:
  forall f ofs l k1 k2 c1,
  transl_savecallee f ofs l k1 = OK c1 ->
  exists c2, transl_savecallee f ofs l k2 = OK c2.
Proof.
  intros f ofs l k1 k2 c1 H.
  unfold transl_savecallee in *.
  destruct l as [|r l']; [discriminate|].
  destruct (mreg_type r); try discriminate.
  - monadInv H. rewrite EQ; simpl; eauto.
  - apply bind_inversion in H. destruct H as (fl & Hfl & Hbr).
    rewrite Hfl. simpl. destruct (fregs_contiguous fl); inv Hbr; eauto.
Qed.

Lemma transl_restorecallee_success:
  forall f ofs l k1 k2 c1,
  transl_restorecallee f ofs l k1 = OK c1 ->
  exists c2, transl_restorecallee f ofs l k2 = OK c2.
Proof.
  intros f ofs l k1 k2 c1 H.
  unfold transl_restorecallee in *.
  destruct l as [|r l']; [discriminate|].
  destruct (mreg_type r); try discriminate.
  - monadInv H. rewrite EQ; simpl; eauto.
  - apply bind_inversion in H. destruct H as (fl & Hfl & Hbr).
    rewrite Hfl. simpl. destruct (fregs_contiguous fl); inv Hbr; eauto.
Qed.

Lemma transl_instr_basic_ep:
  forall f i ep1 ep2 k1 k2 c1,
  transl_instr_basic f i ep1 k1 = OK c1 ->
  exists c2, transl_instr_basic f i ep2 k2 = OK c2.
Proof.
  intros f i ep1 ep2 k1 k2 c1 H.
  Opaque loadind storeind.
  destruct i.
  - simpl in H |- *. exact (loadind_success _ _ _ _ _ k2 _ H).
  - simpl in H |- *. exact (storeind_success _ _ _ _ _ k2 _ H).
  - (* MBgetparam *)
    simpl in H |- *. apply bind_inversion in H. destruct H as (ld1 & Hld1 & _).
    pose proof (loadind_success _ _ _ _ _ k2 _ Hld1) as Hex.
    destruct Hex as (ld2 & Hld2).
    rewrite Hld2. simpl.
    destruct ep2; [exists ld2 | exists (loadind_int IR13 (fn_link_ofs f) IR12 ld2)]; reflexivity.
  Transparent loadind storeind.
  (* MBop, MBload, MBstore, MBmemcpy: ep is unused, success depends only on instr args *)
  - simpl in H |- *. exact (transl_op_success f _ _ _ _ k2 _ H).
  - simpl in H |- *. exact (transl_load_success _ _ _ _ _ _ k2 _ H).
  - simpl in H |- *. exact (transl_store_success _ _ _ _ _ k2 _ H).
  - simpl in H |- *. exact (transl_memcpy_success _ _ _ k2 _ H).
  - simpl in H |- *. exact (transl_savecallee_success _ _ _ _ k2 _ H).
  - simpl in H |- *. exact (transl_restorecallee_success _ _ _ _ k2 _ H).
Qed.

Lemma transl_basic_code_ep:
  forall f body ep1 ep2 bdy1,
  transl_basic_code f body ep1 = OK bdy1 ->
  exists bdy2, transl_basic_code f body ep2 = OK bdy2.
Proof.
  induction body; simpl; intros.
  - eauto.
  - monadInv H.
    exploit IHbody; eauto. intros (bdy2 & Hbdy2).
    rewrite Hbdy2. simpl.
    eapply (transl_instr_basic_ep f a _ ep2 _ bdy2); eauto.
Qed.

Lemma transl_block_ep_success:
  forall f bb ep1 ep2 nl lb1,
  transl_block f bb ep1 nl = OK lb1 ->
  exists lb2, transl_block f bb ep2 nl = OK lb2.
Proof.
  intros. unfold transl_block in *. monadInv H.
  exploit transl_basic_code_ep; eauto. intros (bdy2 & Hbdy2).
  rewrite EQ. simpl. rewrite Hbdy2. simpl. eauto.
Qed.

Lemma transl_blocks_reconstruct:
  forall f bb c ep nl tc_tail lb,
  transl_blocks0 f (pool_cap f) c false nil = OK (tc_tail, nl) ->
  transl_block f bb (if MB.header bb then ep else false) nl = OK lb ->
  transl_blocks f (bb :: c) ep = OK (lb @@ tc_tail).
Proof.
  intros. unfold transl_blocks. simpl. rewrite H. simpl. rewrite H0. simpl. auto.
Qed.

Lemma transl_blocks_label:
  forall lbl f c tc ep,
  transl_blocks f c ep = OK tc ->
  match MB.find_label lbl c with
  | None => find_label lbl tc = None
  | Some c' => exists tc', find_label lbl tc = Some tc' /\ transl_blocks f c' false = OK tc'
  end.
Proof.
  induction c; intros.
  - unfold transl_blocks in H. simpl in H. monadInv H. simpl. auto.
  - exploit transl_blocks_cons; eauto.
    intros (lb & nl & tc_tail & TB0 & TLBS_tail & TLB & Htc).
    exploit transl_block_singleton; eauto. intros (tbb & Hlb). subst lb.
    simpl in Htc. subst tc.
    assert (ABis: is_label lbl tbb = MB.is_label lbl a)
      by (eapply transl_is_label; eauto).
    simpl MB.find_label. destruct (MB.is_label lbl a) eqn:MBis.
    + (* is_label = true: label found at head, c' = a :: c *)
      simpl. eexists. split.
      * simpl. rewrite ABis. reflexivity.
      * (* transl_blocks f (a :: c) false = OK (tbb :: tc_tail) *)
        assert (HdrNE: MB.header a <> nil).
        { apply MB.is_label_correct_true in MBis.
          destruct (MB.header a). contradiction. discriminate. }
        destruct (MB.header a) eqn:Hhdr; try contradiction.
        (* TLB now has: transl_block f a ep nl, and if header = l::l0 then ep else false = ep *)
        (* For the goal, we need transl_block f a (if l::l0 then false else false) nl = transl_block f a false nl *)
        change (tbb :: tc_tail) with ((tbb :: nil) @@ tc_tail).
        apply transl_blocks_reconstruct with (ep := false) (nl := nl).
        exact TB0. rewrite list_same. exact TLB.
    + simpl. rewrite ABis.
      apply IHc in TLBS_tail. destruct (MB.find_label lbl c).
      * destruct TLBS_tail as (tc' & FIND & TLBS'). exists tc'. auto.
      * exact TLBS_tail.
Qed.

Lemma find_label_nil:
  forall bb lbl c,
  header bb = nil ->
  find_label lbl (bb::c) = find_label lbl c.
Proof.
  intros. destruct bb as [hd bdy ex]; simpl in *. subst.
  assert (is_label lbl {| AB.header := nil; AB.body := bdy; AB.exit := ex; AB.correct := correct |} = false).
  { erewrite <- is_label_correct_false. simpl. auto. }
  rewrite H. auto.
Qed.

Theorem transl_find_label:
  forall lbl f tf,
  transf_function f = OK tf ->
  match MB.find_label lbl f.(MB.fn_code) with
  | None => find_label lbl tf.(fn_blocks) = None
  | Some c => exists tc, find_label lbl tf.(fn_blocks) = Some tc /\ transl_blocks f c false = OK tc
  end.
Proof.
  intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (size_blocks (fn_blocks x))); inv EQ0. clear g.
  monadInv EQ. unfold make_prologue. simpl fn_blocks. repeat (rewrite find_label_nil); simpl; auto.
  eapply transl_blocks_label; eauto.
Qed.

End TRANSL_LABEL.

A valid branch in a piece of Machblock code translates to a valid ``go to'' transition in the generated Asmblock code.

Lemma find_label_goto_label:
  forall f tf lbl rs m c' b ofs,
  Genv.find_funct_ptr ge b = Some (Internal f) ->
  transf_function f = OK tf ->
  rs PC = Vptr b ofs ->
  MB.find_label lbl f.(MB.fn_code) = Some c' ->
  exists tc', exists rs',
    goto_label tf lbl rs m = Next rs' m
  /\ transl_code_at_pc ge (rs' PC) b f c' false tf tc'
  /\ forall r, r <> PC -> rs'#r = rs#r.
Proof.
  intros. exploit (transl_find_label lbl f tf); eauto. rewrite H2.
  intros (tc & A & B).
  exploit label_pos_code_tail; eauto. instantiate (1 := 0).
  intros [pos' [P [Q R]]].
  exists tc; exists (rs#PC <- (Vptr b (Ptrofs.repr pos'))).
  split. unfold goto_label. rewrite P. rewrite H1. auto.
  split. rewrite Pregmap.gss. constructor; auto.
  rewrite Ptrofs.unsigned_repr. replace (pos' - 0) with pos' in Q.
  auto. lia.
  generalize (transf_function_no_overflow _ _ H0). lia.
  intros. apply Pregmap.gso; auto.
Qed.

Existence of return addresses

Lemma return_address_exists:
  forall b f c, is_tail (b :: c) f.(MB.fn_code) ->
  exists ra, return_address_offset f c ra.
Proof.
  intros. eapply Asmblockgenproof0.return_address_exists; eauto.
  - intros. monadInv H0.
    destruct (zlt Ptrofs.max_unsigned (size_blocks x.(fn_blocks))); inv EQ0. monadInv EQ. simpl.
    exists x. exists (save_lr_preserves_R12 (fn_retaddr_ofs f0)). split. auto.
    repeat constructor.
  - exact transf_function_no_overflow.
Qed.

Ltac exploreInst :=
  repeat match goal with
  | [ H : match ?var with | _ => _ end = _ |- _ ] => destruct var
  | [ H : OK _ = OK _ |- _ ] => monadInv H
  | [ |- context[if ?b then _ else _] ] => destruct b
  | [ |- context[match ?m with | _ => _ end] ] => destruct m
  | [ |- context[match ?m as _ return _ with | _ => _ end]] => destruct m
  | [ H : bind _ _ = OK _ |- _ ] => monadInv H
  | [ H : Error _ = OK _ |- _ ] => inversion H
  end.

Some translation properties

Lemma transl_blocks_distrib:
  forall c f bb tbb tc ep,
  transl_blocks f (bb::c) ep = OK (tbb::tc)
  -> exists near_labels,
     transl_block f bb (if MB.header bb then ep else false) near_labels = OK (tbb :: nil)
  /\ transl_blocks f c false = OK tc.
Proof.
  intros until ep. intros TLBS.
  exploit transl_blocks_cons; eauto. intros (lb & nl & tc_tail & _ & TLBS_tail & TLB & Htc).
  exploit transl_block_singleton; eauto. intros (tbb' & Hlb). subst lb.
  simpl in Htc. inv Htc.
  exists nl. auto.
Qed.

Lemma cons_bblocks_decomp:
  forall thd tbdy tex tbb,
  (tbdy <> nil \/ tex <> None) ->
  cons_bblocks thd tbdy tex = tbb :: nil ->
     header tbb = thd
  /\ body tbb = tbdy
  /\ exit tbb = tex.
Proof.
  intros until tbb. intros Hnonil CONSB. unfold cons_bblocks in CONSB.
  destruct (tex) eqn:ECTL.
  - destruct tbdy; inv CONSB; simpl; auto.
  - inversion Hnonil.
    + destruct tbdy as [|bi tbdy]; [ contradiction H; auto | inv CONSB; auto].
    + contradict H; simpl; auto.
Qed.

Lemma transl_blocks_nonil:
  forall f bb c tc ep,
  transl_blocks f (bb::c) ep = OK tc ->
  exists tbb tc', tc = tbb :: tc'.
Proof.
  intros until ep. intros TLBS.
  exploit transl_blocks_cons; eauto. intros (lb & nl & tc_tail & _ & _ & TLB & Htc).
  subst. destruct lb as [|tbb lb'].
  - apply transl_block_nonil in TLB. contradiction.
  - simpl. eauto.
Qed.

Definition mb_remove_header bb := {| MB.header := nil; MB.body := MB.body bb; MB.exit := MB.exit bb |}.

Definition mb_remove_body (bb: MB.bblock) :=
  {| MB.header := MB.header bb; MB.body := nil; MB.exit := MB.exit bb |}.
  
Definition mbsize (bb: MB.bblock) := (length (MB.body bb) + length_opt (MB.exit bb))%nat.

Lemma mbsize_eqz:
  forall bb, mbsize bb = 0%nat -> MB.body bb = nil /\ MB.exit bb = None.
Proof.
  intros. destruct bb as [hd bdy ex]; simpl in *. unfold mbsize in H.
  remember (length _) as a. remember (length_opt _) as b.
  assert (a = 0%nat) by lia. assert (b = 0%nat) by lia. subst. clear H.
  inv H0. inv H1. destruct bdy; destruct ex; auto.
  all: try discriminate.
Qed.

Lemma mbsize_neqz:
  forall bb, mbsize bb <> 0%nat -> (MB.body bb <> nil \/ MB.exit bb <> None).
Proof.
  intros. destruct bb as [hd bdy ex]; simpl in *.
  destruct bdy; destruct ex; try (right; discriminate); try (left; discriminate).
  contradict H. unfold mbsize. simpl. auto.
Qed.

Record codestate :=
  Codestate {
    pstate: state; (* projection to Asmblock.state *)
    pheader: list label;
    pbody1: list basic; (* list of basic instructions coming from the translation of the Machblock body *)
    pbody2: list basic; (* list of basic instructions coming from the translation of the Machblock exit *)
    pctl: option control; (* exit instruction, coming from the translation of the Machblock exit *)
    ep: bool; (* reflects the ep variable used in the translation *)
    rem: list AB.bblock; (* remaining bblocks to execute *)
    cur: bblock (* current bblock to execute - to keep track of in_dec_transl size when incrementing PC *)
  }.

Inductive match_codestate fb: Machblock.state -> codestate -> Prop :=
  | match_codestate_intro:
      forall s sp ms m rs0 m0 f tc ep c bb tbb tbc1 tbc2 ex
        (STACKS: match_stack ge s)
        (FIND: Genv.find_funct_ptr ge fb = Some (Internal f))
        (MEXT: Mem.extends m m0)
        (TBC: transl_basic_code f (MB.body bb) (if MB.header bb then ep else false) = OK tbc1)
        near_labels
        (TIC: transl_exit f (MB.exit bb) near_labels = OK (tbc2, ex))
        (TBLS: transl_blocks f c false = OK tc)
        (AG: agree ms sp rs0)
        (DXP: (if MB.header bb then ep else false) = true ->
               rs0#IR12 = parent_sp s)
        (RAIR14: function_preserves_IR14 f = true -> rs0#IR14 = parent_ra s)
        (BBIR14: function_preserves_IR14 f = true -> bblock_preserves_IR14 bb = true)
        (CODETAIL: is_tail c (MB.fn_code f)),
      match_codestate fb (Machblock.State s fb sp (bb::c) ms m)
        {| pstate := (Asm.State rs0 m0);
           pheader := (MB.header bb);
           pbody1 := tbc1;
           pbody2 := tbc2;
           pctl := ex;
           ep := ep;
           rem := tc;
           cur := tbb
        |}.

Lemma exec_straight_body:
  forall c c' rs1 m1 rs2 m2,
  exec_straight tge c rs1 m1 c' rs2 m2 ->
  exists l,
     c = l ++ c'
  /\ exec_body tge l rs1 m1 = Next rs2 m2.
Proof.
  induction c; try (intros; inv H; fail).
  intros until m2. intros EXES. inv EXES.
  - exists (a :: nil). repeat (split; simpl; auto). rewrite H6. auto.
  - eapply IHc in H7; eauto. destruct H7 as (l' & Hc & EXECB). subst.
    exists (a :: l'). repeat (split; simpl; auto).
    rewrite H1. auto.
Qed.

Lemma exec_straight_body2:
  forall c rs1 m1 c' rs2 m2,
  exec_straight tge c rs1 m1 c' rs2 m2 ->
  exists body,
     exec_body tge body rs1 m1 = Next rs2 m2
  /\ body ++ c' = c.
Proof.
  intros until m2. induction 1.
  - exists (i1::nil). split; auto. simpl. rewrite H. auto.
  - destruct IHexec_straight as (bdy & EXEB & BTC).
    exists (i:: bdy). split; simpl.
    + rewrite H. auto.
    + congruence.
Qed.

Lemma exec_straight_opt_body2:
  forall c rs1 m1 c' rs2 m2,
  exec_straight_opt tge c rs1 m1 c' rs2 m2 ->
  exists body,
     exec_body tge body rs1 m1 = Next rs2 m2
  /\ body ++ c' = c.
Proof.
  intros until m2. intros EXES.
  inv EXES.
  - exists nil. split; auto.
  - eapply exec_straight_body2. auto.
Qed.

Lemma PC_not_data_preg: forall r ,
  data_preg r = true ->
  r <> PC.
Proof.
  intros. destruct (PregEq.eq r PC); [ rewrite e in H; simpl in H; discriminate | auto ].
Qed.

Inductive match_asmstate fb: codestate -> Asm.state -> Prop :=
  | match_asmstate_some:
      forall rs f tf tc m tbb ofs ep tbdy1 tbdy2 tex lhd
        (FIND: Genv.find_funct_ptr ge fb = Some (Internal f))
        (TRANSF: transf_function f = OK tf)
        (PCeq: rs PC = Vptr fb ofs)
        (TAIL: code_tail (Ptrofs.unsigned ofs) (fn_blocks tf) (tbb::tc))
        ,
      match_asmstate fb
        {| pstate := (Asm.State rs m);
           pheader := lhd;
           pbody1 := tbdy1;
           pbody2 := tbdy2;
           pctl := tex;
           ep := ep;
           rem := tc;
           cur := tbb |}
        (Asm.State rs m).

Lemma indexed_memory_access_nonil: forall f ofs r i k,
  indexed_memory_access f ofs r i k <> nil.
Proof.
  intros.
  unfold indexed_memory_access, addimm, iterate_op.
  desif; try congruence.
  all: destruct decompose_int; try congruence.
Qed.

Helper lemmas: each transl_op_cmp sub-path produces non-empty code.
Lemma transl_op_cmp_cne_nonil:
  forall cmp args r k bc,
  transl_op_cmp_cne cmp args r k = Some (OK bc) -> bc <> nil.
Proof.
  unfold transl_op_cmp_cne. intros cmp args r k bc H.
  destruct cmp; try discriminate.
  all: destruct c; try discriminate.
  all: destruct args as [|? [|? [|? ?]]]; try discriminate.
  2: destruct (is_immed_arith OTHER i);
       [|destruct (is_immed_arith OTHER (Int.neg i))]; try discriminate.
  all: inv H; monadInv H1; discriminate.
Qed.

Lemma transl_op_cmp_ceq_clz_nonil:
  forall cmp args r k bc,
  transl_op_cmp_ceq_clz cmp args r k = Some (OK bc) -> bc <> nil.
Proof.
  unfold transl_op_cmp_ceq_clz. intros cmp args r k bc H.
  destruct cmp; try discriminate.
  all: destruct c; try discriminate.
  all: destruct args as [|? [|? [|? ?]]]; try discriminate.
  3: destruct (Int.eq i Int.zero);
       [|destruct (is_immed_arith OTHER i);
         [|destruct (is_immed_arith OTHER (Int.neg i))]]; try discriminate.
  all: inv H; monadInv H1; discriminate.
Qed.

Lemma transl_op_cmp_default_nonil:
  forall cmp args r k bc,
  transl_op_cmp_default cmp args r k = OK bc -> bc <> nil.
Proof.
  unfold transl_op_cmp_default, transl_cond. intros cmp args r k bc H.
  destruct cmp; exploreInst; try discriminate;
    unfold loadimm, loadimm_word, addimm, rsubimm, andimm, orimm, xorimm,
    iterate_op; desif; try discriminate; destruct decompose_int; discriminate.
Qed.

Lemma transl_op_cmp_nonil:
  forall cmp args r k bc,
  transl_op_cmp cmp args r k = OK bc -> bc <> nil.
Proof.
  intros cmp args r k bc H.
  unfold transl_op_cmp in H.
  destruct (transl_op_cmp_cne cmp args r k) as [res|] eqn:CNE.
  - subst res. eapply transl_op_cmp_cne_nonil; eauto.
  - destruct (Compopts.cmp_via_clz tt).
    + destruct (transl_op_cmp_ceq_clz cmp args r k) as [res|] eqn:CEQ.
      * subst res. eapply transl_op_cmp_ceq_clz_nonil; eauto.
      * eapply transl_op_cmp_default_nonil; eauto.
    + eapply transl_op_cmp_default_nonil; eauto.
Qed.

Generic helper: transl_cond applied to any non-empty continuation is non-empty. Used when exploreInst decomposes Osel/Oselimm constructions into a residual transl_cond ... = OK x hypothesis.
Lemma transl_cond_cons_nonil:
  forall cmp args bi k0 bc,
  transl_cond cmp args (bi ::bi k0) = OK bc -> bc <> nil.
Proof.
  intros cmp args bi k0 bc H.
  unfold transl_cond in H.
  exploreInst; try discriminate;
    unfold loadimm, loadimm_word, addimm, rsubimm, andimm, orimm, xorimm,
    iterate_op; desif; try discriminate; destruct decompose_int; discriminate.
Qed.

Lemma transl_instr_basic_nonil:
  forall k f bi ep x,
  transl_instr_basic f bi ep k = OK x ->
  x <> nil.
Proof.
  intros until x. intros TIB.
  destruct bi; simpl in TIB.
  - unfold loadind in TIB;
    exploreInst; apply indexed_memory_access_nonil.
  - unfold storeind in TIB;
    exploreInst; apply indexed_memory_access_nonil.
  - monadInv TIB. unfold loadind in EQ. exploreInst;
    unfold loadind_int; apply indexed_memory_access_nonil.
  - unfold transl_op in TIB.
    (* All Ocmp paths go through [transl_op_cmp]; non-Ocmp paths land in
       the catch-all branch and use the same loadimm/addimm/etc helpers
       as before.  We dispatch Ocmp via [transl_op_cmp_nonil] and the
       residual [Osel]/[Oselimm] cases via [transl_cond_cons_nonil]. *)

    exploreInst; try discriminate;
      try (eapply transl_op_cmp_nonil; eassumption);
      try (eapply transl_cond_cons_nonil; eassumption);
      try (unfold loadimm, loadimm_word, addimm, rsubimm, andimm, orimm, xorimm,
           iterate_op; desif; try discriminate;
           destruct decompose_int; discriminate).
  - unfold transl_load, transl_memory_access_int, transl_memory_access_float,
    transl_memory_access, indexed_memory_access, addimm, iterate_op in TIB.
    exploreInst; discriminate.
  - unfold transl_store, transl_memory_access_int, transl_memory_access_float,
    transl_memory_access, indexed_memory_access, addimm, iterate_op in TIB.
    exploreInst; discriminate.
  - unfold transl_memcpy in TIB.
    do 3 (try destruct l); try congruence;
    monadInv TIB.
    unfold select_memcpy_inst, indexed_memcpy_access, addimm, iterate_op.
    exploreInst; congruence.
  - (* MBsavecallee *)
    unfold transl_savecallee in TIB.
    destruct l as [|r l']; [discriminate|].
    destruct (mreg_type r); try discriminate.
    + monadInv TIB; unfold addimm, iterate_op; desif; try congruence;
        destruct decompose_int; congruence.
    + apply bind_inversion in TIB. destruct TIB as (? & ? & TIB).
      destruct fregs_contiguous; inv TIB;
        unfold addimm, iterate_op; desif; try congruence;
        destruct decompose_int; congruence.
  - (* MBrestorecallee *)
    unfold transl_restorecallee in TIB.
    destruct l as [|r l']; [discriminate|].
    destruct (mreg_type r); try discriminate.
    + monadInv TIB; unfold addimm, iterate_op; desif; try congruence;
        destruct decompose_int; congruence.
    + apply bind_inversion in TIB. destruct TIB as (? & ? & TIB).
      destruct fregs_contiguous; inv TIB;
        unfold addimm, iterate_op; desif; try congruence;
        destruct decompose_int; congruence.
Qed.

Lemma transl_basic_code_nonil:
  forall bdy f x ep,
  bdy <> nil ->
  transl_basic_code f bdy ep = OK x ->
  x <> nil.
Proof.
  induction bdy as [|bi bdy].
    intros. contradict H0; auto.
  destruct bdy as [|bi2 bdy].
  - clear IHbdy. intros f x b _ TBC. simpl in TBC.
    eapply transl_instr_basic_nonil; eauto.
  - intros f x b Hnonil TBC. remember (bi2 :: bdy) as bdy'.
    monadInv TBC.
    assert (x0 <> nil).
    eapply IHbdy; eauto. subst bdy'. discriminate.
    eapply transl_instr_basic_nonil; eauto.
Qed.

Lemma transl_exit_nonil:
  forall ex f near_labels bdy x,
  ex <> None ->
  transl_exit f ex near_labels = OK(bdy, x) ->
  x <> None.
Proof.
  intros until x. intros Hnonil TIC.
  destruct ex as [ex|].
  - clear Hnonil. destruct ex.
    all: try (simpl in TIC; try monadInv TIC; exploreInst; discriminate).
  - contradict Hnonil; auto.
Qed.

Theorem app_nonil: forall A (l1 l2: list A),
  l1 <> nil ->
  l1 @@ l2 <> nil.
Proof.
  induction l2.
  - intros; rewrite app_nil_r; auto.
  - intros. unfold not. intros. symmetry in H0.
    generalize (app_cons_not_nil); intros. unfold not in H1.
    generalize (H0). apply H1.
Qed.

Theorem match_state_codestate:
  forall mbs abs s fb sp bb c ms m,
  (MB.body bb <> nil \/ MB.exit bb <> None) ->
  mbs = (Machblock.State s fb sp (bb::c) ms m) ->
  match_states mbs abs ->
  exists cs fb f tbb tc ep,
    match_codestate fb mbs cs /\ match_asmstate fb cs abs
    /\ Genv.find_funct_ptr ge fb = Some (Internal f)
    /\ transl_blocks f (bb::c) ep = OK (tbb::tc)
    /\ body tbb = pbody1 cs ++ pbody2 cs
    /\ exit tbb = pctl cs
    /\ cur cs = tbb /\ rem cs = tc
    /\ pstate cs = abs.
Proof.
  intros until m. intros Hnotempty Hmbs MS. subst. inv MS.
  inv AT. clear H0. exploit transl_blocks_nonil; eauto. intros (tbb & tc' & Htc). subst.
  exploit transl_blocks_distrib; eauto. intros (near_labels' & TLB & TLBS).
  monadInv TLB. exploit cons_bblocks_decomp; eauto.
  - inversion Hnotempty.
    + destruct (MB.body bb) as [|bi bdy]; try (contradict H0; simpl; auto; fail).
      left. apply app_nonil. eapply transl_basic_code_nonil; eauto.
    + destruct (MB.exit bb) as [ei|]; try (contradict H0; simpl; auto; fail).
      right. eapply transl_exit_nonil; eauto.
  - intros (Hth & Htbdy & Htexit).
    exists {| pstate := (State rs m'); pheader := (Machblock.header bb);
              pbody1 := x1; pbody2 := x; pctl := x0; ep := ep0; rem := tc';
              cur := tbb |}, fb, f, tbb, tc', ep0.
    repeat split. 1-2: econstructor; eauto.
    { destruct (MB.header bb). eauto. discriminate. } eauto.
    { intro HFPI. unfold function_preserves_IR14 in HFPI.
      rewrite forallb_forall in HFPI. apply HFPI.
      eapply is_tail_incl; eauto. left; auto. }
    { eapply is_tail_trans; [|eauto]. constructor. constructor. }
    all: simpl; auto.
Qed.

Theorem exec_body_concat:
  forall ge
    (body1 body2: list basic)
    (rs rs': Asmblockgenproof1.AB.regset)
    (m m': mem)
    (EXEC1: exec_body ge body1 rs m = Next rs' m'),
  exec_body ge (body1 @@ body2) rs m =
    exec_body ge body2 rs' m'.
Proof.
  induction body1; cbn; intros.
  { inv EXEC1. reflexivity. }
  destruct (exec_basic ge0 a rs m); cbn.
  2: discriminate.
  erewrite IHbody1.
  reflexivity.
  assumption.
Qed.
 
Theorem step_simu_control:
  forall bb' fb fn s sp c ms' m' rs2 m2 t S'' rs1 m1 tbb tbdy2 tex cs2,
  MB.body bb' = nil ->
  Genv.find_funct_ptr tge fb = Some (Internal fn) ->
  pstate cs2 = (State rs2 m2) ->
  pbody1 cs2 = nil -> pbody2 cs2 = tbdy2 -> pctl cs2 = tex ->
  cur cs2 = tbb ->
  match_codestate fb (MB.State s fb sp (bb'::c) ms' m') cs2 ->
  match_asmstate fb cs2 (State rs1 m1) ->
  exit_step return_address_offset ge (MB.exit bb') (MB.State s fb sp (bb'::c) ms' m') t S'' ->
  (exists rs3 m3 rs4 m4,
      exec_body tge tbdy2 rs2 m2 = Next rs3 m3
  /\ exec_exit tge fn (Ptrofs.repr (size tbb)) rs3 m3 tex t rs4 m4
  /\ match_states S'' (State rs4 m4)).
Proof.
  intros until cs2.
  intros Hbody FIND Hpstate Hpbody1 Hpbody2 Hpctl Hcur MCS MAS ESTEP.
  inv ESTEP.
  - inv MCS. inv MAS. simpl in *.
    inv Hpstate.
    destruct ctl.
    + (* MBcall *)
      destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
      inv TBC. inv TIC. inv H0.
      assert (f0 = f) by congruence. subst f0.
      assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
      eapply transf_function_no_overflow; eauto.
      destruct s1 as [rf|fid]; simpl in H1.
      * (* Indirect call *)
        monadInv H1. monadInv EQ.
        assert (ms' rf = Vptr f' Ptrofs.zero).
        { unfold find_function_ptr in H12. destruct (ms' rf); try discriminate.
          revert H12; predSpec Ptrofs.eq Ptrofs.eq_spec i Ptrofs.zero;
          intros; congruence. }
        assert (rs2 x = Vptr f' Ptrofs.zero).
        { exploit ireg_val; eauto. rewrite H; intros LD; inv LD; auto. }
        generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1.
        remember (Ptrofs.add _ _) as ofs'.
        assert (TCA: transl_code_at_pc ge (Vptr fb ofs') fb f c false tf tc).
        { econstructor; eauto. }
        assert (f1 = f) by congruence. subst f1.
        exploit return_address_offset_correct; eauto. intros; subst ra.
        assert (FPIF: function_preserves_IR14 f = false).
        { destruct (function_preserves_IR14 f) eqn:E; auto.
          exfalso. specialize (BBIR14 eq_refl). unfold bblock_preserves_IR14 in BBIR14.
          simpl in BBIR14. discriminate. }
        repeat eexists.
        econstructor; eauto. econstructor.
        econstructor; eauto. econstructor; eauto.
        eapply agree_sp_def; eauto. simpl. eapply agree_exten; eauto. intros.
        unfold incrPC; repeat Simpl; auto.
        simpl. unfold incrPC; rewrite Pregmap.gso; auto; try discriminate.
        rewrite !Pregmap.gss. rewrite PCeq. rewrite Heqofs'. simpl. auto.
      * (* Direct call *)
        monadInv H1.
        generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1.
        remember (Ptrofs.add _ _) as ofs'.
        assert (TCA: transl_code_at_pc ge (Vptr fb ofs') fb f c false tf tc).
        econstructor; eauto.
        assert (f1 = f) by congruence. subst f1.
        exploit return_address_offset_correct; eauto. intros; subst ra.
        assert (FPIF: function_preserves_IR14 f = false).
        { destruct (function_preserves_IR14 f) eqn:E; auto.
          exfalso. specialize (BBIR14 eq_refl). unfold bblock_preserves_IR14 in BBIR14.
          simpl in BBIR14. discriminate. }
        repeat eexists.
        econstructor; eauto. econstructor.
        econstructor; eauto. econstructor; eauto. eapply agree_sp_def; eauto.
        simpl. eapply agree_exten; eauto. intros.
        unfold incrPC; repeat Simpl; auto. unfold Genv.symbol_address.
        rewrite symbols_preserved. simpl in H12. rewrite H12. auto.
        unfold incrPC; simpl; rewrite Pregmap.gso; try discriminate.
        rewrite !Pregmap.gss. subst. unfold Val.offset_ptr. rewrite PCeq. auto.
    + (* MBtailcall *)
      destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
      inv TBC. inv TIC. inv H0.
      assert (f0 = f) by congruence. subst f0.
      assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
      eapply transf_function_no_overflow; eauto.
      exploit Mem.loadv_extends. eauto. eexact H13. auto. simpl.
      intros [parent' [A B]]. destruct s1 as [rf|fid]; simpl in H11.
      * monadInv H1. monadInv EQ.
        assert (ms' rf = Vptr f' Ptrofs.zero).
        { destruct (ms' rf); try discriminate. revert H11.
          predSpec Ptrofs.eq Ptrofs.eq_spec i Ptrofs.zero; intros; congruence. }
        assert (rs2 x = Vptr f' Ptrofs.zero).
        { exploit ireg_val; eauto. rewrite H; intros LD; inv LD; auto. }
        assert (f = f1) by congruence. subst f1. clear FIND1. clear H12.
        exploit make_epilogue_correct; eauto.
        intros (rs1 & m1 & U & V & W & X & Y & Z).
        exploit exec_straight_body; eauto.
        intros (l & MKEPI & EXEB).
        destruct (Compopts.branch_target_protection tt); inv H0.
        ** (* BTI *)
           repeat eexists. rewrite app_nil_r in MKEPI.
           rewrite <- MKEPI in EXEB.
           exact EXEB.
           { eauto. econstructor. simpl. unfold incrPC.
             rewrite Pregmap.gso by discriminate.
             reflexivity. }
           econstructor; eauto.
           { apply agree_set_other.
            - econstructor; auto with asmgen.
              + apply V.
              + intro r. destruct r; apply V; auto.
            - eauto with asmgen. }
           rewrite Pregmap.gss. rewrite Z; auto; try discriminate.
           eauto with asmgen; eauto.
           eauto with asmgen; eauto.
        ** (* no BTI *)
          repeat eexists. rewrite app_nil_r in MKEPI.
          rewrite <- MKEPI in EXEB.
          eauto. econstructor. simpl. unfold incrPC.
          rewrite !Pregmap.gso; try discriminate. eauto.
          econstructor; eauto.
          { apply agree_set_other.
            - econstructor; auto with asmgen.
              + apply V.
              + intro r. destruct r; apply V; auto.
            - eauto with asmgen. }
          rewrite Pregmap.gss. rewrite Z; auto; try discriminate.
          eauto with asmgen; eauto.
          eauto with asmgen; eauto.
      * monadInv H1. assert (f = f1) by congruence. subst f1.
        clear FIND1. clear H12.
        exploit make_epilogue_correct; eauto.
        intros (rs1 & m1 & U & V & W & X & Y & Z).
        exploit exec_straight_body; eauto.
        intros (l & MKEPI & EXEB).
        repeat eexists. inv EQ. rewrite app_nil_r in MKEPI.
        rewrite <- MKEPI in EXEB.
        eauto. inv EQ. econstructor. simpl. unfold incrPC.
        eauto.
        econstructor; eauto.
        { apply agree_set_other.
          - econstructor; auto with asmgen.
            + apply V.
            + intro r. destruct r; apply V; auto.
          - eauto with asmgen. }
        { rewrite Pregmap.gss. unfold Genv.symbol_address.
          rewrite symbols_preserved. rewrite H11. auto. }
    + (* MBbuiltin *)
      destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
      assert (f0 = f) by congruence. subst f0.
      assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
      eapply transf_function_no_overflow; eauto.
      generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1.
      remember (Ptrofs.add _ _) as ofs'.
      assert (TCA: transl_code_at_pc ge (Vptr fb ofs') fb f c false tf tc).
      econstructor; eauto.
      monadInv TBC. monadInv TIC. inv H0.
      exploit builtin_args_match; eauto. intros [vargs' [P Q]].
      exploit external_call_mem_extends; eauto.
      intros [vres' [m2' [A [B [C D]]]]].
      repeat eexists. econstructor. erewrite <- sp_val by eauto.
      eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
      eapply external_call_symbols_preserved; eauto. apply senv_preserved. eauto.
      econstructor; eauto.
      unfold incrPC. rewrite Pregmap.gss.
      rewrite set_res_other. rewrite undef_regs_other. unfold Val.offset_ptr.
      rewrite PCeq.
      eauto. intros; simpl in *;
      destruct H as [HR14 | HDES]; subst; try discriminate;
      exploit list_in_map_inv; eauto; intros [mr [E F]]; subst;
      auto with asmgen. auto. apply agree_nextblock. eapply agree_set_res; auto.
      eapply agree_undef_regs; eauto. intros.
      apply undef_regs_other. intros. simpl in H1.
      destruct H1 as [HR14 | HDES]; subst. auto with asmgen.
      exploit list_in_map_inv; eauto. intros [mr [E F]]; subst.
      rewrite preg_notin_charact in H0. auto.
      intros. discriminate.
      (* RAIR14: MBbuiltin exit makes function_preserves_IR14 = false *)
      { intro HFPI. exfalso. specialize (BBIR14 HFPI). unfold bblock_preserves_IR14 in BBIR14.
        simpl in BBIR14. discriminate. }
    + (* MBgoto *)
      destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
      inv TBC. inv TIC. inv H0.
      assert (f0 = f) by congruence. subst f0. assert (f1 = f) by congruence.
      subst f1. clear H9.
      remember (incrPC (Ptrofs.repr (size tbb)) rs2) as rs2'.
      exploit functions_transl. eapply FIND0. eapply TRANSF0. intros FIND'.
      assert (tf = fn) by congruence. subst tf.
      exploit find_label_goto_label.
      eauto. eauto.
      instantiate (2 := rs2').
      subst. unfold incrPC. rewrite Pregmap.gss. unfold Val.offset_ptr.
      rewrite PCeq. eauto.
      eauto.
      intros (tc' & rs' & GOTO & AT2 & INV).
      assert (Hrs_eq: forall r : preg, r <> PC -> rs' r = rs2 r).
      { intros. rewrite Heqrs2' in INV.
        rewrite INV; unfold incrPC; try rewrite Pregmap.gso; auto. }
      eexists. eexists. repeat eexists. repeat split.
      econstructor; eauto.
      rewrite Heqrs2' in INV. unfold incrPC in INV.
      rewrite Heqrs2' in GOTO; simpl; eauto.
      econstructor; eauto.
      eapply agree_exten; eauto with asmgen. eauto with asmgen.
      intros; discriminate.
      intro HFPI; rewrite Hrs_eq by discriminate; exact (RAIR14 HFPI).
      eapply find_label_tail; eauto.
    + (* MBcond *)
      destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
      inv TBC. inv TIC. inv H0. monadInv H1. monadInv EQ.
      * (* MBcond true *)
        assert (f0 = f) by congruence. subst f0.
        exploit eval_condition_lessdef.
        eapply preg_vals; eauto. all: eauto. intros EC.
        exploit (transl_cbranch_correct_gen tge fn). exact EQ0. apply EC.
        intros (rs' & cfi & wrap & CTLeq & ES & ECFI & WSTUCK & WNEXT & REGS).
        subst x0.
        exploit exec_straight_opt_body2. eauto. intros (bdy & EXEB & BTC).
        assert (PCeq': rs2 PC = rs' PC). { inv ES; auto. match goal with H: exec_straight _ _ _ _ _ _ _ |- _ => erewrite <- exec_straight_pc; [|eexact H]; eauto end. }
        rewrite PCeq' in PCeq.
        assert (f1 = f) by congruence. subst f1.
        exploit find_label_goto_label.
        4: eapply H14. 1-2: eauto. instantiate (2 := (incrPC (Ptrofs.repr (size tbb)) rs')).
        unfold incrPC, Val.offset_ptr. rewrite PCeq. rewrite Pregmap.gss. eauto.
        intros (tc' & rs3 & GOTOL & TLPC & Hrs3).
        exploit functions_transl. eapply FIND1. eapply TRANSF0. intros FIND'.
        assert (tf = fn) by congruence. subst tf.
        (* goto_label preserves memory; GOTOL: goto_label ... = Next rs3 <mem> *)
        match type of GOTOL with
        | goto_label _ _ _ ?mem = Next _ _ =>
          destruct (WNEXT rs3 mem) as (rs_w & Hwrap_eq & Hwrap_data & Hwrap_PC & Hwrap_IR14)
        end.

        repeat eexists.
        rewrite <- BTC. simpl. rewrite app_nil_r. eauto.
        rewrite <- BTC. simpl. econstructor. rewrite ECFI. rewrite GOTOL. exact Hwrap_eq.
        econstructor; eauto.
        { rewrite Hwrap_PC. eauto. }
        eapply agree_exten with rs2; eauto with asmgen.
        { intros. rewrite Hwrap_data by auto. rewrite Hrs3; unfold incrPC. Simpl. apply PC_not_data_preg. auto. }
        intros; discriminate.
        { intro HFPI.
          specialize (BBIR14 HFPI). unfold bblock_preserves_IR14 in BBIR14. simpl in BBIR14.
          rewrite Hwrap_IR14. rewrite Hrs3 by discriminate.
          unfold incrPC. rewrite Pregmap.gso by discriminate.
          erewrite transl_cbranch_preserves_IR14; eauto. }
        eapply find_label_tail; eauto.
      * (* MBcond false *)
        assert (f0 = f) by congruence. subst f0. monadInv H1. monadInv EQ.
        exploit eval_condition_lessdef.
        eapply preg_vals; eauto.
        all: eauto.
        intros EC.
        exploit (transl_cbranch_correct_gen tge fn). exact EQ0. apply EC.
        intros (rs' & cfi & wrap & CTLeq & ES & ECFI & WSTUCK & WNEXT & REGS).
        subst x0.
        exploit exec_straight_opt_body2. eauto. intros (bdy & EXEB & BTC).
        assert (PCeq': rs2 PC = rs' PC).
        { inv ES; auto. match goal with H: exec_straight _ _ _ _ _ _ _ |- _ => erewrite <- exec_straight_pc; [|eexact H]; eauto end. }
        rewrite PCeq' in PCeq.
        exploit functions_transl. eapply FIND1. eapply TRANSF0. intros FIND'.
        assert (tf = fn) by congruence. subst tf.
        assert (NOOV: size_blocks fn.(fn_blocks) <= Ptrofs.max_unsigned).
        eapply transf_function_no_overflow; eauto.
        generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1.
        destruct (WNEXT (incrPC (Ptrofs.repr (size tbb)) rs') m2) as (rs_w & Hwrap_eq & Hwrap_data & Hwrap_PC & Hwrap_IR14).
        repeat eexists.
        rewrite <- BTC. simpl. rewrite app_nil_r. eauto.
        rewrite <- BTC. simpl. econstructor. rewrite ECFI. exact Hwrap_eq.
        econstructor; eauto.
        rewrite Hwrap_PC. unfold incrPC. rewrite Pregmap.gss. unfold Val.offset_ptr.
        rewrite PCeq. econstructor; eauto.
        eapply agree_exten with rs2; eauto with asmgen.
        { intros. rewrite Hwrap_data by auto. unfold incrPC. Simpl. }
        intros; discriminate.
        { intro HFPI.
          specialize (BBIR14 HFPI). unfold bblock_preserves_IR14 in BBIR14. simpl in BBIR14.
          rewrite Hwrap_IR14. unfold incrPC. rewrite Pregmap.gso by discriminate.
          erewrite transl_cbranch_preserves_IR14; eauto. }
    + (* MBjumptable *)
      destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
      inv TBC. inv TIC. inv H0.
      assert (f0 = f) by congruence. subst f0.
      monadInv H1. monadInv EQ.
      generalize (transf_function_no_overflow _ _ TRANSF0); intro NOOV.
      assert (f1 = f) by congruence. subst f1.
      exploit find_label_goto_label. 4: eapply H14. 1-2: eauto.
      instantiate (2 := (incrPC (Ptrofs.repr (size tbb)) rs2) # IR14 <- Vundef).
      unfold incrPC. Simpl. unfold Val.offset_ptr. rewrite PCeq. reflexivity.
      exploit functions_transl. eapply FIND0. eapply TRANSF0. intros FIND3.
      assert (fn = tf) by congruence. subst fn.
      intros (tc' & rs' & A & B & C).
      exploit ireg_val; eauto. rewrite H11. intros LD; inv LD.
      repeat eexists. econstructor. simpl.
      unfold incrPC. rewrite Pregmap.gso; try discriminate. rewrite <- H1.
      unfold Mach.label in H12. unfold label. rewrite H12.
      eapply A.
      econstructor; eauto.
      eapply agree_undef_regs; eauto. intros. rewrite C; auto with asmgen.
      { unfold incrPC. repeat Simpl; auto. }
      intros; discriminate.
      { intro HFPI. exfalso. specialize (BBIR14 HFPI). unfold bblock_preserves_IR14 in BBIR14.
        simpl in BBIR14. discriminate. }
      eapply find_label_tail; eauto.
    + (* MBreturn *)
      destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
      inv TBC. inv TIC. inv H0.
      assert (f0 = f) by congruence. subst f0.
      assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
      eapply transf_function_no_overflow; eauto.
      assert (f1 = f) by congruence. subst f1.
      exploit make_epilogue_correct. eexact H9. eexact H10. eexact H11. eauto. eauto. eauto. exact RAIR14.
      intros (rs1 & m1 & U & V & W & X & Y & Z).
      exploit exec_straight_body; eauto.
      simpl. eauto.
      intros EXEB. destruct EXEB as [l [MKEPI EXEB]].
      repeat eexists.
      rewrite app_nil_r in MKEPI. rewrite <- MKEPI in EXEB. eauto.
      econstructor. simpl. reflexivity.
      econstructor; eauto.
      unfold incrPC. repeat apply agree_set_other; auto with asmgen.
  - inv MCS. inv MAS. simpl in *. subst. inv Hpstate.
    destruct bb' as [hd' bdy' ex']; simpl in *. subst.
    monadInv TBC. monadInv TIC. simpl in *.
    assert (f0 = f) by congruence. subst f0.
    simpl. repeat eexists.
    econstructor. econstructor. 4: instantiate (3 := false). all:eauto.
      unfold incrPC. rewrite Pregmap.gss. unfold Val.offset_ptr. rewrite PCeq.
      assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
        eapply transf_function_no_overflow; eauto.
      econstructor; eauto.
      generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1. eauto.
    eapply agree_exten; eauto. intros. unfold incrPC; Simpl; auto.
    discriminate.
Qed.

Lemma bbir14_cons:
  forall bi bdy bb,
  MB.body bb = bi :: bdy ->
  bblock_preserves_IR14 bb = true ->
  preserves_IR14 bi = true /\
  bblock_preserves_IR14 {| MB.header := nil; MB.body := bdy; MB.exit := MB.exit bb |} = true.
Proof.
  intros. unfold bblock_preserves_IR14 in *. rewrite H in H0. simpl in *.
  apply andb_true_iff in H0. destruct H0 as [H0 H1].
  apply andb_true_iff in H0. destruct H0 as [H0a H0b].
  split; auto. apply andb_true_iff. auto.
Qed.


Lemma val_add_int_repr_chain_vptr:
  Archi.ptr64 = false ->
  forall b o a c,
  Val.add (Val.add (Vptr b o) (Vint (Int.repr a))) (Vint (Int.repr c)) =
  Val.offset_ptr (Vptr b o) (Ptrofs.repr (a + c)).
Proof.
  intros HArchi b o a c. simpl.
  f_equal. rewrite Ptrofs.add_assoc. f_equal.
  unfold Ptrofs.of_int.
  rewrite Ptrofs.add_unsigned.
  apply Ptrofs.eqm_samerepr.
  apply Ptrofs.eqm_add;
    apply Ptrofs.eqm_unsigned_repr_l, Ptrofs.eqm_unsigned_repr_l, Ptrofs.eqm_refl.
Qed.

Lemma val_offset_ptr_lessdef_add_chain:
  Archi.ptr64 = false ->
  forall sp a c,
  Val.lessdef (Val.offset_ptr sp (Ptrofs.repr (a + c)))
              (Val.add (Val.add sp (Vint (Int.repr a))) (Vint (Int.repr c))).
Proof.
  intros HArchi sp a c. destruct sp;
    try (simpl; constructor).
  rewrite val_add_int_repr_chain_vptr by assumption. apply Val.lessdef_refl.
Qed.

Lemma store_callee_saves_int_to_multi_aux:
  forall (l: list mreg) (il: list ireg) (m_M m_A m_M': mem) (sp: val)
         (base_ofs delta: Z) (ms: Mach.regset) (rs: Asmblockgenproof1.AB.regset),
  Archi.ptr64 = false ->
  (forall r, In r l -> mreg_type r = Tany32) ->
  mmap ireg_of l = OK il ->
  (4 | base_ofs) ->
  (4 | delta) ->
  agree ms sp rs ->
  rs (DR (IR IR14)) = Val.add sp (Vint (Int.repr base_ofs)) ->
  Mem.extends m_M m_A ->
  Mach.store_callee_saves m_M sp (base_ofs + delta) l ms = Some m_M' ->
  exists m_A',
    exec_store_multi_i (rs (DR (IR IR14))) delta
      (map (fun ir => (ir, AST.Many32)) il) rs m_A = Some m_A'
    /\ Mem.extends m_M' m_A'.
Proof.
  induction l as [|r l_rest IH]; intros il m_M m_A m_M' sp base_ofs delta ms rs
    HArchi Hty Hmmap Hbdiv Hddiv AG HIR14 MEXT HMach.
  - simpl in Hmmap. inv Hmmap. simpl in HMach. inv HMach.
    exists m_A. simpl. split; auto.
  - simpl in Hmmap. monadInv Hmmap.
    rename x into i. rename x0 into il'.
    cbn [Mach.store_callee_saves] in HMach.
    rewrite (Hty r) in HMach by (left; reflexivity).
    cbn [AST.typesize] in HMach.
    rewrite (Coqlib.align_same (base_ofs + delta) 4) in HMach
      by (auto using Z.divide_add_r; lia).
    unfold Mach.store_stack in HMach. cbn [chunk_of_type] in HMach.
    destruct (Mem.storev Many32 m_M (Val.offset_ptr sp (Ptrofs.repr (base_ofs + delta)))
                          (ms r)) eqn:HSV; try discriminate.
    rename m into m_M_inter.
    (* Asm side: storev at the matching address (lessdef) *)
    assert (HVal: Val.lessdef (ms r) (rs (DR (IR i)))).
    { pose proof (agree_mregs _ _ _ AG r) as HV.
      rewrite (ireg_of_eq _ _ EQ) in HV. exact HV. }
    assert (HAddr: Val.lessdef
              (Val.offset_ptr sp (Ptrofs.repr (base_ofs + delta)))
              (Val.add (rs (DR (IR IR14))) (Vint (Int.repr delta)))).
    { rewrite HIR14. apply val_offset_ptr_lessdef_add_chain. assumption. }
    edestruct (Mem.storev_extends _ _ _ _ _ _ _ _ MEXT HSV HAddr HVal)
      as [m_A_inter [HSV_A MEXT_inter]].
    (* IH on l_rest with delta+4 *)
    assert (Hty_rest : forall r0, In r0 l_rest -> mreg_type r0 = Tany32).
    { intros. apply Hty. right. assumption. }
    assert (Hddiv4 : (4 | delta + 4)).
    { apply Z.divide_add_r; auto. apply Z.divide_refl. }
    edestruct (IH il' m_M_inter m_A_inter m_M' sp base_ofs (delta + 4) ms rs
                HArchi Hty_rest EQ1 Hbdiv Hddiv4 AG HIR14 MEXT_inter)
      as [m_A' [EX_recurse EX_extends]].
    { replace (base_ofs + (delta + 4)) with (base_ofs + delta + 4) by lia. exact HMach. }
    exists m_A'. split; auto.
    (* Combine: exec_store_multi_i with one storev step + recursion *)
    cbn [exec_store_multi_i map].
    rewrite HSV_A. cbn [size_chunk] in EX_recurse |- *. exact EX_recurse.
Qed.

Lemma store_callee_saves_int_to_multi:
  forall (l: list mreg) (il: list ireg) (m_M m_A m_M': mem) (sp: val)
         (ofs: Z) (ms: Mach.regset) (rs: Asmblockgenproof1.AB.regset),
  (forall r, In r l -> mreg_type r = Tany32) ->
  mmap ireg_of l = OK il ->
  (4 | ofs) ->
  agree ms sp rs ->
  rs (DR (IR IR14)) = Val.add sp (Vint (Int.repr ofs)) ->
  Mem.extends m_M m_A ->
  Mach.store_callee_saves m_M sp ofs l ms = Some m_M' ->
  exists m_A',
    exec_store_multi_i (rs (DR (IR IR14))) 0
      (map (fun ir => (ir, AST.Many32)) il) rs m_A = Some m_A'
    /\ Mem.extends m_M' m_A'.
Proof.
  intros l il m_M m_A m_M' sp ofs ms rs Hty Hmmap Hdiv AG HIR14 MEXT HMach.
  apply (store_callee_saves_int_to_multi_aux l il m_M m_A m_M' sp ofs 0 ms rs);
    auto; try (apply Z.divide_0_r).
  rewrite Z.add_0_r. exact HMach.
Qed.

Lemma store_callee_saves_fp_to_multi_aux:
  forall (l: list mreg) (fl: list freg) (m_M m_A m_M': mem) (sp: val)
         (base_ofs delta: Z) (ms: Mach.regset) (rs: Asmblockgenproof1.AB.regset),
  Archi.ptr64 = false ->
  (forall r, In r l -> mreg_type r = Tany64) ->
  mmap freg_of l = OK fl ->
  (8 | base_ofs) ->
  (8 | delta) ->
  agree ms sp rs ->
  rs (DR (IR IR14)) = Val.add sp (Vint (Int.repr base_ofs)) ->
  Mem.extends m_M m_A ->
  Mach.store_callee_saves m_M sp (base_ofs + delta) l ms = Some m_M' ->
  exists m_A',
    exec_store_multi_f (rs (DR (IR IR14))) delta
      (map (fun fr => (fr, AST.Many64)) fl) rs m_A = Some m_A'
    /\ Mem.extends m_M' m_A'.
Proof.
  induction l as [|r l_rest IH]; intros fl m_M m_A m_M' sp base_ofs delta ms rs
    HArchi Hty Hmmap Hbdiv Hddiv AG HIR14 MEXT HMach.
  - simpl in Hmmap. inv Hmmap. simpl in HMach. inv HMach.
    exists m_A. simpl. split; auto.
  - simpl in Hmmap. monadInv Hmmap.
    rename x into f. rename x0 into fl'.
    cbn [Mach.store_callee_saves] in HMach.
    rewrite (Hty r) in HMach by (left; reflexivity).
    cbn [AST.typesize] in HMach.
    rewrite (Coqlib.align_same (base_ofs + delta) 8) in HMach
      by (auto using Z.divide_add_r; lia).
    unfold Mach.store_stack in HMach. cbn [chunk_of_type] in HMach.
    destruct (Mem.storev Many64 m_M (Val.offset_ptr sp (Ptrofs.repr (base_ofs + delta)))
                          (ms r)) eqn:HSV; try discriminate.
    rename m into m_M_inter.
    assert (HVal: Val.lessdef (ms r) (rs (DR (FR f)))).
    { pose proof (agree_mregs _ _ _ AG r) as HV.
      rewrite (freg_of_eq _ _ EQ) in HV. exact HV. }
    assert (HAddr: Val.lessdef
              (Val.offset_ptr sp (Ptrofs.repr (base_ofs + delta)))
              (Val.add (rs (DR (IR IR14))) (Vint (Int.repr delta)))).
    { rewrite HIR14. apply val_offset_ptr_lessdef_add_chain. assumption. }
    edestruct (Mem.storev_extends _ _ _ _ _ _ _ _ MEXT HSV HAddr HVal)
      as [m_A_inter [HSV_A MEXT_inter]].
    assert (Hty_rest : forall r0, In r0 l_rest -> mreg_type r0 = Tany64).
    { intros. apply Hty. right. assumption. }
    assert (Hddiv8 : (8 | delta + 8)).
    { apply Z.divide_add_r; auto. apply Z.divide_refl. }
    edestruct (IH fl' m_M_inter m_A_inter m_M' sp base_ofs (delta + 8) ms rs
                HArchi Hty_rest EQ1 Hbdiv Hddiv8 AG HIR14 MEXT_inter)
      as [m_A' [EX_recurse EX_extends]].
    { replace (base_ofs + (delta + 8)) with (base_ofs + delta + 8) by lia. exact HMach. }
    exists m_A'. split; auto.
    cbn [exec_store_multi_f map].
    rewrite HSV_A. cbn [size_chunk] in EX_recurse |- *. exact EX_recurse.
Qed.

Lemma store_callee_saves_fp_to_multi:
  forall (l: list mreg) (fl: list freg) (m_M m_A m_M': mem) (sp: val)
         (ofs: Z) (ms: Mach.regset) (rs: Asmblockgenproof1.AB.regset),
  (forall r, In r l -> mreg_type r = Tany64) ->
  mmap freg_of l = OK fl ->
  (8 | ofs) ->
  agree ms sp rs ->
  rs (DR (IR IR14)) = Val.add sp (Vint (Int.repr ofs)) ->
  Mem.extends m_M m_A ->
  Mach.store_callee_saves m_M sp ofs l ms = Some m_M' ->
  exists m_A',
    exec_store_multi_f (rs (DR (IR IR14))) 0
      (map (fun fr => (fr, AST.Many64)) fl) rs m_A = Some m_A'
    /\ Mem.extends m_M' m_A'.
Proof.
  intros l fl m_M m_A m_M' sp ofs ms rs Hty Hmmap Hdiv AG HIR14 MEXT HMach.
  apply (store_callee_saves_fp_to_multi_aux l fl m_M m_A m_M' sp ofs 0 ms rs);
    auto; try (apply Z.divide_0_r).
  rewrite Z.add_0_r. exact HMach.
Qed.

Lemma load_callee_saves_int_to_multi_aux:
  forall (l: list mreg) (il: list ireg) (m_M m_A: mem) (sp: val)
         (base_ofs delta: Z) (ms ms': Mach.regset) (rs: Asmblockgenproof1.AB.regset),
  Archi.ptr64 = false ->
  (forall r, In r l -> mreg_type r = Tany32) ->
  mmap ireg_of l = OK il ->
  (4 | base_ofs) ->
  (4 | delta) ->
  agree ms sp rs ->
  rs (DR (IR IR14)) = Val.add sp (Vint (Int.repr base_ofs)) ->
  Mem.extends m_M m_A ->
  Mach.load_callee_saves m_M sp (base_ofs + delta) l ms = Some ms' ->
  exists rs',
    exec_load_multi_i (rs (DR (IR IR14))) delta
      (map (fun ir => (ir, AST.Many32)) il) rs m_A = Some rs'
    /\ agree ms' sp rs'.
Proof.
  induction l as [|r l_rest IH]; intros il m_M m_A sp base_ofs delta ms ms' rs
    HArchi Hty Hmmap Hbdiv Hddiv AG HIR14 MEXT HMach.
  - simpl in Hmmap. inv Hmmap. simpl in HMach. inv HMach.
    exists rs. simpl. split; auto.
  - simpl in Hmmap. monadInv Hmmap.
    rename x into i. rename x0 into il'.
    cbn [Mach.load_callee_saves] in HMach.
    rewrite (Hty r) in HMach by (left; reflexivity).
    cbn [AST.typesize] in HMach.
    rewrite (Coqlib.align_same (base_ofs + delta) 4) in HMach
      by (auto using Z.divide_add_r; lia).
    unfold Mach.load_stack in HMach. cbn [chunk_of_type] in HMach.
    destruct (Mem.loadv Many32 m_M (Val.offset_ptr sp (Ptrofs.repr (base_ofs + delta))))
             as [v_M|] eqn:HLV; try discriminate.
    assert (HAddr: Val.lessdef
              (Val.offset_ptr sp (Ptrofs.repr (base_ofs + delta)))
              (Val.add (rs (DR (IR IR14))) (Vint (Int.repr delta)))).
    { rewrite HIR14. apply val_offset_ptr_lessdef_add_chain. assumption. }
    edestruct (Mem.loadv_extends _ _ _ _ _ _ MEXT HLV HAddr)
      as [v_A [HLV_A HVlessdef]].
    set (rs_new := rs # (IR i) <- v_A).
    assert (AG_new : agree (Mach.Regmap.set r v_M ms) sp rs_new).
    { unfold rs_new. rewrite <- (ireg_of_eq _ _ EQ).
      apply agree_set_mreg_parallel; assumption. }
    assert (HIR14_new : rs_new (DR (IR IR14)) = Val.add sp (Vint (Int.repr base_ofs))).
    { unfold rs_new. rewrite Pregmap.gso by
        (apply not_eq_sym; apply (ireg_of_not_R14' _ _ EQ)).
      exact HIR14. }
    assert (Hty_rest : forall r0, In r0 l_rest -> mreg_type r0 = Tany32).
    { intros. apply Hty. right. assumption. }
    assert (Hddiv4 : (4 | delta + 4)).
    { apply Z.divide_add_r; auto. apply Z.divide_refl. }
    edestruct (IH il' m_M m_A sp base_ofs (delta + 4) (Mach.Regmap.set r v_M ms) ms' rs_new
                HArchi Hty_rest EQ1 Hbdiv Hddiv4 AG_new HIR14_new MEXT)
      as [rs' [EX_recurse AG_final]].
    { replace (base_ofs + (delta + 4)) with (base_ofs + delta + 4) by lia. exact HMach. }
    exists rs'. split; auto.
    cbn [exec_load_multi_i map].
    rewrite HLV_A. cbn [size_chunk] in EX_recurse |- *.
    replace (rs (DR (IR IR14))) with (rs_new (DR (IR IR14)))
      by (rewrite HIR14_new; rewrite HIR14; reflexivity).
    exact EX_recurse.
Qed.

Lemma load_callee_saves_int_to_multi:
  forall (l: list mreg) (il: list ireg) (m_M m_A: mem) (sp: val)
         (ofs: Z) (ms ms': Mach.regset) (rs: Asmblockgenproof1.AB.regset),
  (forall r, In r l -> mreg_type r = Tany32) ->
  mmap ireg_of l = OK il ->
  (4 | ofs) ->
  agree ms sp rs ->
  rs (DR (IR IR14)) = Val.add sp (Vint (Int.repr ofs)) ->
  Mem.extends m_M m_A ->
  Mach.load_callee_saves m_M sp ofs l ms = Some ms' ->
  exists rs',
    exec_load_multi_i (rs (DR (IR IR14))) 0
      (map (fun ir => (ir, AST.Many32)) il) rs m_A = Some rs'
    /\ agree ms' sp rs'.
Proof.
  intros l il m_M m_A sp ofs ms ms' rs Hty Hmmap Hdiv AG HIR14 MEXT HMach.
  apply (load_callee_saves_int_to_multi_aux l il m_M m_A sp ofs 0 ms ms' rs);
    auto; try (apply Z.divide_0_r).
  rewrite Z.add_0_r. exact HMach.
Qed.

Lemma load_callee_saves_fp_to_multi_aux:
  forall (l: list mreg) (fl: list freg) (m_M m_A: mem) (sp: val)
         (base_ofs delta: Z) (ms ms': Mach.regset) (rs: Asmblockgenproof1.AB.regset),
  Archi.ptr64 = false ->
  (forall r, In r l -> mreg_type r = Tany64) ->
  mmap freg_of l = OK fl ->
  (8 | base_ofs) ->
  (8 | delta) ->
  agree ms sp rs ->
  rs (DR (IR IR14)) = Val.add sp (Vint (Int.repr base_ofs)) ->
  Mem.extends m_M m_A ->
  Mach.load_callee_saves m_M sp (base_ofs + delta) l ms = Some ms' ->
  exists rs',
    exec_load_multi_f (rs (DR (IR IR14))) delta
      (map (fun fr => (fr, AST.Many64)) fl) rs m_A = Some rs'
    /\ agree ms' sp rs'.
Proof.
  induction l as [|r l_rest IH]; intros fl m_M m_A sp base_ofs delta ms ms' rs
    HArchi Hty Hmmap Hbdiv Hddiv AG HIR14 MEXT HMach.
  - simpl in Hmmap. inv Hmmap. simpl in HMach. inv HMach.
    exists rs. simpl. split; auto.
  - simpl in Hmmap. monadInv Hmmap.
    rename x into f. rename x0 into fl'.
    cbn [Mach.load_callee_saves] in HMach.
    rewrite (Hty r) in HMach by (left; reflexivity).
    cbn [AST.typesize] in HMach.
    rewrite (Coqlib.align_same (base_ofs + delta) 8) in HMach
      by (auto using Z.divide_add_r; lia).
    unfold Mach.load_stack in HMach. cbn [chunk_of_type] in HMach.
    destruct (Mem.loadv Many64 m_M (Val.offset_ptr sp (Ptrofs.repr (base_ofs + delta))))
             as [v_M|] eqn:HLV; try discriminate.
    assert (HAddr: Val.lessdef
              (Val.offset_ptr sp (Ptrofs.repr (base_ofs + delta)))
              (Val.add (rs (DR (IR IR14))) (Vint (Int.repr delta)))).
    { rewrite HIR14. apply val_offset_ptr_lessdef_add_chain. assumption. }
    edestruct (Mem.loadv_extends _ _ _ _ _ _ MEXT HLV HAddr)
      as [v_A [HLV_A HVlessdef]].
    set (rs_new := rs # (FR f) <- v_A).
    assert (AG_new : agree (Mach.Regmap.set r v_M ms) sp rs_new).
    { unfold rs_new. rewrite <- (freg_of_eq _ _ EQ).
      apply agree_set_mreg_parallel; assumption. }
    assert (HIR14_new : rs_new (DR (IR IR14)) = Val.add sp (Vint (Int.repr base_ofs))).
    { unfold rs_new. rewrite Pregmap.gso by discriminate. exact HIR14. }
    assert (Hty_rest : forall r0, In r0 l_rest -> mreg_type r0 = Tany64).
    { intros. apply Hty. right. assumption. }
    assert (Hddiv8 : (8 | delta + 8)).
    { apply Z.divide_add_r; auto. apply Z.divide_refl. }
    edestruct (IH fl' m_M m_A sp base_ofs (delta + 8) (Mach.Regmap.set r v_M ms) ms' rs_new
                HArchi Hty_rest EQ1 Hbdiv Hddiv8 AG_new HIR14_new MEXT)
      as [rs' [EX_recurse AG_final]].
    { replace (base_ofs + (delta + 8)) with (base_ofs + delta + 8) by lia. exact HMach. }
    exists rs'. split; auto.
    cbn [exec_load_multi_f map].
    rewrite HLV_A. cbn [size_chunk] in EX_recurse |- *.
    replace (rs (DR (IR IR14))) with (rs_new (DR (IR IR14)))
      by (rewrite HIR14_new; rewrite HIR14; reflexivity).
    exact EX_recurse.
Qed.

Lemma load_callee_saves_fp_to_multi:
  forall (l: list mreg) (fl: list freg) (m_M m_A: mem) (sp: val)
         (ofs: Z) (ms ms': Mach.regset) (rs: Asmblockgenproof1.AB.regset),
  (forall r, In r l -> mreg_type r = Tany64) ->
  mmap freg_of l = OK fl ->
  (8 | ofs) ->
  agree ms sp rs ->
  rs (DR (IR IR14)) = Val.add sp (Vint (Int.repr ofs)) ->
  Mem.extends m_M m_A ->
  Mach.load_callee_saves m_M sp ofs l ms = Some ms' ->
  exists rs',
    exec_load_multi_f (rs (DR (IR IR14))) 0
      (map (fun fr => (fr, AST.Many64)) fl) rs m_A = Some rs'
    /\ agree ms' sp rs'.
Proof.
  intros l fl m_M m_A sp ofs ms ms' rs Hty Hmmap Hdiv AG HIR14 MEXT HMach.
  apply (load_callee_saves_fp_to_multi_aux l fl m_M m_A sp ofs 0 ms ms' rs);
    auto; try (apply Z.divide_0_r).
  rewrite Z.add_0_r. exact HMach.
Qed.


Lemma ireg_of_type:
  forall r r', ireg_of r = OK r' -> mreg_type r = Tany32.
Proof.
  unfold ireg_of, preg_of, dreg_of. intros. destruct r; inv H; reflexivity.
Qed.

Lemma freg_of_type:
  forall r r', freg_of r = OK r' -> mreg_type r = Tany64.
Proof.
  unfold freg_of, preg_of, dreg_of. intros. destruct r; inv H; reflexivity.
Qed.

Lemma mmap_ireg_of_type:
  forall l il, mmap ireg_of l = OK il ->
  forall r, In r l -> mreg_type r = Tany32.
Proof.
  induction l; simpl; intros il MM r0 IN.
  - contradiction.
  - monadInv MM. destruct IN as [HE | HI].
    + subst. eapply ireg_of_type; eauto.
    + eapply IHl; eauto.
Qed.

Lemma mmap_freg_of_type:
  forall l fl, mmap freg_of l = OK fl ->
  forall r, In r l -> mreg_type r = Tany64.
Proof.
  induction l; simpl; intros fl MM r0 IN.
  - contradiction.
  - monadInv MM. destruct IN as [HE | HI].
    + subst. eapply freg_of_type; eauto.
    + eapply IHl; eauto.
Qed.


Lemma store_callee_saves_align_first_int:
  forall m sp ofs r l_rest ms,
  mreg_type r = Tany32 ->
  Mach.store_callee_saves m sp ofs (r :: l_rest) ms =
  Mach.store_callee_saves m sp (align ofs 4) (r :: l_rest) ms.
Proof.
  intros. cbn [Mach.store_callee_saves]. rewrite H. cbn [AST.typesize].
  rewrite (Coqlib.align_same (align ofs 4) 4); auto.
  - lia.
  - apply Coqlib.align_divides; lia.
Qed.

Lemma store_callee_saves_align_first_fp:
  forall m sp ofs r l_rest ms,
  mreg_type r = Tany64 ->
  Mach.store_callee_saves m sp ofs (r :: l_rest) ms =
  Mach.store_callee_saves m sp (align ofs 8) (r :: l_rest) ms.
Proof.
  intros. cbn [Mach.store_callee_saves]. rewrite H. cbn [AST.typesize].
  rewrite (Coqlib.align_same (align ofs 8) 8); auto.
  - lia.
  - apply Coqlib.align_divides; lia.
Qed.

Lemma load_callee_saves_align_first_int:
  forall m sp ofs r l_rest ms,
  mreg_type r = Tany32 ->
  Mach.load_callee_saves m sp ofs (r :: l_rest) ms =
  Mach.load_callee_saves m sp (align ofs 4) (r :: l_rest) ms.
Proof.
  intros. cbn [Mach.load_callee_saves]. rewrite H. cbn [AST.typesize].
  rewrite (Coqlib.align_same (align ofs 4) 4); auto.
  - lia.
  - apply Coqlib.align_divides; lia.
Qed.

Lemma load_callee_saves_align_first_fp:
  forall m sp ofs r l_rest ms,
  mreg_type r = Tany64 ->
  Mach.load_callee_saves m sp ofs (r :: l_rest) ms =
  Mach.load_callee_saves m sp (align ofs 8) (r :: l_rest) ms.
Proof.
  intros. cbn [Mach.load_callee_saves]. rewrite H. cbn [AST.typesize].
  rewrite (Coqlib.align_same (align ofs 8) 8); auto.
  - lia.
  - apply Coqlib.align_divides; lia.
Qed.

Well-formedness discharge for the new stm_iregs_wf/ldm_iregs_wf/ vstm_fregs_wf/vldm_fregs_wf gates added to Asmblock.exec_load/ exec_store. The chunk-uniformity components are trivial because the lowering transl_savecallee/transl_restorecallee hard-codes Many32/Many64. The sortedness/contiguity components rely on the source list being sorted in IndexedMreg.index order — a fact established by Stacking via RegSet.elements but not currently threaded through the simulation. Those discharges are admitted here pending a follow-up that lifts the sortedness invariant to step_simu_basic.


Lemma stm_iregs_chunks_ok_const_Many32:
  forall il, Asm.stm_iregs_chunks_ok (map (fun ir : ireg => (ir, AST.Many32)) il) = true.
Proof.
  induction il; simpl; auto.
Qed.

Lemma vstm_fregs_chunks_ok_const_Many64:
  forall fl, Asm.vstm_fregs_chunks_ok (map (fun fr : freg => (fr, AST.Many64)) fl) = true.
Proof.
  induction fl; simpl; auto.
Qed.

fregs_contiguous fl = true gives strict consecutive freg_indexes, matching exactly what vstm_fregs_sorted (-1) requires.
Lemma vstm_fregs_sorted_of_contiguous:
  forall fl,
  fregs_contiguous fl = true ->
  Asm.vstm_fregs_sorted (-1) (map (fun fr : freg => (fr, AST.Many64)) fl) = true.
Proof.
  assert (HELPER: forall fl prev,
    match fl with
    | nil => True
    | f :: _ => Z.ltb prev 0 = true \/ Z.eqb (prev + 1) (Asm.freg_index f) = true
    end ->
    fregs_contiguous fl = true ->
    Asm.vstm_fregs_sorted prev (map (fun fr : freg => (fr, AST.Many64)) fl) = true).
  { induction fl as [|f tl IH]; intros prev Hstart Hcont; simpl; auto.
    assert (HHEAD: (Z.ltb prev 0 || Z.eqb (prev + 1) (Asm.freg_index f)) = true).
    { destruct Hstart as [H|H]; [rewrite H | rewrite H, Bool.orb_true_r]; reflexivity. }
    rewrite HHEAD. cbn.
    apply IH.
    - destruct tl as [|f2 tl']; auto.
      simpl in Hcont. apply andb_prop in Hcont. destruct Hcont as [Hpair _].
      right. apply Z.eqb_eq in Hpair. apply Z.eqb_eq. lia.
    - simpl in Hcont. destruct tl; auto. apply andb_prop in Hcont; tauto. }
  intros fl Hcont. apply HELPER; auto.
  destruct fl; [exact I | left; reflexivity].
Qed.

Sortedness of the ireg list constructed by mmap ireg_of in transl_savecallee/transl_restorecallee. Requires that the source l: list mreg is sorted in IndexedMreg.index order; that hypothesis arrives via the Mach/Machblock per-inst precondition on Msavecallee/Mrestorecallee (established by Stacking from used_callee_save_sorted).
Lemma stm_iregs_sorted_of_mmap:
  forall l il,
  Bounds.mregs_strictly_ascending l = true ->
  mmap ireg_of l = OK il ->
  Asm.stm_iregs_sorted (-1) (map (fun ir : ireg => (ir, AST.Many32)) il) = true.
Proof.
  assert (HELPER: forall l il prev,
    Bounds.mregs_strictly_ascending l = true ->
    mmap ireg_of l = OK il ->
    match il with
    | nil => True
    | ir :: _ => Z.ltb prev (Asm.ireg_index ir) = true
    end ->
    Asm.stm_iregs_sorted prev (map (fun ir : ireg => (ir, AST.Many32)) il) = true).
  { induction l as [|r l_rest IH]; intros il prev HSORT MM Hstart; simpl in MM.
    - inv MM. simpl. reflexivity.
    - monadInv MM. simpl. rewrite Hstart. simpl.
      apply IH.
      + eapply Bounds.mregs_strictly_ascending_tail; eauto.
      + exact EQ1.
      + destruct x0 as [|ir2 rest']; [exact I|].
        destruct l_rest as [|r2 rest_l]; [simpl in EQ1; discriminate|].
        simpl in EQ1. monadInv EQ1.
        simpl in HSORT. apply andb_prop in HSORT. destruct HSORT as [Hlt _].
        apply Pos.ltb_lt in Hlt.
        unfold ireg_of, preg_of, dreg_of in EQ, EQ0.
        destruct r, r2; inv EQ; inv EQ0; cbn in Hlt |- *;
          try (exfalso; lia); reflexivity. }
  intros l il HSORT MM. apply HELPER with (l := l); auto.
  destruct il as [|ir tl]; [exact I|].
  destruct l; [discriminate|]. simpl in MM. monadInv MM.
  unfold ireg_of, preg_of, dreg_of in EQ. destruct m; inv EQ; cbn; reflexivity.
Qed.

Lemma ireg_of_not_IR14:
  forall m r, ireg_of m = OK r -> r <> IR14.
Proof.
  intros m r H. pose proof (Asmblockgenproof1.ireg_of_not_R14 _ _ H) as Hne.
  intro Heq. apply Hne. rewrite Heq. reflexivity.
Qed.

Lemma ldm_iregs_no_base_IR14_of_mmap:
  forall l il,
  mmap ireg_of l = OK il ->
  Asm.stm_iregs_no_base IR14 (map (fun ir : ireg => (ir, AST.Many32)) il) = true.
Proof.
  induction l as [|r l_rest IH]; intros il MM; simpl in MM.
  - inv MM. reflexivity.
  - monadInv MM.
    pose proof (ireg_of_not_IR14 _ _ EQ) as Hne.
    cbn [map Asm.stm_iregs_no_base].
    apply Bool.andb_true_iff. split.
    + apply Bool.negb_true_iff. apply Bool.not_true_iff_false.
      intro Heq. apply Hne. symmetry. apply (proj_sumbool_true _ Heq).
    + eapply IH; eauto.
Qed.

Convenience corollaries packaging the conjuncts of the _wf booleans.
Lemma stm_iregs_wf_callee_saves_int:
  forall l il,
  Bounds.mregs_strictly_ascending l = true ->
  mmap ireg_of l = OK il ->
  Asm.stm_iregs_wf IR14 (map (fun ir : ireg => (ir, AST.Many32)) il) = true.
Proof.
  intros. unfold Asm.stm_iregs_wf.
  rewrite stm_iregs_chunks_ok_const_Many32, Bool.andb_true_r.
  eapply stm_iregs_sorted_of_mmap; eauto.
Qed.

Lemma ldm_iregs_wf_callee_saves_int:
  forall l il,
  Bounds.mregs_strictly_ascending l = true ->
  mmap ireg_of l = OK il ->
  Asm.ldm_iregs_wf IR14 (map (fun ir : ireg => (ir, AST.Many32)) il) = true.
Proof.
  intros. unfold Asm.ldm_iregs_wf.
  rewrite (stm_iregs_wf_callee_saves_int _ _ H H0), Bool.andb_true_l.
  eapply ldm_iregs_no_base_IR14_of_mmap; eauto.
Qed.

Lemma vstm_fregs_wf_contiguous_callee_saves:
  forall fl,
  fregs_contiguous fl = true ->
  Asm.vstm_fregs_wf (map (fun fr : freg => (fr, AST.Many64)) fl) = true.
Proof.
  intros. unfold Asm.vstm_fregs_wf.
  rewrite vstm_fregs_chunks_ok_const_Many64, Bool.andb_true_r.
  eapply vstm_fregs_sorted_of_contiguous; eauto.
Qed.

Lemma vldm_fregs_wf_contiguous_callee_saves:
  forall fl,
  fregs_contiguous fl = true ->
  Asm.vldm_fregs_wf (map (fun fr : freg => (fr, AST.Many64)) fl) = true.
Proof.
unfold Asm.vldm_fregs_wf. apply vstm_fregs_wf_contiguous_callee_saves. Qed.

Per-register fp save/restore correctness for the non-contiguous fallback path emitted by transl_savecallee/transl_restorecallee when fregs_contiguous fl = false. These mirror the store/load_callee_saves_fp_to_multi lemmas but target the store_fp_per_reg/load_fp_per_reg expansions instead of Pcvstm/Pcvldm.


Lemma store_callee_saves_fp_to_per_reg_aux:
  forall (l: list mreg) (fl: list freg) (m_M m_A m_M': mem) (sp: val)
         (base_ofs delta: Z) (ms: Mach.regset) (rs: Asmblockgenproof1.AB.regset),
  Archi.ptr64 = false ->
  (forall r, In r l -> mreg_type r = Tany64) ->
  mmap freg_of l = OK fl ->
  (8 | base_ofs) ->
  (8 | delta) ->
  agree ms sp rs ->
  rs (DR (IR IR14)) = Val.add sp (Vint (Int.repr base_ofs)) ->
  Mem.extends m_M m_A ->
  Mach.store_callee_saves m_M sp (base_ofs + delta) l ms = Some m_M' ->
  exists m_A',
    exec_body tge (store_fp_per_reg fl delta nil) rs m_A = Next rs m_A'
    /\ Mem.extends m_M' m_A'.
Proof.
  induction l as [|r l_rest IH]; intros fl m_M m_A m_M' sp base_ofs delta ms rs
    HArchi Hty Hmmap Hbdiv Hddiv AG HIR14 MEXT HMach.
  - simpl in Hmmap. inv Hmmap. simpl in HMach. inv HMach.
    exists m_A. simpl. split; auto.
  - simpl in Hmmap. monadInv Hmmap.
    rename x into f. rename x0 into fl'.
    cbn [Mach.store_callee_saves] in HMach.
    rewrite (Hty r) in HMach by (left; reflexivity).
    cbn [AST.typesize] in HMach.
    rewrite (Coqlib.align_same (base_ofs + delta) 8) in HMach
      by (auto using Z.divide_add_r; lia).
    unfold Mach.store_stack in HMach. cbn [chunk_of_type] in HMach.
    destruct (Mem.storev Many64 m_M (Val.offset_ptr sp (Ptrofs.repr (base_ofs + delta)))
                          (ms r)) eqn:HSV; try discriminate.
    rename m into m_M_inter.
    assert (HVal: Val.lessdef (ms r) (rs (DR (FR f)))).
    { pose proof (agree_mregs _ _ _ AG r) as HV.
      rewrite (freg_of_eq _ _ EQ) in HV. exact HV. }
    assert (HAddr: Val.lessdef
              (Val.offset_ptr sp (Ptrofs.repr (base_ofs + delta)))
              (Val.add (rs (DR (IR IR14))) (Vint (Int.repr delta)))).
    { rewrite HIR14. apply val_offset_ptr_lessdef_add_chain. assumption. }
    edestruct (Mem.storev_extends _ _ _ _ _ _ _ _ MEXT HSV HAddr HVal)
      as [m_A_inter [HSV_A MEXT_inter]].
    assert (Hty_rest : forall r0, In r0 l_rest -> mreg_type r0 = Tany64).
    { intros. apply Hty. right. assumption. }
    assert (Hddiv8 : (8 | delta + 8)).
    { apply Z.divide_add_r; auto. apply Z.divide_refl. }
    edestruct (IH fl' m_M_inter m_A_inter m_M' sp base_ofs (delta + 8) ms rs
                HArchi Hty_rest EQ1 Hbdiv Hddiv8 AG HIR14 MEXT_inter)
      as [m_A' [EX_recurse EX_extends]].
    { replace (base_ofs + (delta + 8)) with (base_ofs + delta + 8) by lia. exact HMach. }
    exists m_A'. split; auto.
    cbn [store_fp_per_reg exec_body].
    cbn [exec_basic exec_store chunk_stf].
    unfold exec_store_aux. rewrite HSV_A. exact EX_recurse.
Qed.

Lemma store_callee_saves_fp_to_per_reg:
  forall (l: list mreg) (fl: list freg) (m_M m_A m_M': mem) (sp: val)
         (ofs: Z) (ms: Mach.regset) (rs: Asmblockgenproof1.AB.regset),
  (forall r, In r l -> mreg_type r = Tany64) ->
  mmap freg_of l = OK fl ->
  (8 | ofs) ->
  agree ms sp rs ->
  rs (DR (IR IR14)) = Val.add sp (Vint (Int.repr ofs)) ->
  Mem.extends m_M m_A ->
  Mach.store_callee_saves m_M sp ofs l ms = Some m_M' ->
  exists m_A',
    exec_body tge (store_fp_per_reg fl 0 nil) rs m_A = Next rs m_A'
    /\ Mem.extends m_M' m_A'.
Proof.
  intros l fl m_M m_A m_M' sp ofs ms rs Hty Hmmap Hdiv AG HIR14 MEXT HMach.
  apply (store_callee_saves_fp_to_per_reg_aux l fl m_M m_A m_M' sp ofs 0 ms rs);
    auto; try (apply Z.divide_0_r).
  rewrite Z.add_0_r. exact HMach.
Qed.

Lemma load_callee_saves_fp_to_per_reg_aux:
  forall (l: list mreg) (fl: list freg) (m_M m_A: mem) (sp: val)
         (base_ofs delta: Z) (ms ms': Mach.regset)
         (rs: Asmblockgenproof1.AB.regset),
  Archi.ptr64 = false ->
  (forall r, In r l -> mreg_type r = Tany64) ->
  mmap freg_of l = OK fl ->
  (8 | base_ofs) ->
  (8 | delta) ->
  agree ms sp rs ->
  rs (DR (IR IR14)) = Val.add sp (Vint (Int.repr base_ofs)) ->
  Mem.extends m_M m_A ->
  Mach.load_callee_saves m_M sp (base_ofs + delta) l ms = Some ms' ->
  exists rs',
    exec_body tge (load_fp_per_reg fl delta nil) rs m_A = Next rs' m_A
    /\ agree ms' sp rs'
    /\ rs' (DR (IR IR14)) = rs (DR (IR IR14)).
Proof.
  induction l as [|r l_rest IH]; intros fl m_M m_A sp base_ofs delta ms ms' rs
    HArchi Hty Hmmap Hbdiv Hddiv AG HIR14 MEXT HMach.
  - simpl in Hmmap. inv Hmmap. simpl in HMach. inv HMach.
    exists rs. simpl. split; [reflexivity|]. split; [exact AG | reflexivity].
  - simpl in Hmmap. monadInv Hmmap.
    rename x into f. rename x0 into fl'.
    cbn [Mach.load_callee_saves] in HMach.
    rewrite (Hty r) in HMach by (left; reflexivity).
    cbn [AST.typesize] in HMach.
    rewrite (Coqlib.align_same (base_ofs + delta) 8) in HMach
      by (auto using Z.divide_add_r; lia).
    unfold Mach.load_stack in HMach. cbn [chunk_of_type] in HMach.
    destruct (Mem.loadv Many64 m_M (Val.offset_ptr sp (Ptrofs.repr (base_ofs + delta))))
             as [v_M|] eqn:HLV; try discriminate.
    assert (HAddr: Val.lessdef
              (Val.offset_ptr sp (Ptrofs.repr (base_ofs + delta)))
              (Val.add (rs (DR (IR IR14))) (Vint (Int.repr delta)))).
    { rewrite HIR14. apply val_offset_ptr_lessdef_add_chain. assumption. }
    edestruct (Mem.loadv_extends _ _ _ _ _ _ MEXT HLV HAddr)
      as [v_A [HLV_A HVlessdef]].
    set (rs_new := rs # (FR f) <- v_A).
    assert (AG_new : agree (Mach.Regmap.set r v_M ms) sp rs_new).
    { unfold rs_new. rewrite <- (freg_of_eq _ _ EQ).
      apply agree_set_mreg_parallel; assumption. }
    assert (HIR14_new : rs_new (DR (IR IR14)) = Val.add sp (Vint (Int.repr base_ofs))).
    { unfold rs_new. rewrite Pregmap.gso by discriminate. exact HIR14. }
    assert (Hty_rest : forall r0, In r0 l_rest -> mreg_type r0 = Tany64).
    { intros. apply Hty. right. assumption. }
    assert (Hddiv8 : (8 | delta + 8)).
    { apply Z.divide_add_r; auto. apply Z.divide_refl. }
    edestruct (IH fl' m_M m_A sp base_ofs (delta + 8) (Mach.Regmap.set r v_M ms) ms' rs_new
                HArchi Hty_rest EQ1 Hbdiv Hddiv8 AG_new HIR14_new MEXT)
      as [rs' [EX_recurse [AG_final HIR14_final]]].
    { replace (base_ofs + (delta + 8)) with (base_ofs + delta + 8) by lia. exact HMach. }
    exists rs'. split; [|split].
    + cbn [load_fp_per_reg exec_body].
      cbn [exec_basic exec_load chunk_ldf].
      unfold exec_load_aux. rewrite HLV_A. exact EX_recurse.
    + exact AG_final.
    + rewrite HIR14_final. unfold rs_new. rewrite Pregmap.gso by discriminate.
      reflexivity.
Qed.

Lemma load_callee_saves_fp_to_per_reg:
  forall (l: list mreg) (fl: list freg) (m_M m_A: mem) (sp: val)
         (ofs: Z) (ms ms': Mach.regset) (rs: Asmblockgenproof1.AB.regset),
  (forall r, In r l -> mreg_type r = Tany64) ->
  mmap freg_of l = OK fl ->
  (8 | ofs) ->
  agree ms sp rs ->
  rs (DR (IR IR14)) = Val.add sp (Vint (Int.repr ofs)) ->
  Mem.extends m_M m_A ->
  Mach.load_callee_saves m_M sp ofs l ms = Some ms' ->
  exists rs',
    exec_body tge (load_fp_per_reg fl 0 nil) rs m_A = Next rs' m_A
    /\ agree ms' sp rs'
    /\ rs' (DR (IR IR14)) = rs (DR (IR IR14)).
Proof.
  intros l fl m_M m_A sp ofs ms ms' rs Hty Hmmap Hdiv AG HIR14 MEXT HMach.
  apply (load_callee_saves_fp_to_per_reg_aux l fl m_M m_A sp ofs 0 ms ms' rs);
    auto; try (apply Z.divide_0_r).
  rewrite Z.add_0_r. exact HMach.
Qed.

Lemma exec_body_trans:
  forall l l' rs0 m0 rs1 m1 rs2 m2,
  exec_body tge l rs0 m0 = Next rs1 m1 ->
  exec_body tge l' rs1 m1 = Next rs2 m2 ->
  exec_body tge (l++l') rs0 m0 = Next rs2 m2.
Proof.
  induction l.
  - simpl. induction l'. intros.
    + simpl in *. congruence.
    + intros. inv H. auto.
  - intros until m2. intros EXEB1 EXEB2.
    inv EXEB1. destruct (exec_basic _) eqn:EBI; try discriminate.
    simpl. rewrite EBI. eapply IHl; eauto.
Qed.

Theorem step_simu_basic:
  forall bb bb' s fb sp c ms m rs1 m1 ms' m' bi cs1 tbdy bdy,
  MB.header bb = nil -> MB.body bb = bi::(bdy) ->
  bb' = {| MB.header := nil; MB.body := bdy; MB.exit := MB.exit bb |} ->
  basic_step ge s fb sp ms m bi ms' m' ->
  pstate cs1 = (State rs1 m1) -> pbody1 cs1 = tbdy ->
  match_codestate fb (MB.State s fb sp (bb::c) ms m) cs1 ->
  (exists rs2 m2 l cs2 tbdy',
       cs2 = {| pstate := (State rs2 m2); pheader := nil; pbody1 := tbdy'; pbody2 := pbody2 cs1;
                pctl := pctl cs1; ep := it1_is_parent (ep cs1) bi; rem := rem cs1; cur := cur cs1 |}
    /\ tbdy = l ++ tbdy'
    /\ exec_body tge l rs1 m1 = Next rs2 m2
    /\ match_codestate fb (MB.State s fb sp (bb'::c) ms' m') cs2).
Proof.
  intros until bdy. intros Hheader Hbody Hbb' BSTEP Hpstate Hpbody1 MCS. inv MCS.
  simpl in *. inv Hpstate.
  rewrite Hbody in TBC. monadInv TBC.
  pose proof EQ0 as TIB.
  inv BSTEP.
  (* MBgetstack *)
  - simpl in EQ0.
    unfold Mach.load_stack in H.
    exploit Mem.loadv_extends; eauto. intros [v' [A B]].
    rewrite (sp_val _ _ _ AG) in A.
    exploit loadind_correct; eauto with asmgen.
    intros (rs2 & EXECS & Hrs'1 & Hrs'2).
    pose proof EXECS as EXECS_IR14. eapply exec_straight_body in EXECS.
    destruct EXECS as (l & Hlbi & EXECB).
    exists rs2, m1, l.
    eexists. eexists. split. instantiate (1 := x). eauto.
    repeat (split; auto).
    remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
    assert (Hheadereq: MB.header bb' = MB.header bb). { subst. simpl. auto. }
    subst. simpl in Hheadereq.
    eapply match_codestate_intro; eauto.
    eapply agree_set_mreg; eauto with asmgen.
    intro Hep. simpl in Hep. discriminate.
    { intro HFPI. destruct (bbir14_cons _ _ _ Hbody (BBIR14 HFPI)) as [BIIR14 _].
      rewrite <- (RAIR14 HFPI). eapply (transl_instr_basic_preserves_IR14 _ _ _ _ _ _ _ _ TIB BIIR14). first [eexact EXECS_IR14 | eexact P_IR14 | eexact EXES_IR14]. }
    { intro HFPI. exact (proj2 (bbir14_cons _ _ _ Hbody (BBIR14 HFPI))). }
  (* MBsetstack *)
  - simpl in EQ0.
    unfold Mach.store_stack in H.
    assert (Val.lessdef (ms src) (rs1 (preg_of src))). { eapply preg_val; eauto. }
    exploit Mem.storev_extends; eauto. intros [m2' [A B]].
    exploit storeind_correct; eauto with asmgen.
    rewrite (sp_val _ _ _ AG) in A. eauto. intros [rs' [P Q]].
    pose proof P as P_IR14. eapply exec_straight_body in P.
    destruct P as (l & ll & EXECB).
    exists rs', m2', l.
    eexists. eexists. split. instantiate (1 := x). eauto.
    repeat (split; auto).
    remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
    subst.
    eapply match_codestate_intro; eauto. simpl. simpl in EQ. rewrite Hheader in EQ. auto.
    eapply agree_undef_regs; eauto with asmgen.
    simpl; intros. rewrite Q; auto with asmgen. rewrite Hheader in DXP. auto.
    { intro HFPI. destruct (bbir14_cons _ _ _ Hbody (BBIR14 HFPI)) as [BIIR14 _].
      rewrite <- (RAIR14 HFPI). eapply (transl_instr_basic_preserves_IR14 _ _ _ _ _ _ _ _ TIB BIIR14). eexact P_IR14. }
    { intro HFPI. exact (proj2 (bbir14_cons _ _ _ Hbody (BBIR14 HFPI))). }
  (* MBgetparam *)
  - simpl in EQ0.
    assert (f0 = f) by congruence; subst f0.
    unfold Mach.load_stack in *.
    exploit Mem.loadv_extends. eauto. eexact H0. auto.
    intros [parent' [A B]]. rewrite (sp_val _ _ _ AG) in A.
    exploit lessdef_parent_sp; eauto. clear B; intros B; subst parent'.
    exploit Mem.loadv_extends. eauto. eexact H1. auto.
    intros [v' [C D]].
    monadInv EQ0. rewrite Hheader. rewrite Hheader in DXP.
    destruct ep0 eqn:EPeq.
    (* IR12 contains parent *)
    + exploit loadind_correct. eexact EQ1.
      instantiate (2 := rs1). rewrite DXP; eauto.
      intros [rs2 [P [Q R]]].
      pose proof P as P_IR14. eapply exec_straight_body in P.
      destruct P as (l & ll & EXECB).
      exists rs2, m1, l. eexists.
      eexists. split. instantiate (1 := x). eauto.
      repeat (split; auto).
      remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
      assert (Hheadereq: MB.header bb' = MB.header bb). { subst. simpl. auto. }
      subst.
      eapply match_codestate_intro; eauto.
      eapply agree_set_mreg. eapply agree_set_mreg; eauto. congruence. auto with asmgen.
      simpl; intros. rewrite R; auto with asmgen. unfold preg_of.
      apply preg_of_not_R12; auto.
    { intro HFPI. exfalso. destruct (bbir14_cons _ _ _ Hbody (BBIR14 HFPI)) as [H' _]. discriminate. }
    { intro HFPI. exfalso. destruct (bbir14_cons _ _ _ Hbody (BBIR14 HFPI)) as [H' _]. discriminate. }
    (* R12 does not contain parent *)
    + rewrite chunk_of_Tptr in A.
      exploit loadind_int_correct. eexact A. intros [rs2 [P [Q R]]].
      exploit loadind_correct. eexact EQ1. instantiate (2 := rs2). rewrite Q. eauto.
      intros [rs3 [S [T U]]].
      exploit exec_straight_trans.
      eapply P. eapply S.
      intros EXES.
      pose proof EXES as EXES_IR14. eapply exec_straight_body in EXES.
      destruct EXES as (l & ll & EXECB).
      exists rs3, m1, l.
      eexists. eexists. split. instantiate (1 := x). eauto.
      repeat (split; auto).
      remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
      assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. }
      subst.
      eapply match_codestate_intro; eauto.
      eapply agree_set_mreg. eapply agree_set_mreg. eauto. eauto.
      instantiate (1 := rs2#IR12 <- (rs3#IR12)). intros.
      rewrite Pregmap.gso; auto with asmgen.
      congruence.
      intros. unfold Pregmap.set. destruct (PregEq.eq r' IR12). congruence. auto with asmgen.
      simpl; intros. rewrite U; auto with asmgen.
      apply preg_of_not_R12; auto.
    { intro HFPI. exfalso. destruct (bbir14_cons _ _ _ Hbody (BBIR14 HFPI)) as [H' _]. discriminate. }
    { intro HFPI. exfalso. destruct (bbir14_cons _ _ _ Hbody (BBIR14 HFPI)) as [H' _]. discriminate. }
  (* MBop *)
  - simpl in EQ0. rewrite Hheader in DXP.
    assert (eval_operation tge sp op (map ms args) m' = Some v).
    rewrite <- H. apply eval_operation_preserved. exact symbols_preserved.
    exploit eval_operation_lessdef.
    eapply preg_vals; eauto.
    2: eexact H0.
    all: eauto.
    intros [v' [A B]]. rewrite (sp_val _ _ _ AG) in A.
    exploit transl_op_correct; eauto.
    intros [rs2 [P [Q R]]].
    pose proof P as P_IR14. eapply exec_straight_body in P.
    destruct P as (l & ll & EXECB).
    exists rs2, m1, l.
    eexists. eexists. split. instantiate (1 := x). eauto.
    repeat (split; auto).
    remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
    subst.
    eapply match_codestate_intro; eauto. simpl. simpl in EQ. rewrite Hheader in EQ. auto.
    apply agree_set_undef_mreg with rs1; auto.
    apply Val.lessdef_trans with v'; auto.
    simpl; intros. destruct op; try discriminate.
    destruct (andb_prop _ _ H1); clear H1.
    rewrite R; auto. apply preg_of_not_R12; auto.
Local Transparent destroyed_by_op.
    simpl; auto; try discriminate.
    { intro HFPI. destruct (bbir14_cons _ _ _ Hbody (BBIR14 HFPI)) as [BIIR14 _]. rewrite <- (RAIR14 HFPI). eapply (transl_instr_basic_preserves_IR14 _ _ _ _ _ _ _ _ TIB BIIR14). first [eexact EXECS_IR14 | eexact P_IR14 | eexact EXES_IR14]. }
    { intro HFPI. exact (proj2 (bbir14_cons _ _ _ Hbody (BBIR14 HFPI))). }
  (* MBload *)
  - simpl in EQ0. rewrite Hheader in DXP.
    assert (Op.eval_addressing tge sp addr (map ms args) = Some a).
      rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
    exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1.
    intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A.
    exploit Mem.loadv_extends; eauto. intros [v' [C D]]. destruct trap; try discriminate.
    exploit transl_load_correct; eauto.
    intros [rs2 [P [Q R]]].
    pose proof P as P_IR14. eapply exec_straight_body in P.
    destruct P as (l & ll & EXECB).
    exists rs2, m1, l.
    eexists. eexists. split. instantiate (1 := x). eauto.
    repeat (split; auto).
    remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
    assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. }
    subst.
    eapply match_codestate_intro; eauto.
    eapply agree_set_mreg; eauto with asmgen.
    intro Hep. simpl in Hep. discriminate.
    { intro HFPI. destruct (bbir14_cons _ _ _ Hbody (BBIR14 HFPI)) as [BIIR14 _]. rewrite <- (RAIR14 HFPI). eapply (transl_instr_basic_preserves_IR14 _ _ _ _ _ _ _ _ TIB BIIR14). first [eexact EXECS_IR14 | eexact P_IR14 | eexact EXES_IR14]. }
    { intro HFPI. exact (proj2 (bbir14_cons _ _ _ Hbody (BBIR14 HFPI))). }
    (* MBload notrap1 *)
  - simpl in EQ0. unfold transl_load in EQ0. discriminate.
    (* MBload notrap2 *)
  - simpl in EQ0. unfold transl_load in EQ0. discriminate.
    (* MBstore *)
  - simpl in EQ0. rewrite Hheader in DXP.
    assert (Op.eval_addressing tge sp addr (map ms args) = Some a).
      rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
    exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1.
    intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A.
    assert (Val.lessdef (ms src) (rs1 (preg_of src))). eapply preg_val; eauto.
    exploit Mem.storev_extends; eauto. intros [m2' [C D]].
    exploit transl_store_correct; eauto. intros [rs2 [P Q]].
    pose proof P as P_IR14. eapply exec_straight_body in P.
    destruct P as (l & ll & EXECB).
    exists rs2, m2', l.
    eexists. eexists. split. instantiate (1 := x). eauto.
    repeat (split; auto).
    remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
    assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. }
    subst.
    eapply match_codestate_intro; eauto.
    eapply agree_undef_regs; eauto with asmgen.
    intro Hep. simpl in Hep. discriminate.
    { intro HFPI. destruct (bbir14_cons _ _ _ Hbody (BBIR14 HFPI)) as [BIIR14 _]. rewrite <- (RAIR14 HFPI). eapply (transl_instr_basic_preserves_IR14 _ _ _ _ _ _ _ _ TIB BIIR14). first [eexact EXECS_IR14 | eexact P_IR14 | eexact EXES_IR14]. }
    { intro HFPI. exact (proj2 (bbir14_cons _ _ _ Hbody (BBIR14 HFPI))). }
    (* MBmemcpy *)
  - simpl in EQ0. rewrite Hheader in DXP.
    assert (evalopt_builtin_args tge (fun r : Mach.RegEq.t => ms r) sp m args = Some vargs).
    { rewrite <- H. symmetry. apply evalopt_builtin_args_preserved. apply symbols_preserved. }
    apply evalopt_builtin_args_spec in H2.
    eapply builtin_args_match in H2 as [vargs' [A B]]; eauto.
    apply evalopt_builtin_args_spec in A.
    exploit memcpy_extends; eauto. intros [m2' [C D]].
    rewrite (sp_val _ _ _ AG) in A.
    exploit transl_memcpy_correct.
    eauto. eauto. eauto. exact A. unfold Memcpy.atomic. eexact C.
    intros [rs2 [P Q]].
    pose proof P as P_IR14. eapply exec_straight_body in P. destruct P as [l [ll EXECB]].
    exists rs2, m2', l.
    eexists. eexists. split. instantiate (1 := x). eauto.
    repeat (split; auto).
    remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
    assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. }
    subst.
    eapply match_codestate_intro; eauto.
    eapply agree_undef_regs; eauto. intros. symmetry. auto.
    intro Hep. simpl in Hep. discriminate.
    { intro HFPI. destruct (bbir14_cons _ _ _ Hbody (BBIR14 HFPI)) as [BIIR14 _]. rewrite <- (RAIR14 HFPI). eapply (transl_instr_basic_preserves_IR14 _ _ _ _ _ _ _ _ TIB BIIR14). first [eexact EXECS_IR14 | eexact P_IR14 | eexact EXES_IR14]. }
    { intro HFPI. exact (proj2 (bbir14_cons _ _ _ Hbody (BBIR14 HFPI))). }
  (* MBsavecallee *)
  - simpl in EQ0.
    unfold transl_savecallee in EQ0.
    destruct l as [|r l_rest]; [discriminate|].
    remember (mreg_type r) as ty_r eqn:Hty.
    destruct ty_r; try discriminate.
    + (* Tany32 *)
      monadInv EQ0.
      pose proof (sp_val _ _ _ AG) as HSP_eq.
      destruct (addimm_correct tge IR14 IR13 (Int.repr (align ofs 4))
                  (Pcstm IR14 (map (fun ir => (ir, AST.Many32)) x0) ::bi x)
                  rs1 m1) as [rs2 [EX_addimm [EX_IR14 EX_OTH]]].
      assert (AG2 : agree ms' sp rs2).
      { eapply agree_exten; eauto. intros r0 Hr0_data.
        apply EX_OTH.
        - intro Heq. subst r0. discriminate Hr0_data.
        - apply data_if_preg. assumption. }
      assert (HIR14_form : rs2 (DR (IR IR14)) = Val.add sp (Vint (Int.repr (align ofs 4)))).
      { rewrite EX_IR14. rewrite <- HSP_eq. reflexivity. }
      assert (HMach : Mach.store_callee_saves m sp (align ofs 4) (r :: l_rest) ms' = Some m').
      { rewrite <- store_callee_saves_align_first_int by (symmetry; assumption). exact H0. }
      assert (Hall32 : forall r0, In r0 (r :: l_rest) -> mreg_type r0 = Tany32).
      { eapply mmap_ireg_of_type; eauto. }
      assert (Hdiv : (4 | align ofs 4)) by (apply Coqlib.align_divides; lia).
      destruct (store_callee_saves_int_to_multi (r :: l_rest) x0 m m1 m'
                  sp (align ofs 4) ms' rs2
                  Hall32 EQ1 Hdiv AG2 HIR14_form MEXT HMach)
        as [m_A' [EX_storem EX_extends]].
      pose proof EX_addimm as EX_addimm_full.
      eapply exec_straight_body in EX_addimm.
      destruct EX_addimm as (l_addimm & ll_addimm & EXEB_addimm).
      exists rs2, m_A',
        (l_addimm ++ PStore (Pcstm IR14 (map (fun ir : ireg => (ir, AST.Many32)) x0)) :: nil).
      eexists. eexists. split. instantiate (1 := x). reflexivity.
      split.
      { rewrite ll_addimm. rewrite <- app_assoc. simpl. reflexivity. }
      split.
      { eapply exec_body_trans; [exact EXEB_addimm|].
        simpl. unfold exec_store. cbn.
        rewrite (stm_iregs_wf_callee_saves_int _ _ H EQ1).
        rewrite EX_storem. reflexivity. }
      eapply match_codestate_intro; eauto.
      * simpl. intro Hep. discriminate.
      * intro HFPI. exfalso.
        pose proof (BBIR14 HFPI) as HBB. unfold bblock_preserves_IR14 in HBB.
        rewrite Hbody in HBB. simpl in HBB. discriminate.
      * intro HFPI. exfalso.
        pose proof (BBIR14 HFPI) as HBB. unfold bblock_preserves_IR14 in HBB.
        rewrite Hbody in HBB. simpl in HBB. discriminate.
    + (* Tany64 *)
      apply bind_inversion in EQ0. destruct EQ0 as (x0 & EQ1 & EQ2).
      destruct (fregs_contiguous x0) eqn:HCONT.
      2: { (* non-contiguous fp save fallback via store_fp_per_reg *)
        inv EQ2.
        pose proof (sp_val _ _ _ AG) as HSP_eq.
        destruct (addimm_correct tge IR14 IR13 (Int.repr (align ofs 8))
                    (store_fp_per_reg x0 0 x)
                    rs1 m1) as [rs2 [EX_addimm [EX_IR14 EX_OTH]]].
        assert (AG2 : agree ms' sp rs2).
        { eapply agree_exten; eauto. intros r0 Hr0_data.
          apply EX_OTH.
          - intro Heq. subst r0. discriminate Hr0_data.
          - apply data_if_preg. assumption. }
        assert (HIR14_form : rs2 (DR (IR IR14)) = Val.add sp (Vint (Int.repr (align ofs 8)))).
        { rewrite EX_IR14. rewrite <- HSP_eq. reflexivity. }
        assert (HMach : Mach.store_callee_saves m sp (align ofs 8) (r :: l_rest) ms' = Some m').
        { rewrite <- store_callee_saves_align_first_fp by (symmetry; assumption). exact H0. }
        assert (Hall64 : forall r0, In r0 (r :: l_rest) -> mreg_type r0 = Tany64).
        { eapply mmap_freg_of_type; eauto. }
        assert (Hdiv : (8 | align ofs 8)) by (apply Coqlib.align_divides; lia).
        destruct (store_callee_saves_fp_to_per_reg (r :: l_rest) x0 m m1 m'
                    sp (align ofs 8) ms' rs2
                    Hall64 EQ1 Hdiv AG2 HIR14_form MEXT HMach)
          as [m_A' [EX_storem EX_extends]].
        pose proof EX_addimm as EX_addimm_full.
        eapply exec_straight_body in EX_addimm.
        destruct EX_addimm as (l_addimm & ll_addimm & EXEB_addimm).
        exists rs2, m_A', (l_addimm ++ store_fp_per_reg x0 0 nil).
        eexists. eexists. split. instantiate (1 := x). reflexivity.
        split.
        { rewrite ll_addimm. rewrite <- app_assoc. simpl.
          (* store_fp_per_reg x0 0 x = store_fp_per_reg x0 0 nil ++ x *)
          assert (Happ: forall fl k delta, store_fp_per_reg fl delta k = store_fp_per_reg fl delta nil ++ k).
          { induction fl as [|fr fl' IH]; intros; simpl; auto. rewrite IH; auto. }
          rewrite (Happ x0 x 0). rewrite app_assoc. reflexivity. }
        split.
        { eapply exec_body_trans; [exact EXEB_addimm | exact EX_storem]. }
        eapply match_codestate_intro; eauto.
        * simpl. intro Hep. discriminate.
        * intro HFPI. exfalso.
          pose proof (BBIR14 HFPI) as HBB. unfold bblock_preserves_IR14 in HBB.
          rewrite Hbody in HBB. simpl in HBB. discriminate.
        * intro HFPI. exfalso.
          pose proof (BBIR14 HFPI) as HBB. unfold bblock_preserves_IR14 in HBB.
          rewrite Hbody in HBB. simpl in HBB. discriminate. }
      inv EQ2.
      pose proof (sp_val _ _ _ AG) as HSP_eq.
      destruct (addimm_correct tge IR14 IR13 (Int.repr (align ofs 8))
                  (Pcvstm IR14 (map (fun fr => (fr, AST.Many64)) x0) ::bi x)
                  rs1 m1) as [rs2 [EX_addimm [EX_IR14 EX_OTH]]].
      assert (AG2 : agree ms' sp rs2).
      { eapply agree_exten; eauto. intros r0 Hr0_data.
        apply EX_OTH.
        - intro Heq. subst r0. discriminate Hr0_data.
        - apply data_if_preg. assumption. }
      assert (HIR14_form : rs2 (DR (IR IR14)) = Val.add sp (Vint (Int.repr (align ofs 8)))).
      { rewrite EX_IR14. rewrite <- HSP_eq. reflexivity. }
      assert (HMach : Mach.store_callee_saves m sp (align ofs 8) (r :: l_rest) ms' = Some m').
      { rewrite <- store_callee_saves_align_first_fp by (symmetry; assumption). exact H0. }
      assert (Hall64 : forall r0, In r0 (r :: l_rest) -> mreg_type r0 = Tany64).
      { eapply mmap_freg_of_type; eauto. }
      assert (Hdiv : (8 | align ofs 8)) by (apply Coqlib.align_divides; lia).
      destruct (store_callee_saves_fp_to_multi (r :: l_rest) x0 m m1 m'
                  sp (align ofs 8) ms' rs2
                  Hall64 EQ1 Hdiv AG2 HIR14_form MEXT HMach)
        as [m_A' [EX_storem EX_extends]].
      pose proof EX_addimm as EX_addimm_full.
      eapply exec_straight_body in EX_addimm.
      destruct EX_addimm as (l_addimm & ll_addimm & EXEB_addimm).
      exists rs2, m_A',
        (l_addimm ++ PStore (Pcvstm IR14 (map (fun fr : freg => (fr, AST.Many64)) x0)) :: nil).
      eexists. eexists. split. instantiate (1 := x). reflexivity.
      split.
      { rewrite ll_addimm. rewrite <- app_assoc. simpl. reflexivity. }
      split.
      { eapply exec_body_trans; [exact EXEB_addimm|].
        simpl. unfold exec_store. cbn.
        rewrite (vstm_fregs_wf_contiguous_callee_saves _ HCONT).
        rewrite EX_storem. reflexivity. }
      eapply match_codestate_intro; eauto.
      * simpl. intro Hep. discriminate.
      * intro HFPI. exfalso.
        pose proof (BBIR14 HFPI) as HBB. unfold bblock_preserves_IR14 in HBB.
        rewrite Hbody in HBB. simpl in HBB. discriminate.
      * intro HFPI. exfalso.
        pose proof (BBIR14 HFPI) as HBB. unfold bblock_preserves_IR14 in HBB.
        rewrite Hbody in HBB. simpl in HBB. discriminate.
  (* MBrestorecallee *)
  - simpl in EQ0.
    unfold transl_restorecallee in EQ0.
    destruct l as [|r l_rest]; [discriminate|].
    remember (mreg_type r) as ty_r eqn:Hty.
    destruct ty_r; try discriminate.
    + (* Tany32 *)
      monadInv EQ0.
      pose proof (sp_val _ _ _ AG) as HSP_eq.
      destruct (addimm_correct tge IR14 IR13 (Int.repr (align ofs 4))
                  (Pcldm IR14 (map (fun ir => (ir, AST.Many32)) x0) ::bi x)
                  rs1 m1) as [rs2 [EX_addimm [EX_IR14 EX_OTH]]].
      assert (AG2 : agree ms sp rs2).
      { eapply agree_exten; eauto. intros r0 Hr0_data.
        apply EX_OTH.
        - intro Heq. subst r0. discriminate Hr0_data.
        - apply data_if_preg. assumption. }
      assert (HIR14_form : rs2 (DR (IR IR14)) = Val.add sp (Vint (Int.repr (align ofs 4)))).
      { rewrite EX_IR14. rewrite <- HSP_eq. reflexivity. }
      assert (HMach : Mach.load_callee_saves m' sp (align ofs 4) (r :: l_rest) ms = Some ms').
      { rewrite <- load_callee_saves_align_first_int by (symmetry; assumption). exact H0. }
      assert (Hall32 : forall r0, In r0 (r :: l_rest) -> mreg_type r0 = Tany32).
      { eapply mmap_ireg_of_type; eauto. }
      assert (Hdiv : (4 | align ofs 4)) by (apply Coqlib.align_divides; lia).
      destruct (load_callee_saves_int_to_multi (r :: l_rest) x0 m' m1
                  sp (align ofs 4) ms ms' rs2
                  Hall32 EQ1 Hdiv AG2 HIR14_form MEXT HMach)
        as [rs3 [EX_loadm AG3]].
      pose proof EX_addimm as EX_addimm_full.
      eapply exec_straight_body in EX_addimm.
      destruct EX_addimm as (l_addimm & ll_addimm & EXEB_addimm).
      exists rs3, m1,
        (l_addimm ++ PLoad (Pcldm IR14 (map (fun ir : ireg => (ir, AST.Many32)) x0)) :: nil).
      eexists. eexists. split. instantiate (1 := x). reflexivity.
      split.
      { rewrite ll_addimm. rewrite <- app_assoc. simpl. reflexivity. }
      split.
      { eapply exec_body_trans; [exact EXEB_addimm|].
        simpl. unfold exec_load. cbn.
        rewrite (ldm_iregs_wf_callee_saves_int _ _ H EQ1).
        rewrite EX_loadm. reflexivity. }
      eapply match_codestate_intro; eauto.
      * simpl. intro Hep. discriminate.
      * intro HFPI. exfalso.
        pose proof (BBIR14 HFPI) as HBB. unfold bblock_preserves_IR14 in HBB.
        rewrite Hbody in HBB. simpl in HBB. discriminate.
      * intro HFPI. exfalso.
        pose proof (BBIR14 HFPI) as HBB. unfold bblock_preserves_IR14 in HBB.
        rewrite Hbody in HBB. simpl in HBB. discriminate.
    + (* Tany64 *)
      apply bind_inversion in EQ0. destruct EQ0 as (x0 & EQ1 & EQ2).
      destruct (fregs_contiguous x0) eqn:HCONT.
      2: { (* non-contiguous fp restore fallback via load_fp_per_reg *)
        inv EQ2.
        pose proof (sp_val _ _ _ AG) as HSP_eq.
        destruct (addimm_correct tge IR14 IR13 (Int.repr (align ofs 8))
                    (load_fp_per_reg x0 0 x)
                    rs1 m1) as [rs2 [EX_addimm [EX_IR14 EX_OTH]]].
        assert (AG2 : agree ms sp rs2).
        { eapply agree_exten; eauto. intros r0 Hr0_data.
          apply EX_OTH.
          - intro Heq. subst r0. discriminate Hr0_data.
          - apply data_if_preg. assumption. }
        assert (HIR14_form : rs2 (DR (IR IR14)) = Val.add sp (Vint (Int.repr (align ofs 8)))).
        { rewrite EX_IR14. rewrite <- HSP_eq. reflexivity. }
        assert (HMach : Mach.load_callee_saves m' sp (align ofs 8) (r :: l_rest) ms = Some ms').
        { rewrite <- load_callee_saves_align_first_fp by (symmetry; assumption). exact H0. }
        assert (Hall64 : forall r0, In r0 (r :: l_rest) -> mreg_type r0 = Tany64).
        { eapply mmap_freg_of_type; eauto. }
        assert (Hdiv : (8 | align ofs 8)) by (apply Coqlib.align_divides; lia).
        destruct (load_callee_saves_fp_to_per_reg (r :: l_rest) x0 m' m1
                    sp (align ofs 8) ms ms' rs2
                    Hall64 EQ1 Hdiv AG2 HIR14_form MEXT HMach)
          as [rs3 [EX_loadm [AG3 _]]].
        pose proof EX_addimm as EX_addimm_full.
        eapply exec_straight_body in EX_addimm.
        destruct EX_addimm as (l_addimm & ll_addimm & EXEB_addimm).
        exists rs3, m1, (l_addimm ++ load_fp_per_reg x0 0 nil).
        eexists. eexists. split. instantiate (1 := x). reflexivity.
        split.
        { rewrite ll_addimm. rewrite <- app_assoc. simpl.
          assert (Happ: forall fl k delta, load_fp_per_reg fl delta k = load_fp_per_reg fl delta nil ++ k).
          { induction fl as [|fr fl' IH]; intros; simpl; auto. rewrite IH; auto. }
          rewrite (Happ x0 x 0). rewrite app_assoc. reflexivity. }
        split.
        { eapply exec_body_trans; [exact EXEB_addimm | exact EX_loadm]. }
        eapply match_codestate_intro; eauto.
        * simpl. intro Hep. discriminate.
        * intro HFPI. exfalso.
          pose proof (BBIR14 HFPI) as HBB. unfold bblock_preserves_IR14 in HBB.
          rewrite Hbody in HBB. simpl in HBB. discriminate.
        * intro HFPI. exfalso.
          pose proof (BBIR14 HFPI) as HBB. unfold bblock_preserves_IR14 in HBB.
          rewrite Hbody in HBB. simpl in HBB. discriminate. }
      inv EQ2.
      pose proof (sp_val _ _ _ AG) as HSP_eq.
      destruct (addimm_correct tge IR14 IR13 (Int.repr (align ofs 8))
                  (Pcvldm IR14 (map (fun fr => (fr, AST.Many64)) x0) ::bi x)
                  rs1 m1) as [rs2 [EX_addimm [EX_IR14 EX_OTH]]].
      assert (AG2 : agree ms sp rs2).
      { eapply agree_exten; eauto. intros r0 Hr0_data.
        apply EX_OTH.
        - intro Heq. subst r0. discriminate Hr0_data.
        - apply data_if_preg. assumption. }
      assert (HIR14_form : rs2 (DR (IR IR14)) = Val.add sp (Vint (Int.repr (align ofs 8)))).
      { rewrite EX_IR14. rewrite <- HSP_eq. reflexivity. }
      assert (HMach : Mach.load_callee_saves m' sp (align ofs 8) (r :: l_rest) ms = Some ms').
      { rewrite <- load_callee_saves_align_first_fp by (symmetry; assumption). exact H0. }
      assert (Hall64 : forall r0, In r0 (r :: l_rest) -> mreg_type r0 = Tany64).
      { eapply mmap_freg_of_type; eauto. }
      assert (Hdiv : (8 | align ofs 8)) by (apply Coqlib.align_divides; lia).
      destruct (load_callee_saves_fp_to_multi (r :: l_rest) x0 m' m1
                  sp (align ofs 8) ms ms' rs2
                  Hall64 EQ1 Hdiv AG2 HIR14_form MEXT HMach)
        as [rs3 [EX_loadm AG3]].
      pose proof EX_addimm as EX_addimm_full.
      eapply exec_straight_body in EX_addimm.
      destruct EX_addimm as (l_addimm & ll_addimm & EXEB_addimm).
      exists rs3, m1,
        (l_addimm ++ PLoad (Pcvldm IR14 (map (fun fr : freg => (fr, AST.Many64)) x0)) :: nil).
      eexists. eexists. split. instantiate (1 := x). reflexivity.
      split.
      { rewrite ll_addimm. rewrite <- app_assoc. simpl. reflexivity. }
      split.
      { eapply exec_body_trans; [exact EXEB_addimm|].
        simpl. unfold exec_load. cbn.
        rewrite (vldm_fregs_wf_contiguous_callee_saves _ HCONT).
        rewrite EX_loadm. reflexivity. }
      eapply match_codestate_intro; eauto.
      * simpl. intro Hep. discriminate.
      * intro HFPI. exfalso.
        pose proof (BBIR14 HFPI) as HBB. unfold bblock_preserves_IR14 in HBB.
        rewrite Hbody in HBB. simpl in HBB. discriminate.
      * intro HFPI. exfalso.
        pose proof (BBIR14 HFPI) as HBB. unfold bblock_preserves_IR14 in HBB.
        rewrite Hbody in HBB. simpl in HBB. discriminate.
Qed.

Lemma exec_body_control:
  forall b t rs1 m1 rs2 m2 rs3 m3 fn,
  exec_body tge (body b) rs1 m1 = Next rs2 m2 ->
  exec_exit tge fn (Ptrofs.repr (size b)) rs2 m2 (exit b) t rs3 m3 ->
  exec_bblock tge fn b rs1 m1 t rs3 m3.
Proof.
  intros until fn. intros EXEB EXECTL.
  econstructor; eauto.
Qed.

Inductive exec_header: codestate -> codestate -> Prop :=
  | exec_header_cons: forall cs1,
      exec_header cs1 {| pstate := pstate cs1; pheader := nil; pbody1 := pbody1 cs1; pbody2 := pbody2 cs1;
                          pctl := pctl cs1; ep := (if pheader cs1 then ep cs1 else false); rem := rem cs1;
                          cur := cur cs1 |}.

Theorem step_simu_header:
  forall bb s fb sp c ms m rs1 m1 cs1,
  pstate cs1 = (State rs1 m1) ->
  match_codestate fb (MB.State s fb sp (bb::c) ms m) cs1 ->
  (exists cs1',
       exec_header cs1 cs1'
    /\ match_codestate fb (MB.State s fb sp (mb_remove_header bb::c) ms m) cs1').
Proof.
  intros until cs1. intros Hpstate MCS.
  eexists. split; eauto.
  econstructor; eauto.
  inv MCS. simpl in *. inv Hpstate.
  econstructor; eauto.
Qed.

Theorem step_simu_body:
  forall bb s fb sp c ms m rs1 m1 ms' cs1 m',
  MB.header bb = nil ->
  body_step ge s fb sp (MB.body bb) ms m ms' m' ->
  pstate cs1 = (State rs1 m1) ->
  match_codestate fb (MB.State s fb sp (bb::c) ms m) cs1 ->
  (exists rs2 m2 cs2 ep,
       cs2 = {| pstate := (State rs2 m2); pheader := nil; pbody1 := nil; pbody2 := pbody2 cs1;
                pctl := pctl cs1; ep := ep; rem := rem cs1; cur := cur cs1 |}
    /\ exec_body tge (pbody1 cs1) rs1 m1 = Next rs2 m2
    /\ match_codestate fb (MB.State s fb sp ({| MB.header := nil; MB.body := nil; MB.exit := MB.exit bb |}::c) ms' m') cs2).
Proof.
  intros bb. destruct bb as [hd bdy ex]; simpl; auto. induction bdy as [|bi bdy].
  - intros until m'. intros Hheader BSTEP Hpstate MCS.
    inv BSTEP.
    exists rs1, m1, cs1, (ep cs1).
    inv MCS. inv Hpstate. simpl in *. monadInv TBC. repeat (split; simpl; auto).
    econstructor; eauto.
  - intros until m'. intros Hheader BSTEP Hpstate MCS. inv BSTEP.
    rename ms' into ms''. rename m' into m''. rename rs' into ms'. rename m'0 into m'.
    exploit (step_simu_basic); eauto. simpl. eauto. simpl; auto. simpl; auto.
    intros (rs2 & m2 & l & cs2 & tbdy' & Hcs2 & Happ & EXEB & MCS').
    simpl in *.
    exploit IHbdy. auto. eapply H6. 2: eapply MCS'. all: eauto. subst; eauto. simpl; auto.
    intros (rs3 & m3 & cs3 & ep & Hcs3 & EXEB' & MCS'').
    exists rs3, m3, cs3, ep.
    repeat (split; simpl; auto). subst. simpl in *. auto.
    rewrite Happ. eapply exec_body_trans; eauto. rewrite Hcs2 in EXEB'; simpl in EXEB'. auto.
Qed.

Lemma step_simulation_bblock':
  forall t sf f sp bb bb' bb'' rs m rs' m' s'' c S1,
  bb' = mb_remove_header bb ->
  body_step ge sf f sp (Machblock.body bb') rs m rs' m' ->
  bb'' = mb_remove_body bb' ->
  exit_step return_address_offset ge (Machblock.exit bb'') (Machblock.State sf f sp (bb'' :: c) rs' m') t s'' ->
  match_states (Machblock.State sf f sp (bb :: c) rs m) S1 ->
  exists S2 : state, plus step tge S1 t S2 /\ match_states s'' S2.
Proof.
  intros until S1. intros Hbb' BSTEP Hbb'' ESTEP MS.
  destruct (mbsize bb) eqn:SIZE.
  - apply mbsize_eqz in SIZE. destruct SIZE as (Hbody & Hexit).
    destruct bb as [hd bdy ex]; simpl in *; subst.
    inv MS. inv AT. exploit transl_blocks_nonil; eauto. intros (tbb & tc' & Htc). subst. rename tc' into tc.
    exploit transl_blocks_distrib; eauto. intros (nl' & TLB & TLBS).
    unfold transl_block in TLB. simpl in TLB. inv TLB. inv ESTEP. inv BSTEP.
    eexists. split.
    + eapply plus_one.
      exploit functions_translated; eauto. intros (tf0 & FIND' & TRANSF'). monadInv TRANSF'.
      assert (x = tf) by congruence. subst x.
      eapply exec_step_internal; eauto. eapply find_bblock_tail; eauto.
      unfold exec_bblock. simpl.
      eexists; eexists; split; eauto.
      econstructor.
    + econstructor.
      1,2,3: eauto.
      *
      unfold incrPC. rewrite Pregmap.gss.
      unfold Val.offset_ptr. rewrite <- H.
    assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
      { eapply transf_function_no_overflow; eauto. }
    econstructor; eauto.
      generalize (code_tail_next_int _ _ _ _ NOOV H3). intro CT1. eauto.
      *
    eapply agree_exten; eauto. intros. unfold incrPC. rewrite Pregmap.gso; auto.
    intro Hdata. unfold data_preg in Hdata. destruct r; try congruence; try discriminate.
   *
    intros. discriminate.
   *
    intro HFPI. unfold incrPC. rewrite Pregmap.gso by discriminate. exact (RAIR14 HFPI).
   *
    eapply is_tail_trans. apply is_tail_cons. apply is_tail_refl. eauto.
  - subst. exploit mbsize_neqz. { instantiate (1 := bb). rewrite SIZE. discriminate. }
    intros Hnotempty.
    (* initial setting *)
    exploit match_state_codestate.
      eapply Hnotempty.
      all: eauto.
    intros (cs1 & fb & f0 & tbb & tc & ep & MCS & MAS & FIND & TLBS & Hbody & Hexit & Hcur & Hrem & Hpstate).
    (* step_simu_header part *)
    assert (exists rs1 m1, pstate cs1 = State rs1 m1). { inv MAS. simpl. eauto. }
    destruct H as (rs1 & m1 & Hpstate2). subst.
    assert (f = fb). { inv MCS. auto. } subst fb.
    exploit step_simu_header.
      2: eapply MCS.
      all: eauto.
    intros (cs1' & EXEH & MCS2).
    (* step_simu_body part *)
    assert (Hpstate': pstate cs1' = pstate cs1). { inv EXEH; auto. }
    exploit step_simu_body.
      2: eapply BSTEP.
      3: eapply MCS2.
      all: eauto. rewrite Hpstate'. eauto.
    intros (rs2 & m2 & cs2 & ep' & Hcs2 & EXEB & MCS').

    (* step_simu_control part *)
    assert (exists tf, Genv.find_funct_ptr tge f = Some (Internal tf)).
    { exploit functions_translated; eauto. intros (tf & FIND' & TRANSF'). monadInv TRANSF'. eauto. }
    destruct H as (tf & FIND').
    inv EXEH. simpl in *.
    subst. exploit step_simu_control.
      8: eapply MCS'. all: simpl.
      9: eapply ESTEP.
      all: simpl; eauto.
      { inv MAS; simpl in *. inv Hpstate2. eapply match_asmstate_some; eauto.
        erewrite exec_body_pc; eauto. }
    intros (rs3 & m3 & rs4 & m4 & EXEB' & EXECTL' & MS').

    (* bringing the pieces together *)
    exploit exec_body_trans.
      eapply EXEB.
      eauto.
    intros EXEB2.
    exploit exec_body_control; eauto.
    rewrite <- Hbody in EXEB2. eauto.
    rewrite Hexit. eauto.
    intros EXECB. (* inv EXECB. *)
    exists (State rs4 m4).
    split; auto. eapply plus_one. rewrite Hpstate2.
    assert (exists ofs, rs1 PC = Vptr f ofs).
    { rewrite Hpstate2 in MAS. inv MAS. simpl in *. eauto. }
    destruct H as (ofs & Hrs1pc).
    eapply exec_step_internal; eauto.

    (* proving the initial find_bblock *)
    rewrite Hpstate2 in MAS. inv MAS. simpl in *.
    assert (f1 = f0) by congruence. subst f0.
    rewrite PCeq in Hrs1pc. inv Hrs1pc.
    exploit functions_translated; eauto. intros (tf1 & FIND'' & TRANS''). rewrite FIND' in FIND''.
    inv FIND''. monadInv TRANS''. rewrite TRANSF0 in EQ. inv EQ.
    eapply find_bblock_tail; eauto.
Qed.

Theorem step_simulation_bblock:
  forall t sf f sp bb ms m ms' m' S2 c,
  body_step ge sf f sp (Machblock.body bb) ms m ms' m' ->
  exit_step return_address_offset ge (Machblock.exit bb) (Machblock.State sf f sp (bb :: c) ms' m') t S2 ->
  forall S1', match_states (Machblock.State sf f sp (bb :: c) ms m) S1' ->
  exists S2' : state, plus step tge S1' t S2' /\ match_states S2 S2'.
Proof.
  intros until c. intros BSTEP ESTEP S1' MS.
  eapply step_simulation_bblock'; eauto.
  all: destruct bb as [hd bdy ex]; simpl in *; eauto.
  inv ESTEP.
  - econstructor. inv H; try (econstructor; eauto; fail).
  - econstructor.
Qed.

We need to show that, in the simulation diagram, we cannot take infinitely many Mach transitions that correspond to zero transitions on the ARM side. Actually, all Mach transitions correspond to at least one ARM transition, except the transition from Machsem.Returnstate to Machsem.State. So, the following integer measure will suffice to rule out the unwanted behaviour.

Definition measure (s: MB.state) : nat :=
  match s with
  | MB.State _ _ _ _ _ _ => 0%nat
  | MB.Callstate _ _ _ _ => 0%nat
  | MB.Returnstate _ _ _ => 1%nat
  end.

Remark preg_of_not_R12: forall r, negb (mreg_eq r R12) = true -> DR IR12 <> preg_of r.
Proof.
  intros. change (DR IR12) with (preg_of R12). red; intros.
  exploit preg_of_injective; eauto. intros; subst r.
  unfold proj_sumbool in H; rewrite dec_eq_true in H; discriminate.
Qed.

Theorem step_simulation:
  forall S1 t S2, MB.step return_address_offset ge S1 t S2 ->
  forall S1' (MS: match_states S1 S1'),
  (exists S2', plus step tge S1' t S2' /\ match_states S2 S2')
  \/ (measure S2 < measure S1 /\ t = E0 /\ match_states S2 S1')%nat.
Proof.
  induction 1; intros.
  - (* bblock *)
    left. destruct (Machblock.exit bb) eqn:MBE; try destruct c0.
    all: try(inversion H0; subst; inv H2; eapply step_simulation_bblock;
         try (rewrite MBE; try discriminate); eauto).
    + inversion H0. subst. eapply step_simulation_bblock;
      try (rewrite MBE; try discriminate); eauto.
  - (* internal function *)
    inv MS.
    exploit functions_translated; eauto. intros [tf [A B]]. monadInv B.
    generalize EQ; intros EQ'. monadInv EQ'.
    destruct (zlt Ptrofs.max_unsigned (size_blocks x0.(fn_blocks)));
    inversion EQ1. clear EQ1. subst x0.
    unfold Mach.store_stack in *.
    exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl.
    intros [m1' [C D]].
    exploit Mem.storev_extends. eexact D. eexact H1. eauto. eauto.
    intros [m2' [F G]].
    exploit Mem.storev_extends. eexact G. eexact H2. eauto. eauto.
    intros [m3' [P Q]].
    monadInv EQ0.
    set (ra_ofs := fn_retaddr_ofs f) in *.
    set (ra_ofs' := Ptrofs.to_int ra_ofs) in *.
    set (tfbody := make_prologue f x0) in *.
    set (tf := {| fn_sig := MB.fn_sig f; fn_blocks := tfbody |}).
    (* Execution of function prologue *)
    set (rs2 := (rs0#IR12 <- (parent_sp s) #IR13 <- (Vptr stk Ptrofs.zero))).
    edestruct (save_lr_correct tge ra_ofs (Pcfi_rel_offset ra_ofs' :: nil) rs2) as (rs3 & X & Y & Z).
    destruct (f.(fn_retaddr_pac)).
    unfold PAC.pointer_auth_encode in P. simpl in P. simpl. unfold rs2.
    repeat rewrite Pregmap.gso by discriminate.
    rewrite ATLR. exact P.
    assert (EXEC_PROLOGUE: exists rs3',
              exec_straight_blocks tge tf (tf.(fn_blocks)) rs0 m' x0 rs3' m3'
              /\ forall r, r <> PC -> rs3' r = rs3 r).
    { change (fn_blocks tf) with tfbody; unfold tfbody.
      unfold make_prologue.
      destruct (fn_retaddr_pac).
      eexists. split.
      - constructor.
        unfold exec_bblock. exists rs3. exists m3'.
        split.
        simpl. rewrite C. rewrite (agree_sp _ _ _ AG).
        simpl in F. simpl. rewrite F.
        apply exec_straight_body in X.
        destruct X as (l' & SX & EX).
        replace (fn_retaddr_ofs f) with (ra_ofs); auto.
        replace (Ptrofs.to_int ra_ofs) with (ra_ofs'); auto.
        rewrite SX. rewrite exec_body_concat with (rs' := rs3) (m' := m3').
        simpl. auto. auto.
        constructor.
        unfold incrPC. rewrite Pregmap.gss.
        f_equal. assert (rs0 PC = rs2 PC) by auto.
        rewrite H3. eapply exec_straight_pc. apply X.
      - intros. unfold incrPC.
        rewrite Pregmap.gso by assumption.
        auto.
    }
    destruct EXEC_PROLOGUE as (rs3' & EXEC_PROLOGUE & Heqrs3').
    (* After the function prologue is the code for the function body *)
    exploit exec_straight_steps_2; eauto using functions_transl.
    simpl fn_blocks. simpl fn_blocks in g. lia. constructor.
    intros (ofs' & K & L).
    left; exists (State rs3' m3'); split.
    eapply exec_straight_steps_1; eauto.
    simpl fn_blocks. simpl fn_blocks in g. lia. constructor.
    econstructor; eauto.
    rewrite K; econstructor; eauto.
    apply agree_undef_regs2 with rs2. auto.
    apply agree_change_sp with (parent_sp s).
    apply agree_undef_regs with rs0. auto.
    intros; Simpl.
    congruence.
    intros.
    rewrite Heqrs3'. rewrite Y; auto with asmgen.
    auto with asmgen.
    intros. rewrite Heqrs3' by discriminate.
    rewrite Z.
    unfold rs2. auto. auto.
    (* RAIR14 for function entry *)
    intro HFPI. rewrite Heqrs3' by discriminate.
    rewrite Y by (auto; discriminate). unfold rs2. repeat rewrite Pregmap.gso by discriminate. exact ATLR.
    apply is_tail_refl.
  - (* external function *)
    inv MS.
    exploit functions_translated; eauto.
    intros [tf [A B]]. simpl in B. inv B.
    exploit extcall_arguments_match; eauto.
    intros [args' [C D]].
    exploit external_call_mem_extends; eauto.
    intros [res' [m2' [P [Q [R S]]]]].
    left; econstructor; split.
    apply plus_one. eapply exec_step_external; eauto.
    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
    econstructor; eauto.
    unfold loc_external_result.
    apply agree_set_other; auto.
    apply agree_set_pair; auto.
    apply agree_undef_caller_save_regs; auto.
    - (* return *)
      inv MS.
      inv STACKS. simpl in *.
      right. split. lia. split. auto.
      rewrite <- ATPC in *.
      apply match_states_intro with (f:=f0) (tf:=tf) (tc:=tc) (ep:=false); eauto; try congruence;
      try (intro; discriminate).
Qed.

Lemma transf_initial_states:
  forall st1, MB.initial_state prog st1 ->
  exists st2, AB.initial_state tprog st2 /\ match_states st1 st2.
Proof.
  intros. inversion H. unfold ge0 in *.
  econstructor; split.
  econstructor.
  eapply (Genv.init_mem_transf_partial TRANSF); eauto.
  replace (Genv.symbol_address (Genv.globalenv tprog) (prog_main tprog) Ptrofs.zero)
     with (Vptr fb Ptrofs.zero).
  econstructor; eauto.
  constructor.
  apply Mem.extends_refl.
  split. auto. simpl. unfold Vnullptr; destruct Archi.ptr64; congruence.
  intros. rewrite Mach.Regmap.gi. auto.
  unfold Genv.symbol_address.
  rewrite (match_program_main TRANSF).
  rewrite symbols_preserved.
  unfold ge; rewrite H1. auto.
Qed.

Lemma transf_final_states:
  forall st1 st2 r,
  match_states st1 st2 -> MB.final_state st1 r -> AB.final_state st2 r.
Proof.
  intros. inv H0. inv H. constructor. assumption.
  compute in H1. inv H1.
  generalize (preg_val _ _ _ R0 AG). rewrite H2. intros LD; inv LD. auto.
Qed.

Lemma transf_program_correct:
  forward_simulation (MB.semantics return_address_offset prog) (AB.semantics tprog).
Proof.
  eapply forward_simulation_star with (measure := measure).
  - apply senv_preserved.
  - eexact transf_initial_states.
  - eexact transf_final_states.
  - exact step_simulation.
Qed.

End PRESERVATION.