(** * Module Asmgenproof *)


(** Correctness proof for ARM code generation from Asmblock. *)


Require Import Coqlib Errors.
Require Import Integers Floats AST Linking Compopts.
Require Import Values Memory Events Globalenvs Smallstep.
Require Import Op Locations Mach Conventions Asm Asmblock.
Require Machblockgenproof Asmblockgenproof PostpassSchedulingproof.
Require Import Asmgen.
Require Import Axioms.
Require Import IterList.
Require Import Ring Lia.

(** Opens the module gathering the preservation results for the translation
    from Asmblock programs to Asm programs.
    NOTE(review): [Asmblock_TRANSF] is not defined in this part of the file;
    presumably it provides [transf_program], [transf_fundef] and
    [transf_function] used below -- confirm against Asmgen. *)
Module Asmblock_PRESERVATION.

Import Asmblock_TRANSF.

(** [match_prog p tp] relates an Asmblock program [p] to an Asm program [tp].
    It instantiates [match_program] so that a function definition [f] of [p]
    corresponds to a [tf] with [transf_fundef f = OK tf], and passes [eq] as
    the second relation argument of [match_program]. *)
Definition match_prog (p: Asmblock.program) (tp: Asm.program) :=
  match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp.

(** A successful run of [transf_program] produces a target program that is
    related to the source program by [match_prog]. *)
Lemma transf_program_match:
  forall p tp, transf_program p = OK tp -> match_prog p tp.
Proof.
  intros p tp TRANSL.
  (* Expose the underlying [match_program] statement before applying the
     generic result about partial program transformations. *)
  unfold match_prog.
  eapply match_transform_partial_program; eauto.
Qed.


(** [desmatch]: look for a [match ?e with ... end] subterm in the goal and
    case-split on its scrutinee [e] with [destruct]. *)
Ltac desmatch :=
  match goal with
  | [ |- context [ match ?e with _ => _ end ] ]
    => destruct e
  end.

(** [desif]: look for an [if ?e then ... else ...] subterm in the goal and
    case-split on its condition [e] with [destruct]. *)
Ltac desif :=
  match goal with
  | [ |- context [ if ?e then _ else _ ] ]
    => destruct e
  end.

(** [desifH H]: case-split on the condition of an [if ... then ... else ...]
    occurring in a hypothesis, recording the case equation via [eqn:?].
    NOTE(review): in an Ltac [match goal] pattern a hypothesis name is
    normally a fresh binder, so the [H] in the pattern below presumably
    shadows the tactic argument and any hypothesis containing an [if] may be
    selected -- confirm before relying on the argument. *)
Ltac desifH H :=
  match goal with
  | [ H: context [ if ?e then _ else _ ] |- _ ]
    => destruct e eqn:?
  end.

(** [hdmatchinv]: find a hypothesis whose left-hand side is headed by a
    [match ?e with ... end], case-split on [e], then run [inv] on that
    hypothesis in every resulting case. *)
Ltac hdmatchinv :=
  match goal with
  | H: match ?e with _ => _ end = _ |- _
    => destruct e; inv H
  end.

(** [find_rwrt_ag]: simplify everywhere, then locate an agreement hypothesis
    of the shape [forall r, r <> _ -> _ r = _ r], rewrite with it from right
    to left as many times as possible, and try [congruence] on the goals that
    remain (including the generated side conditions).
    Note that [?PC] in the pattern is a pattern variable, not the [PC]
    register constant. *)
Ltac find_rwrt_ag :=
  simpl in *;
  match goal with
  | [ AG: forall r, r <> ?PC -> _ r = _ r |- _ ]
      => repeat rewrite <- AG; try congruence
  end.

(** [assign_once]: for a goal mentioning a register update [_ # r1 <- _]
    followed by an application to a register [r2], case-split on whether
    [r1] and [r2] are equal.
    - equal: rewrite twice with [Pregmap.gss], then call [find_rwrt_ag];
    - different: rewrite with [Pregmap.gso] on one side, flip the equation
      with [symmetry], rewrite with [Pregmap.gso] again, then call
      [find_rwrt_ag]. *)
Ltac assign_once :=
  match goal with
  | |- context [ _ # ?r1 <- _ ?r2 ]
    => destruct (PregEq.eq r1 r2); subst;
       [ rewrite 2!Pregmap.gss; find_rwrt_ag
       | rewrite Pregmap.gso; try congruence;
         symmetry;
         rewrite Pregmap.gso; try congruence;
         find_rwrt_ag ]
  end.

(** Context shared by the lemmas of this section: a source program [prog],
    a target program [tprog], the hypothesis [TRANSF] stating that they are
    related by [match_prog], and their global environments [ge] and [tge]. *)
Section PRESERVATION.

Variable prog: Asmblock.program.
Variable tprog: Asm.program.
Hypothesis TRANSF: match_prog prog tprog.
Let ge := Genv.globalenv prog.
Let tge := Genv.globalenv tprog.

(** Symbol lookup gives the same result in the target global environment
    [tge] as in the source global environment [ge]. *)
Lemma symbols_preserved:
  forall s, Genv.find_symbol tge s = Genv.find_symbol ge s.
Proof.
  intros s.
  apply (Genv.find_symbol_match TRANSF).
Qed.

(** The address computed for a symbol and an offset is the same in [tge]
    and in [ge]; this follows from [symbols_preserved]. *)
Lemma symbol_addresses_preserved:
  forall (s: qualident) (ofs: ptrofs),
  Genv.symbol_address tge s ofs = Genv.symbol_address ge s ofs.
Proof.
  intros s ofs.
  unfold Genv.symbol_address.
  rewrite symbols_preserved.
  reflexivity.
Qed.

(** The symbol environments of [ge] and [tge] are equivalent. *)
Lemma senv_preserved:
  Senv.equiv ge tge.
Proof.
  exact (Genv.senv_match TRANSF).
Qed.

(** Any function definition found at block [b] in [ge] has a counterpart at
    the same block in [tge], obtained through [transf_fundef]. *)
Lemma functions_translated:
forall b f,
  Genv.find_funct_ptr ge b = Some f ->
  exists tf,
  Genv.find_funct_ptr tge b = Some tf /\ transf_fundef f = OK tf.
Proof.
  exact (Genv.find_funct_ptr_transf_partial TRANSF).
Qed.

(** Specialisation of [functions_translated] to internal functions: the
    translated definition is [Internal tf] with [transf_function f = OK tf].
    The proof inverts the monadic bind exposed in the [transf_fundef]
    equation. *)
Lemma internal_functions_translated:
  forall b f,
  Genv.find_funct_ptr ge b = Some (Internal f) ->
  exists tf,
  Genv.find_funct_ptr tge b = Some (Internal tf) /\ transf_function f = OK tf.
Proof.
  intros; exploit functions_translated; eauto.
  intros (x & FIND & TRANSf).
  apply bind_inversion in TRANSf.
  destruct TRANSf as (tf & TRANSf & X).
  inv X.
  eauto.
Qed.

(** Refinement of [internal_functions_translated]: the translated function is
    [Asm.mkfunction (fn_sig f) tc], where [tc] is the result of
    [unfold (fn_blocks f)] and its length is at most [Ptrofs.max_unsigned].
    The proof unfolds [transf_function] and rules out one branch of its
    [zlt] test with [congruence]. *)
Lemma internal_functions_unfold:
  forall b f,
  Genv.find_funct_ptr ge b = Some (Internal f) ->
  exists tc,
  Genv.find_funct_ptr tge b = Some (Internal (Asm.mkfunction (fn_sig f) tc))
  /\ unfold (fn_blocks f) = OK tc
  /\ list_length_z tc <= Ptrofs.max_unsigned.
Proof.
  intros.
  exploit internal_functions_translated; eauto.
  intros (tf & FINDtf & TRANStf).
  unfold transf_function in TRANStf.
  monadInv TRANStf.
  destruct (zlt _ _); try congruence.
  inv EQ. inv EQ0.
  eexists; intuition eauto.
  lia.
Qed.

(** [is_nth_inst bb n i] relates position [n] of the basic block [bb] to the
    Asm instruction [i] found there. There are three cases:
    - [is_nth_label]: [n] indexes a label [l] of the header and [i] is
      [Asm.Plabel l];
    - [is_nth_basic]: [n], shifted by the header length, indexes a basic
      instruction [bi] of the body and [basic_to_instruction bi = OK i];
    - [is_nth_ctlflow]: the block has an exit [cfi], [n] is [size bb - 1],
      and [i] is [control_to_instruction cfi]. *)
Inductive is_nth_inst (bb: bblock) (n:Z) (i:Asm.instruction): Prop :=
  | is_nth_label l:
     list_nth_z (header bb) n = Some l ->
     i = Asm.Plabel l ->
     is_nth_inst bb n i
  | is_nth_basic bi:
     list_nth_z (body bb) (n - list_length_z (header bb)) = Some bi ->
     basic_to_instruction bi = OK i ->
     is_nth_inst bb n i
  | is_nth_ctlflow cfi:
     (exit bb) = Some cfi ->
     n = size bb - 1 ->
     i = control_to_instruction cfi ->
     is_nth_inst bb n i.

(** Two states match exactly when they are equal. *)
Definition match_states (s1 s2 : state) := s1 = s2.

(** [match_internal n s1 s2] relates two states that
    - have the same memory ([MEM]),
    - agree on every register other than [PC] ([AG]),
    - and whose program counters differ by the offset [n]: the [PC] of the
      second state is the [PC] of the first one shifted by [Ptrofs.repr n]
      ([AGPC]). *)
Inductive match_internal: forall n, state -> state -> Prop :=
  | match_internal_intro n rs1 m1 rs2 m2
    (MEM: m1 = m2)
    (AG: forall r, r <> PC -> rs1 r = rs2 r)
    (AGPC: Val.offset_ptr (rs1 PC) (Ptrofs.repr n) = rs2 PC)
    : match_internal n (State rs1 m1) (State rs2 m2).

(** [match_internal] is preserved when the same value is written to the same
    register, other than [PC], in both register sets. *)
Lemma match_internal_set_parallel:
  forall n rs1 m1 rs2 m2 r val,
  match_internal n (State rs1 m1) (State rs2 m2) ->
  r <> PC ->
  match_internal n (State (rs1#r <- val) m1) (State (rs2#r <- val ) m2).
Proof.
  intros n rs1 m1 rs2 m2 r v MI.
  inversion MI; constructor; auto.
  (* agreement on the registers other than PC *)
  - intros r' NOTPC.
    unfold Pregmap.set; rewrite AG. reflexivity. assumption.
  (* the PC offset is untouched because the updated register is not PC *)
  - unfold Pregmap.set; destruct (PregEq.eq PC r); congruence.
Qed.

(** Matching states have register sets that agree on every register. *)
Lemma agree_match_states:
  forall rs1 m1 rs2 m2,
  match_states (State rs1 m1) (State rs2 m2) ->
  forall r : preg, rs1#r = rs2#r.
Proof.
  intros rs1 m1 rs2 m2 MS r.
  unfold match_states in MS.
  (* [match_states] is an equality between states: invert it to identify
     the two register sets. *)
  inv MS.
  reflexivity.
Qed.

(** [match_states] is preserved when the same value is written to the same
    register in both register sets. *)
Lemma match_states_set_parallel:
  forall rs1 m1 rs2 m2 r v,
  match_states (State rs1 m1) (State rs2 m2) ->
  match_states (State (rs1#r <- v) m1) (State (rs2#r <- v) m2).
Proof.
  intros rs1 m1 rs2 m2 r v MS.
  unfold match_states in *.
  (* Inverting the state equality identifies both the register sets and the
     memories, after which the two sides are syntactically equal. *)
  inv MS.
  reflexivity.
Qed.

(** Matching states whose [PC] is a pointer are related by [match_internal]
    at offset [0]. The pointer hypothesis lets [Val.offset_ptr] compute, and
    [Ptrofs.add_zero] removes the null offset. *)
Lemma mi_from_ms:
  forall rs1 m1 rs2 m2 b ofs,
  match_states (State rs1 m1) (State rs2 m2) ->
  rs1#PC = Vptr b ofs ->
  match_internal 0 (State rs1 m1) (State rs2 m2).
Proof.
  intros rs1 m1 rs2 m2 b ofs MS PCVAL.
  inv MS; constructor; auto; unfold Val.offset_ptr;
  rewrite PCVAL; rewrite Ptrofs.add_zero; reflexivity.
Qed.

(** Every initial state of the source program has a matching initial state
    in the target program. The initial memory is transported with
    [Genv.init_mem_transf_partial]; the states are then shown equal using
    [match_program_main] and [symbol_addresses_preserved]. *)
Lemma transf_initial_states:
  forall s1, Asmblock.initial_state prog s1 ->
  exists s2, Asm.initial_state tprog s2 /\ match_states s1 s2.
Proof.
  intros ? INIT_s1.
  inversion INIT_s1 as (m, ?, ge0, rs). unfold ge0 in *.
  econstructor; split.
  - econstructor.
    eapply (Genv.init_mem_transf_partial TRANSF); eauto.
  - rewrite (match_program_main TRANSF); rewrite symbol_addresses_preserved.
    reflexivity.
Qed.

(** A state matching a final Asmblock state is a final Asm state with the
    same result [r]. *)
Lemma transf_final_states:
  forall s1 s2 r,
  match_states s1 s2 -> Asmblock.final_state s1 r -> Asm.final_state s2 r.
Proof.
  intros s1 s2 r MATCH FINAL_s1.
  inv FINAL_s1; inv MATCH; constructor; assumption.
Qed.

(** [max_pos f] is the length, as an integer in [Z], of the code of the Asm
    function [f]. *)
Definition max_pos (f : Asm.function) := list_length_z f.(Asm.fn_code).

(** The code of a successfully translated function is at most
    [Ptrofs.max_unsigned] instructions long. The proof unfolds
    [transf_function] and case-splits on its [zlt] test: one branch cannot
    produce an [OK] result, and the other one gives the bound. *)
Lemma functions_bound_max_pos: forall fb f tf,
  Genv.find_funct_ptr ge fb = Some (Internal f) ->
  transf_function f = OK tf ->
  max_pos tf <= Ptrofs.max_unsigned.
Proof.
  intros fb f tf FINDf TRANSf.
  unfold transf_function in TRANSf.
  apply bind_inversion in TRANSf.
  destruct TRANSf as (c & TRANSf).
  destruct TRANSf as (_ & TRANSf).
  destruct (zlt _ _).
  - inversion TRANSf.
  - unfold max_pos.
    assert (Asm.fn_code tf = c) as H. { inversion TRANSf as (H'); auto. }
    rewrite H; lia.
Qed.

(** [Ptrofs.max_unsigned] is at least [1]; shown by unfolding the word size
    and case analysis on [Archi.ptr64]. *)
Lemma one_le_max_unsigned:
  1 <= Ptrofs.max_unsigned.
Proof.
  unfold Ptrofs.max_unsigned; simpl; unfold Ptrofs.wordsize;
  unfold Wordsize_Ptrofs.wordsize; destruct Archi.ptr64; simpl; lia.
Qed.

(** [incrPC] leaves every register other than [PC] unchanged. *)
Lemma incrPC_agree_but_pc:
  forall rs r ofs,
  r <> PC ->
  (incrPC ofs rs)#r = rs#r.
Proof.
  intros rs r ofs NOTPC.
  unfold incrPC; unfold Pregmap.set; destruct (PregEq.eq r PC).
  (* r = PC contradicts the hypothesis *)
  - contradiction.
  - reflexivity.
Qed.

(** A basic block has a non-empty body or an exit instruction. This is read
    off the [correct] hypothesis obtained by destructing the block, after
    unfolding [non_empty_bblockb]. *)
Lemma bblock_non_empty bb: body bb <> nil \/ exit bb <> None.
Proof.
  destruct bb. simpl.
  unfold non_empty_bblockb in correct.
  unfold non_empty_body, non_empty_exit, Is_true in correct.
  destruct body, exit.
  - right. discriminate.
  - contradiction.
  - right. discriminate.
  - left. discriminate.
Qed.

(** The accumulator-based length function never returns less than its
    initial accumulator. *)
Lemma list_length_z_aux_increase A (l: list A): forall acc,
  list_length_z_aux l acc >= acc.
Proof.
  induction l as [| x xs IH]; simpl; intros acc.
  - (* empty list: the result is the accumulator itself *)
    lia.
  - (* non-empty list: use the induction hypothesis on the next accumulator *)
    specialize (IH (Z.succ acc)).
    lia.
Qed.

(** The body length plus the exit length of a basic block is at least [1].
    Follows from [bblock_non_empty] by case analysis on the body and the
    exit. *)
Lemma bblock_size_aux_pos bb: list_length_z (body bb) + Z.of_nat (length_opt (exit bb)) >= 1.
Proof.
  destruct (bblock_non_empty bb), (body bb) as [|hd tl], (exit bb); simpl;
  try (congruence || lia);
  unfold list_length_z; simpl;
  generalize (list_length_z_aux_increase _ tl 1); lia.
Qed.

(** The accumulator-based length equals the plain length plus the initial
    accumulator. *)
Lemma list_length_add_acc A (l : list A) acc:
  list_length_z_aux l acc = (list_length_z l) + acc.
Proof.
    unfold list_length_z, list_length_z_aux. simpl.
    fold list_length_z_aux.
    rewrite (list_length_z_aux_shift l acc 0).
    lia.
Qed.

(** Adding an element at the head of a list increases [list_length_z] by
    one. *)
Lemma list_length_z_cons A hd (tl : list A):
  list_length_z (hd :: tl) = list_length_z tl + 1.
Proof.
  unfold list_length_z; simpl; rewrite list_length_add_acc; reflexivity.
Qed.

(** The size of a basic block is the sum of the lengths of its header, of
    its body and of its optional exit. *)
Lemma bblock_size_aux bb: size bb = list_length_z (header bb) + list_length_z (body bb) + Z.of_nat (length_opt (exit bb)).
Proof.
  unfold size.
  repeat (rewrite list_length_z_nat). repeat (rewrite Nat2Z.inj_add). reflexivity.
Qed.

(** The header of a basic block is strictly shorter than the whole block,
    because the body or the exit contributes at least one instruction
    ([bblock_non_empty]). *)
Lemma header_size_lt_block_size bb:
  list_length_z (header bb) < size bb.
Proof.
  rewrite bblock_size_aux.
  generalize (bblock_non_empty bb); intros NEMPTY; destruct NEMPTY as [HDR|EXIT].
  (* the body is not empty *)
  - destruct (body bb); try contradiction; rewrite list_length_z_cons;
    repeat rewrite list_length_z_nat; lia.
  (* the exit is present *)
  - destruct (exit bb); try contradiction; simpl; repeat rewrite list_length_z_nat; lia.
Qed.

(** The body of a basic block is no longer than the whole block. *)
Lemma body_size_le_block_size bb:
  list_length_z (body bb) <= size bb.
Proof.
  rewrite bblock_size_aux; repeat rewrite list_length_z_nat; lia.
Qed.

(** Every basic block contains at least one instruction. *)
Lemma bblock_size_pos bb: size bb >= 1.
Proof.
  (* Collect the two arithmetic facts first, then decompose the size. *)
  pose proof (bblock_size_aux_pos bb) as BODY_EXIT.
  pose proof (list_length_z_pos (header bb)) as HEADER.
  rewrite (bblock_size_aux bb).
  lia.
Qed.

(** If [unfold] succeeds on [bb :: bbs], then [unfold_bblock] succeeds on
    [bb], [unfold] succeeds on [bbs], and the overall result is the
    concatenation of the two partial results. *)
Lemma unfold_car_cdr bb bbs tc:
  unfold (bb :: bbs) = OK tc ->
  exists tbb tc', unfold_bblock bb = OK tbb
                  /\ unfold bbs = OK tc'
                  /\ unfold (bb :: bbs) = OK (tbb ++ tc').
Proof.
  intros UNFOLD.
  (* keep an untouched copy of the hypothesis for the third conjunct *)
  assert (UF := UNFOLD).
  unfold unfold in UNFOLD.
  apply bind_inversion in UNFOLD. destruct UNFOLD as (? & UBB). destruct UBB as (UBB & REST).
  apply bind_inversion in REST. destruct REST as (? & UNFOLD').
  fold unfold in UNFOLD'. destruct UNFOLD' as (UNFOLD' & UNFOLD).
  rewrite <- UNFOLD in UF.
  eauto.
Qed.

(** If [unfold] succeeds on [bb :: bbs], it also succeeds on the tail
    [bbs]. *)
Lemma unfold_cdr bb bbs tc:
  unfold (bb :: bbs) = OK tc ->
  exists tc', unfold bbs = OK tc'.
Proof.
  intros UNFOLD.
  apply unfold_car_cdr in UNFOLD.
  (* keep only the witness and the equation about the tail *)
  destruct UNFOLD as (tbb & tc' & _ & TAIL & _).
  exists tc'; exact TAIL.
Qed.

(** If [unfold] succeeds on [bb :: bbs], then [unfold_bblock] succeeds on
    the head block [bb]. *)
Lemma unfold_car bb bbs tc:
  unfold (bb :: bbs) = OK tc ->
  exists tbb, unfold_bblock bb = OK tbb.
Proof.
  intros UNFOLD.
  apply unfold_car_cdr in UNFOLD.
  (* keep only the witness and the equation about the head block *)
  destruct UNFOLD as (tbb & tc' & HEAD & _ & _).
  exists tbb; exact HEAD.
Qed.

(** If [unfold] succeeds on a list of basic blocks, then [unfold_bblock]
    succeeds on each block of the list. Proved by induction on the list. *)
Lemma all_blocks_translated:
  forall bbs tc,
  unfold bbs = OK tc ->
  forall bb, In bb bbs ->
  exists c, unfold_bblock bb = OK c.
Proof.
  induction bbs as [| bb bbs IHbbs].
  - contradiction.
  - intros ? UNFOLD ? IN.
    (* unfold proceeds by unfolding the basic block at the head of the list and
     * then recurring *)

    exploit unfold_car_cdr; eauto. intros (? & ? & ? & ? & _).
    (* basic block is either in head or tail *)
    inversion IN as [EQ | NEQ].
    + rewrite <- EQ; eexists; eauto.
    + eapply IHbbs; eauto.
Qed.

(** If [unfold_body] succeeds on a list of basic instructions, then
    [basic_to_instruction] succeeds on each instruction of the list.
    Proved by induction on the list. *)
Lemma entire_body_translated:
  forall lbi tc,
  unfold_body lbi = OK tc ->
  forall bi, In bi lbi ->
  exists bi', basic_to_instruction bi = OK bi'.
Proof.
  induction lbi as [| a lbi IHlbi].
  - intros. contradiction.
  - intros tc UNFOLD_BODY bi IN.
    unfold unfold_body in UNFOLD_BODY. apply bind_inversion in UNFOLD_BODY.
    destruct UNFOLD_BODY as (? & TRANSbi & REST).
    apply bind_inversion in REST. destruct REST as (? & UNFOLD_BODY' & ?).
    fold unfold_body in UNFOLD_BODY'.
    (* the instruction is either the head or in the tail *)
    inversion IN as [EQ | NEQ].
    + rewrite <- EQ; eauto.
    + eapply IHlbi; eauto.
Qed.

(** A block returned by [find_bblock] on a list belongs to that list.
    Proved by induction on the list and case analysis on the sign of the
    position. *)
Lemma bblock_in_bblocks bbs bb: forall
  tc pos
  (UNFOLD: unfold bbs = OK tc)
  (FINDBB: find_bblock pos bbs = Some bb),
  In bb bbs.
Proof.
  induction bbs as [| b bbs IH].
  - intros. inversion FINDBB.
  - destruct pos.
    (* position zero: the block is the head *)
    + intros. inversion FINDBB as (EQ). rewrite <- EQ. apply in_eq.
    (* positive position: the block is found in the tail *)
    + intros.
      exploit unfold_cdr; eauto. intros (tc' & UNFOLD').
      unfold find_bblock in FINDBB. simpl in FINDBB.
      fold find_bblock in FINDBB.
      apply in_cons. eapply IH; eauto.
    (* negative position: the lookup cannot succeed *)
    + intros. inversion FINDBB.
Qed.

(** A block found by [find_bblock] in a list on which [unfold] succeeds can
    itself be translated by [unfold_bblock]. *)
Lemma blocks_translated tc pos bbs bb: forall
  (UNFOLD: unfold bbs = OK tc)
  (FINDBB: find_bblock pos bbs = Some bb),
  exists tbb, unfold_bblock bb = OK tbb.
Proof.
  intros.
  eapply all_blocks_translated.
  - (* the whole list is translated *)
    eexact UNFOLD.
  - (* the block found belongs to the list *)
    eapply bblock_in_bblocks; eauto.
Qed.

(** The header of any block found in an internal function of the source
    program holds at most one label. The function is known to translate, so
    [unfold_bblock] succeeds on the block; case analysis on its test on the
    header length shows that the failing branch is impossible. *)
Lemma size_header b pos f bb: forall
  (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
  (FINDBB: find_bblock pos (fn_blocks f) = Some bb),
  list_length_z (header bb) <= 1.
Proof.
  intros.
  exploit internal_functions_unfold; eauto.
  intros (tc & FINDtf & TRANStf & ?).
  exploit blocks_translated; eauto. intros TBB.

  unfold unfold_bblock in TBB.
  destruct (zle (list_length_z (header bb)) 1).
  - assumption.
  - destruct TBB as (? & TBB). discriminate TBB.
Qed.

(** Looking up a negative index with [list_nth_z] returns [None]. *)
Lemma list_nth_z_neg A (l: list A): forall n,
  n < 0 -> list_nth_z l n = None.
Proof.
  induction l; simpl; auto.
  intros n H; destruct (zeq _ _); (try eapply IHl); lia.
Qed.

(** Looking up a negative position with [find_bblock] returns [None]. *)
Lemma find_bblock_neg bbs: forall pos,
  pos < 0 -> find_bblock pos bbs = None.
Proof.
  induction bbs; simpl; auto.
  intros. destruct (zlt pos 0). { reflexivity. }
  destruct (zeq pos 0); contradiction.
Qed.

(** [unfold_label] produces exactly one element per label of the header. *)
Lemma equal_header_size bb:
  length (header bb) = length (unfold_label (header bb)).
Proof.
  induction (header bb); auto.
  simpl. rewrite IHl. auto.
Qed.

(** When [unfold_body] succeeds on the body of a block, the result has the
    same length as the body. Proved by induction on the body. *)
Lemma equal_body_size:
  forall bb tb,
  unfold_body (body bb) = OK tb ->
  length (body bb) = length tb.
Proof.
  intros bb. induction (body bb).
  - simpl. intros ? H. inversion H. auto.
  - intros tb H. simpl in H. apply bind_inversion in H.
    destruct H as (? & BI & TAIL).
    apply bind_inversion in TAIL.
    destruct TAIL as (tb' & BODY' & CONS). inv CONS.
    simpl. specialize (IHl tb' BODY'). rewrite IHl. reflexivity.
Qed.

(** [unfold_exit] produces as many elements as [length_opt] counts for the
    optional exit of the block. *)
Lemma equal_exit_size bb:
  length_opt (exit bb) = length (unfold_exit (exit bb)).
Proof.
  destruct (exit bb); trivial.
Qed.

(** The translation of a basic block has as many instructions as the block
    itself. Combines [equal_header_size], [equal_body_size] and
    [equal_exit_size]. *)
Lemma bblock_size_preserved bb tb:
  unfold_bblock bb = OK tb ->
  size bb = list_length_z tb.
Proof.
  unfold unfold_bblock. intros UNFOLD_BBLOCK.
  (* the branch where the header test fails cannot return OK *)
  destruct (zle (list_length_z (header bb)) 1). 2: { inversion UNFOLD_BBLOCK. }
  apply bind_inversion in UNFOLD_BBLOCK.
  destruct UNFOLD_BBLOCK as (? & UNFOLD_BODY & CONS).
  inversion CONS.
  unfold size.
  rewrite equal_header_size, equal_exit_size.
  erewrite equal_body_size; eauto.
  rewrite list_length_z_nat.
  repeat (rewrite app_length).
  rewrite Nat.add_assoc. auto.
Qed.

(** If a block [bb] is found at position [pos] in a list of blocks that
    unfolds to [tbbs], then the block ends within [tbbs]:
    [pos + size bb <= list_length_z tbbs]. Proved by induction on the list
    of blocks. *)
Lemma size_of_blocks_max_pos_aux:
  forall bbs tbbs pos bb,
  find_bblock pos bbs = Some bb ->
  unfold bbs = OK tbbs ->
  pos + size bb <= list_length_z tbbs.
Proof.
  induction bbs as [| bb ? IHbbs].
  - intros tbbs ? ? FINDBB; inversion FINDBB.
  - simpl; intros tbbs pos bb' FINDBB UNFOLD.
    apply bind_inversion in UNFOLD; destruct UNFOLD as (tbb & UNFOLD_BBLOCK & H).
    apply bind_inversion in H; destruct H as (tbbs' & UNFOLD & CONS).
    inv CONS.
    destruct (zlt pos 0). { discriminate FINDBB. }
    destruct (zeq pos 0).
    (* the block found is the head of the list *)
    + inv FINDBB.
      exploit bblock_size_preserved; eauto; intros SIZE; rewrite SIZE.
      repeat (rewrite list_length_z_nat). rewrite app_length, Nat2Z.inj_add.
      lia.
    (* the block is found in the tail, at a position shifted by the head size *)
    + generalize (IHbbs tbbs' (pos - size bb) bb' FINDBB UNFOLD). intros IH.
      exploit bblock_size_preserved; eauto; intros SIZE.
      repeat (rewrite list_length_z_nat); rewrite app_length.
      rewrite Nat2Z.inj_add; repeat (rewrite <- list_length_z_nat).
      lia.
Qed.

(** A block found at position [pos] in a function that translates to [tf]
    ends no later than [max_pos tf]. Reduces to
    [size_of_blocks_max_pos_aux] after unfolding [transf_function]. *)
Lemma size_of_blocks_max_pos pos f tf bi:
  find_bblock pos (fn_blocks f) = Some bi ->
  transf_function f = OK tf ->
  pos + size bi <= max_pos tf.
Proof.
  unfold transf_function, max_pos.
  intros FINDBB UNFOLD.
  apply bind_inversion in UNFOLD. destruct UNFOLD as (? & UNFOLD & H).
  destruct (zlt Ptrofs.max_unsigned (list_length_z x)). { discriminate H. }
  inv H. simpl.
  eapply size_of_blocks_max_pos_aux; eauto.
Qed.

(** [unfold_bblock] never returns the empty list: the translation has the
    size of the block ([bblock_size_preserved]), which is at least one
    ([bblock_size_pos]). *)
Lemma unfold_bblock_not_nil bb:
  unfold_bblock bb = OK nil -> False.
Proof.
  intros.
  exploit bblock_size_preserved; eauto. unfold list_length_z; simpl. intros SIZE.
  generalize (bblock_size_pos bb). intros SIZE'. lia.
Qed.

(** If [Asm.find_instr] succeeds at position [n] in the code [c], then [n]
    lies between [0] (included) and the length of [c] (excluded). *)
Lemma find_instr_range:
  forall c n i,
  Asm.find_instr n c = Some i -> 0 <= n < list_length_z c.
Proof.
  induction c; simpl; intros.
  discriminate.
  rewrite list_length_z_cons. destruct (zeq n 0).
  generalize (list_length_z_pos c); lia.
  exploit IHc; eauto. lia.
Qed.

(** Prepending a code fragment [tbb] shifts the position of an instruction
    by the length of [tbb]. Proved by induction on [tbb]. *)
Lemma find_instr_tail:
  forall tbb pos c i,
  Asm.find_instr pos c = Some i ->
  Asm.find_instr (pos + list_length_z tbb) (tbb ++ c) = Some i.
Proof.
  induction tbb as [| ? ? IHtbb].
  - intros. unfold list_length_z; simpl. rewrite Z.add_0_r. assumption.
  - intros. rewrite list_length_z_cons. simpl.
    destruct (zeq (pos + (list_length_z tbb + 1)) 0).
    (* impossible: the shifted position is strictly positive *)
    + exploit find_instr_range; eauto. intros POS_RANGE.
      generalize (list_length_z_pos tbb). lia.
    + replace (pos + (list_length_z tbb + 1) - 1) with (pos + list_length_z tbb) by lia.
      eapply IHtbb; eauto.
Qed.

(** A block found at position [pos] in an internal function of the source
    program ends at a position bounded by [Ptrofs.max_unsigned]. *)
Lemma size_of_blocks_bounds fb pos f bi:
      Genv.find_funct_ptr ge fb = Some (Internal f) ->
      find_bblock pos (fn_blocks f) = Some bi ->
      pos + size bi <= Ptrofs.max_unsigned.
Proof.
  intros FINDF FINDBB.
  exploit internal_functions_translated; eauto.
  intros (tf & _ & TRANSf).
  (* the block ends within the translated code ... *)
  assert (BLOCK: pos + size bi <= max_pos tf)
    by (eapply size_of_blocks_max_pos; eauto).
  (* ... whose length is itself bounded *)
  assert (FUNC: max_pos tf <= Ptrofs.max_unsigned)
    by (eapply functions_bound_max_pos; eauto).
  lia.
Qed.

(** Variant of [find_instr_tail] in which the prepended fragment is the
    translation of a basic block [bb]: the position is shifted by
    [size bb]. *)
Lemma find_instr_bblock_tail:
  forall tbb bb pos c i,
  Asm.find_instr pos c = Some i ->
  unfold_bblock bb = OK tbb ->
  Asm.find_instr (pos + size bb ) (tbb ++ c) = Some i.
Proof.
  induction tbb.
   (* a block never translates to the empty list *)
   - intros. exploit unfold_bblock_not_nil; eauto. intros. contradiction.
   - intros. simpl.
     destruct (zeq (pos + size bb) 0).
     + (* absurd *)
       exploit find_instr_range; eauto. intros POS_RANGE.
       generalize (bblock_size_pos bb). intros SIZE. lia.
     + erewrite bblock_size_preserved; eauto.
       rewrite list_length_z_cons.
       replace (pos + (list_length_z tbb + 1) - 1) with (pos + list_length_z tbb) by lia.
       apply find_instr_tail; auto.
Qed.

(** If [l] is the [n]-th label of the list [ll], then [Asm.Plabel l] is the
    [n]-th instruction of [unfold_label ll] followed by any code [il]. *)
Lemma list_nth_z_find_label:
  forall (ll : list label) il n l,
  list_nth_z ll n = Some l ->
  Asm.find_instr n ((unfold_label ll) ++ il) = Some (Asm.Plabel l).
Proof.
  induction ll.
  - intros. inversion H.
  - intros. simpl.
    destruct (zeq n 0) as [Z | NZ].
    + inversion H as (H'). rewrite Z in H'. simpl in H'. inv H'. reflexivity.
    + simpl in H. destruct (zeq n 0). { contradiction. }
      apply IHll; auto.
Qed.

(** If [bi] is the [n]-th basic instruction of [lbi], [lbi] unfolds to
    [tlbi] and [bi] translates to [bi'], then [bi'] is the [n]-th
    instruction of [tlbi] followed by any code [exit]. *)
Lemma list_nth_z_find_bi:
  forall lbi bi tlbi n bi' exit,
  list_nth_z lbi n = Some bi ->
  unfold_body lbi = OK tlbi ->
  basic_to_instruction bi = OK bi' ->
  Asm.find_instr n (tlbi ++ exit) = Some bi'.
Proof.
  induction lbi.
  - intros. inversion H.
  - simpl. intros.
    apply bind_inversion in H0. destruct H0 as (? & ? & ?).
    apply bind_inversion in H2. destruct H2 as (? & ? & ?).
    destruct (zeq n 0) as [Z | NZ].
    + destruct n.
      * inversion H as (BI). rewrite BI in *.
        inversion H3. simpl. congruence.
      * (* absurd *) congruence.
      * (* absurd *) congruence.
    + inv H3. simpl. destruct (zeq n 0). { contradiction. }
      eapply IHlbi; eauto.
Qed.

(** Extension of [list_nth_z_find_bi] to code that starts with the unfolded
    labels [ll]: the position of the basic instruction in the body is [n]
    minus the number of labels. Proved by induction on [ll]. *)
Lemma list_nth_z_find_bi_with_header:
  forall ll lbi bi tlbi n bi' (rest : list Asm.instruction),
  list_nth_z lbi (n - list_length_z ll) = Some bi ->
  unfold_body lbi = OK tlbi ->
  basic_to_instruction bi = OK bi' ->
  Asm.find_instr n ((unfold_label ll) ++ (tlbi) ++ (rest)) = Some bi'.
Proof.
  induction ll.
  - unfold list_length_z. simpl. intros.
    replace (n - 0) with n in H by lia. eapply list_nth_z_find_bi; eauto.
  - intros. simpl. destruct (zeq n 0).
    (* n = 0 would give a negative index in the body, hence no instruction *)
    + rewrite list_length_z_cons in H. rewrite e in H.
      replace (0 - (list_length_z ll + 1)) with (-1 - (list_length_z ll)) in H by lia.
      generalize (list_length_z_pos ll). intros.
      rewrite list_nth_z_neg in H; try lia. inversion H.
    + rewrite list_length_z_cons in H.
      replace (n - (list_length_z ll + 1)) with (n -1 - (list_length_z ll)) in H by lia.
      eapply IHll; eauto.
Qed.

(** If a lookup at a non-negative index [n] fails, then [n] is at least the
    length of the list. *)
Lemma list_nth_z_n_too_big:
  forall (A: Type) (l: list A) n,
  0 <= n ->
  list_nth_z l n = None ->
  n >= list_length_z l.
Proof.
  induction l.
  - intros. unfold list_length_z. simpl. lia.
  - intros. rewrite list_length_z_cons.
    simpl in H0.
    destruct (zeq n 0) as [N | N].
    + inversion H0.
    + assert (n > 0). { lia. }
      assert (0 <= n - 1). { lia. }
      generalize (IHl (n - 1)). intros IH.
      assert (n - 1 >= list_length_z l). { auto. }
      assert (n > list_length_z l); lia.
Qed.

(** If position [n] does not designate a label of [labels], then looking up
    [n] in [unfold_label labels ++ rest] amounts to looking up
    [n - list_length_z labels] in [rest]. *)
Lemma find_instr_past_header:
  forall labels n rest,
  list_nth_z labels n = None ->
  Asm.find_instr n (unfold_label labels ++ rest) =
  Asm.find_instr (n - list_length_z labels) rest.
Proof.
  induction labels as [| label labels' IH].
  - unfold list_length_z; simpl; intros; rewrite Z.sub_0_r; reflexivity.
  - intros. simpl. destruct (zeq n 0) as [N | N].
    + rewrite N in H. inversion H.
    + rewrite list_length_z_cons.
      replace (n - (list_length_z labels' + 1)) with (n - 1 - list_length_z labels') by lia.
      simpl in H. destruct (zeq n 0). { contradiction. }
      replace (Z.pred n) with (n - 1) in H by lia.
      apply IH; auto.
Qed.

(** If position [n] does not designate a basic instruction of [lbi] and
    [lbi] unfolds to [tlbi], then looking up [n] in [tlbi ++ rest] amounts
    to looking up [n - list_length_z lbi] in [rest]. *)
Lemma find_instr_past_body:
  forall lbi n tlbi rest,
  list_nth_z lbi n = None ->
  unfold_body lbi = OK tlbi ->
  Asm.find_instr n (tlbi ++ rest) =
  Asm.find_instr (n - list_length_z lbi) rest.
Proof.
  induction lbi.
  - unfold list_length_z; simpl; intros ? ? ? ? H. inv H; rewrite Z.sub_0_r;
    reflexivity.
  - intros n tlib ? NTH UNFOLD_BODY.
    unfold unfold_body in UNFOLD_BODY. apply bind_inversion in UNFOLD_BODY.
    destruct UNFOLD_BODY as (? & BI & H).
    apply bind_inversion in H. destruct H as (? & UNFOLD_BODY' & CONS).
    fold unfold_body in UNFOLD_BODY'. inv CONS.
    simpl; destruct (zeq n 0) as [N|N].
    + rewrite N in NTH; inversion NTH.
    + rewrite list_length_z_cons.
      replace (n - (list_length_z lbi + 1)) with (n - 1 - list_length_z lbi) by lia.
      simpl in NTH. destruct (zeq n 0). { contradiction. }
      replace (Z.pred n) with (n - 1) in NTH by lia.
      apply IHlbi; auto.
Qed.

(** If a position [n] inside a block designates neither a label of the
    header nor a basic instruction of the body, then [n] is at least the
    combined length of the header and the body. Uses
    [list_nth_z_n_too_big] twice. *)
Lemma n_beyond_body:
  forall bb n,
  0 <= n < size bb ->
  list_nth_z (header bb) n = None ->
  list_nth_z (body bb) (n - list_length_z (header bb)) = None ->
  n >= Z.of_nat (length (header bb) + length (body bb)).
Proof.
  intros.
  assert (0 <= n). { lia. }
  generalize (list_nth_z_n_too_big label (header bb) n H2 H0). intros.
  generalize (list_nth_z_n_too_big _ (body bb) (n - list_length_z (header bb))). intros.
  unfold size in H.
  assert (0 <= n - list_length_z (header bb)). { lia. }
  assert (n - list_length_z (header bb) >= list_length_z (body bb)). { apply H4; auto. }
  assert (n >= list_length_z (header bb) + list_length_z (body bb)). { lia. }
  rewrite Nat2Z.inj_add.
  repeat (rewrite <- list_length_z_nat). assumption.
Qed.

(** Executing an arithmetic instruction leaves [PC] unchanged. Proved by
    case analysis on the instruction, discharging the register updates with
    [Pregmap.gso]. *)
Lemma exec_arith_instr_dont_move_PC ai rs rs' m: forall
  (BASIC: exec_arith_instr ai rs m = rs'),
  rs PC = rs' PC.
Proof.
  destruct ai; simpl; intros;
  try (rewrite <- BASIC; rewrite Pregmap.gso; auto; discriminate);
  destruct i; simpl in BASIC; rewrite <- BASIC;
  repeat rewrite Pregmap.gso; try discriminate; try reflexivity;
  try (unfold compare_float32 in BASIC || unfold compare_float in BASIC);
  destruct (rs r1); try destruct (rs r2); simpl; repeat rewrite Pregmap.gso; try discriminate; try reflexivity.
Qed.

(** Executing a basic instruction that terminates normally leaves [PC]
    unchanged. Proved by case analysis on the basic instruction; each bullet
    below handles one case produced by [destruct bi]. *)
Lemma exec_basic_dont_move_PC bi rs m rs' m': forall
  (BASIC: exec_basic ge bi rs m = Next rs' m'),
  rs PC = rs' PC.
Proof.
  destruct bi; simpl; intros.
  (* case handled through [exec_arith_instr_dont_move_PC] *)
  - inv BASIC. exploit exec_arith_instr_dont_move_PC; eauto.
  (* case whose semantics is [exec_load] *)
  - unfold exec_load in BASIC.
    repeat destruct ld;
    first [
      (* Pcldm *)
      destruct (Asm.ldm_iregs_wf ra l) eqn:WF; try discriminate BASIC;
      destruct (exec_load_multi_i rs#ra 0 l rs m) eqn:E;
      try discriminate BASIC; inv BASIC;
      symmetry; eapply Asmblockprops.exec_load_multi_i_pc; eauto
    | (* Pcvldm *)
      destruct (Asm.vldm_fregs_wf l) eqn:WF; try discriminate BASIC;
      destruct (exec_load_multi_f rs#ra 0 l rs m) eqn:E;
      try discriminate BASIC; inv BASIC;
      symmetry; eapply Asmblockprops.exec_load_multi_f_pc; eauto
    | (unfold exec_load_aux, exec_load_pi_aux, exec_load_pd_aux, exec_load_double_aux,
              exec_load_pi_double_aux, undef_flags in BASIC;
       repeat destruct (Mem.loadv _ _ _);
       try destruct o;
       inv BASIC; repeat rewrite !Pregmap.gso; congruence) ].
  (* case whose semantics is [exec_store] *)
  - unfold exec_store in BASIC.
    repeat destruct st;
    first [
      (* Pcstm *)
      destruct (Asm.stm_iregs_wf ra l) eqn:WF; try discriminate BASIC;
      destruct (exec_store_multi_i rs#ra 0 l rs m) eqn:E;
      try discriminate BASIC; inv BASIC; reflexivity
    | (* Pcvstm *)
      destruct (Asm.vstm_fregs_wf l) eqn:WF; try discriminate BASIC;
      destruct (exec_store_multi_f rs#ra 0 l rs m) eqn:E;
      try discriminate BASIC; inv BASIC; reflexivity
    | (unfold exec_store_aux, exec_store_pi_aux, exec_store_pd_aux, exec_store_double_aux,
              exec_store_pi_double_aux, undef_flags in BASIC;
       repeat destruct (Mem.storev _ _ _);
       try destruct o;
       inv BASIC; repeat rewrite !Pregmap.gso; auto; congruence) ].
  (* case whose semantics is [exec_memcpy] *)
  - revert BASIC. unfold exec_memcpy, exec_memcpy_aux, mcpy_rs.
    destruct cp; try (desif; try congruence);
    repeat (desmatch; try congruence);
    simpl; intros; inv BASIC; intros;
    unfold undef_flags; repeat (rewrite Pregmap.gso; try congruence).
  (* case going through [Mem.alloc] and [Mem.store] *)
  - destruct Mem.alloc, Mem.store. 2: { discriminate BASIC. }
    inv BASIC. repeat (rewrite Pregmap.gso; try discriminate). reflexivity.
  (* case going through [Mem.loadv] and [Mem.free] *)
  - destruct Mem.loadv. 2: { discriminate BASIC. }
    destruct rs, Mem.free; try discriminate BASIC;
    inv BASIC; auto.
  - inv BASIC; auto.
  - inv BASIC; auto.
  - inv BASIC; auto.
  (* case going through [Val.divs] *)
  - destruct (Val.divs (rs r1) (rs r2)); destruct (Archi.hardware_idiv tt);
    inv BASIC; auto.
  (* case going through [Val.divu] *)
  - destruct (Val.divu (rs r1) (rs r2)); destruct (Archi.hardware_idiv tt);
    inv BASIC; auto.
  (* second case whose semantics is [exec_memcpy] *)
  - revert BASIC. unfold exec_memcpy, exec_memcpy_aux, mcpy_rs.
    repeat (desmatch; try congruence);
    simpl; intros; inv BASIC; intros;
    unfold undef_flags; repeat (rewrite Pregmap.gso; try congruence).
Qed.

(** Executing a list of basic instructions that terminates normally leaves
    [PC] unchanged. Proved by induction on the list, using
    [exec_basic_dont_move_PC] for the head instruction. *)
Lemma exec_body_dont_move_PC_aux:
  forall bis rs m rs' m'
  (BODY: exec_body ge bis rs m = Next rs' m'),
  rs PC = rs' PC.
Proof.
  induction bis.
  - intros; inv BODY; reflexivity.
  - simpl; intros.
    remember (exec_basic ge a rs m) as bi eqn:BI; destruct bi. 2: { discriminate BODY. }
    symmetry in BI; simpl in BODY, BI.
    exploit exec_basic_dont_move_PC; eauto; intros AGPC; rewrite AGPC.
    eapply IHbis; eauto.
Qed.

(** Executing the body of a basic block leaves [PC] unchanged; instance of
    [exec_body_dont_move_PC_aux] for the body of [bb]. *)
Lemma exec_body_dont_move_PC bb rs m rs' m': forall
  (BODY: exec_body ge (body bb) rs m = Next rs' m'),
  rs PC = rs' PC.
Proof.
  intros BODY.
  eapply exec_body_dont_move_PC_aux.
  eexact BODY.
Qed.

(** Key lookup lemma. If block [bb] is found at position [pos] in the list
    [lb], [lb] unfolds to [tlb], and [n] is a valid position inside [bb],
    then some instruction [i] is the [n]-th instruction of [bb]
    ([is_nth_inst]) and is found at position [pos + n] in [tlb].
    Proved by induction on [lb] and case analysis on the sign of [pos]:
    - [pos] is zero: [bb] is the head block, and [n] designates a label, a
      basic instruction or the exit instruction;
    - [pos] is positive: the induction hypothesis is shifted by the size of
      the head block with [find_instr_bblock_tail];
    - [pos] is negative: impossible by [find_bblock_neg]. *)
Lemma find_instr_bblock:
  forall n lb pos bb tlb
  (FINDBB: find_bblock pos lb = Some bb)
  (UNFOLD: unfold lb = OK tlb)
  (SIZE: 0 <= n < size bb),
  exists i, is_nth_inst bb n i /\ Asm.find_instr (pos+n) tlb = Some i.
Proof.
  induction lb as [| b lb IHlb].
  - intros. inversion FINDBB.
  - intros pos bb tlb FINDBB UNFOLD SIZE.
    destruct pos.
    + inv FINDBB. simpl.
      exploit unfold_car_cdr; eauto. intros (tbb & tlb' & UNFOLD_BBLOCK & UNFOLD' & UNFOLD_cons).
      rewrite UNFOLD in UNFOLD_cons. inversion UNFOLD_cons.
      unfold unfold_bblock in UNFOLD_BBLOCK.
      destruct (zle (list_length_z (header bb)) 1). 2: { inversion UNFOLD_BBLOCK. }
      apply bind_inversion in UNFOLD_BBLOCK.
      destruct UNFOLD_BBLOCK as (? & UNFOLD_BODY & H).
      inversion H as (UNFOLD_BBLOCK).
      remember (list_nth_z (header bb) n) as label_opt eqn:LBL. destruct label_opt.
      * (* nth instruction is a label *)
        eexists; split. { eapply is_nth_label; eauto. }
        inversion UNFOLD_cons.
        symmetry in LBL.
        rewrite <- app_assoc.
        apply list_nth_z_find_label; auto.
      * remember (list_nth_z (body bb) (n - list_length_z (header bb))) as bi_opt eqn:BI.
        destruct bi_opt.
        -- (* nth instruction is a basic instruction *)
           exploit list_nth_z_in; eauto. intros INBB.
           exploit entire_body_translated; eauto. intros BI'.
           destruct BI'.
           eexists; split.
            ++ eapply is_nth_basic; eauto.
            ++ repeat (rewrite <- app_assoc). eapply list_nth_z_find_bi_with_header; eauto.
        -- (* nth instruction is the exit instruction *)
           generalize n_beyond_body. intros TEMP.
           assert (n >= Z.of_nat (Datatypes.length (header bb)
                        + Datatypes.length (body bb))) as NGE. { auto. } clear TEMP.
           remember (exit bb) as exit_opt eqn:EXIT. destruct exit_opt.
           ++ rewrite <- app_assoc. rewrite find_instr_past_header; auto.
              rewrite <- app_assoc. erewrite find_instr_past_body; eauto.
              assert (SIZE' := SIZE).
              unfold size in SIZE. rewrite <- EXIT in SIZE. simpl in SIZE.
              destruct SIZE as (LOWER & UPPER).
              repeat (rewrite Nat2Z.inj_add in UPPER).
              repeat (rewrite <- list_length_z_nat in UPPER). repeat (rewrite Nat2Z.inj_add in NGE).
              repeat (rewrite <- list_length_z_nat in NGE). simpl in UPPER.
              assert (n = list_length_z (header bb) + list_length_z (body bb)). { lia. }
              assert (n = size bb - 1). {
                unfold size. rewrite <- EXIT. simpl.
                repeat (rewrite Nat2Z.inj_add). repeat (rewrite <- list_length_z_nat). simpl. lia.
              }
              symmetry in EXIT.
              eexists; split.
              ** eapply is_nth_ctlflow; eauto.
              ** simpl.
                 destruct (zeq (n - list_length_z (header bb) - list_length_z (body bb)) 0). { reflexivity. }
                 (* absurd *) lia.
           ++ (* absurd *)
              unfold size in SIZE. rewrite <- EXIT in SIZE. simpl in SIZE.
              destruct SIZE as (? & SIZE'). rewrite Nat.add_0_r in SIZE'. lia.
    + unfold find_bblock in FINDBB; simpl in FINDBB; fold find_bblock in FINDBB.
      inversion UNFOLD as (UNFOLD').
      apply bind_inversion in UNFOLD'. destruct UNFOLD' as (? & (UNFOLD_BBLOCK' & UNFOLD')).
      apply bind_inversion in UNFOLD'. destruct UNFOLD' as (? & (UNFOLD' & TLB)).
      inversion TLB.
      generalize (IHlb _ _ _ FINDBB UNFOLD'). intros IH.
      destruct IH as (? & (IH_is_nth & IH_find_instr)); eauto.
      eexists; split.
      * apply IH_is_nth.
      * replace (Z.pos p + n) with (Z.pos p + n - size b + size b) by lia.
        eapply find_instr_bblock_tail; try assumption.
        replace (Z.pos p + n - size b) with (Z.pos p - size b + n) by lia.
        apply IH_find_instr.
    + (* absurd *)
      generalize (Pos2Z.neg_is_neg p). intros. exploit (find_bblock_neg (b :: lb)); eauto.
      rewrite FINDBB. intros CONTRA. inversion CONTRA.
Qed.

(** Simulation of the header of a basic block. Starting from a state whose
    [PC] points to the block [bb] of an internal function, the Asm semantics
    reaches, in zero or more silent steps, a state related to the initial one
    by [match_internal] at the offset given by the header length.
    The header has at most one label ([size_header]): with no label no step
    is taken, and with one label a single Asm step is taken. *)
Lemma exec_header_simulation b ofs f bb rs m: forall
  (ATPC: rs PC = Vptr b ofs)
  (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
  (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb),
  exists s', star Asm.step tge (State rs m) E0 s'
             /\ match_internal (list_length_z (header bb)) (State rs m) s'.
Proof.
  intros.
  exploit internal_functions_unfold; eauto.
  intros (tc & FINDtf & TRANStf & _).
  assert (BNDhead: list_length_z (header bb) <= 1). { eapply size_header; eauto. }
  destruct (header bb) as [|l[|]] eqn: EQhead.
  + (* header nil *)
    eexists; split.
    - eapply star_refl.
    - split; eauto.
      unfold list_length_z; rewrite !ATPC; simpl.
      rewrite Ptrofs.add_zero; auto.
  + (* header one *)
    assert (Lhead: list_length_z (header bb) = 1). { rewrite EQhead; unfold list_length_z; simpl. auto. }
    exploit (find_instr_bblock 0); eauto.
    { generalize (bblock_size_pos bb). lia. }
    intros (i & NTH & FIND_INSTR).
    inv NTH.
    * rewrite EQhead in H; simpl in H. inv H.
      replace (Ptrofs.unsigned ofs + 0) with (Ptrofs.unsigned ofs) in FIND_INSTR by lia.
      eexists. split.
      - eapply star_one.
        eapply Asm.exec_step_internal; eauto.
        simpl; eauto.
      - unfold list_length_z; simpl. split; eauto.
        intros r; destruct r; simpl; congruence || auto.
    * (* absurd case *)
      erewrite list_nth_z_neg in * |-; [ congruence | rewrite Lhead; lia].
    * (* absurd case *)
      rewrite bblock_size_aux, Lhead in *. generalize (bblock_size_aux_pos bb). lia.
  + (* absurd case *)
    unfold list_length_z in BNDhead. simpl in *.
    generalize (list_length_z_aux_increase _ l1 2); lia.
Qed.

(* If [rs1] and [rs2] coincide on every register other than [PC], then [rs1]
   also coincides with [Asm.nextinstr rs2] on every register other than [PC]:
   [Asm.nextinstr] only updates the [PC] slot. *)
Lemma nextinstr_agree_but_pc rs1 rs2: forall
  (AG: forall r, r <> PC -> rs1 r = rs2 r),
  forall r, r <> PC -> rs1 r = Asm.nextinstr rs2 r.
Proof.
  intros AG r NPC.
  unfold Asm.nextinstr.
  (* Reading a register different from PC skips the PC update. *)
  rewrite Pregmap.gso by exact NPC.
  apply AG; exact NPC.
Qed.

(* PC bookkeeping for one Asm step: if the PC of [rs2] is the PC of [rs1]
   shifted by [n] (with [n] representable as an unsigned pointer offset),
   then the PC of [Asm.nextinstr rs2] is the PC of [rs1] shifted by [n + 1]. *)
Lemma ptrofs_nextinstr_agree rs1 rs2 n: forall
  (BOUNDED : 0 <= n <= Ptrofs.max_unsigned)
  (AGPC : Val.offset_ptr (rs1 PC) (Ptrofs.repr n) = rs2 PC),
  Val.offset_ptr (rs1 PC) (Ptrofs.repr (n + 1)) = Asm.nextinstr rs2 PC.
Proof.
  intros; unfold Asm.nextinstr; rewrite Pregmap.gss.
  (* Turn [repr (n + 1)] into [Ptrofs.add (repr n) Ptrofs.one], split the
     offset with [Val.offset_ptr_assoc], then conclude with AGPC. *)
  rewrite <- Ptrofs.unsigned_one; rewrite <- (Ptrofs.unsigned_repr n); eauto;
  rewrite <- Ptrofs.add_unsigned; rewrite <- Val.offset_ptr_assoc; rewrite AGPC; eauto.
Qed.

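Multi-register load/store helpers: the exec_load_multi_i/f and exec_store_multi_i/f functions commute with register-set agreement (agreement on every register except PC) and do not depend on the register set's PC. These lemmas are used to prove the simulation of Pcldm, Pcvldm, Pcstm and Pcvstm.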

(* Multi-register integer load under register agreement.
   If [rs1] and [rs2] agree on every register except [PC] and the load
   succeeds from [rs1] with result [rs1'], then it also succeeds from [rs2]
   with some [rs2'] such that:
   - [rs1'] and [rs2'] still agree on every register except [PC];
   - the load leaves [PC] untouched on both sides. *)
Lemma exec_load_multi_i_match: forall l ofs base rs1 rs2 m rs1',
  (forall r, r <> PC -> rs1 r = rs2 r) ->
  exec_load_multi_i base ofs l rs1 m = Some rs1' ->
  exists rs2',
    exec_load_multi_i base ofs l rs2 m = Some rs2' /\
    (forall r, r <> PC -> rs1' r = rs2' r) /\
    rs1' PC = rs1 PC /\
    rs2' PC = rs2 PC.
Proof.
  induction l as [| [reg chunk] rest IHrest];
    intros ofs base rs1 rs2 m rs1' AGREE EXEC; simpl in *.
  - (* Empty list: the register set is returned unchanged. *)
    inv EXEC.
    exists rs2. repeat split; auto.
  - (* One load, then the remaining entries by induction. *)
    destruct (Mem.loadv chunk m (Val.add base (Vint (Int.repr ofs)))) as [val|] eqn:LOAD;
      [| discriminate].
    (* Writing the same value into the same register preserves agreement. *)
    assert (AGREE': forall p, p <> PC -> (rs1 # reg <- val) p = (rs2 # reg <- val) p).
    { intros p NPC. unfold Pregmap.set.
      destruct (PregEq.eq p reg); [reflexivity | apply AGREE; assumption]. }
    destruct (IHrest _ _ _ _ _ _ AGREE' EXEC) as (rs2' & EXEC2 & AGREE2 & PCEQ1 & PCEQ2).
    exists rs2'.
    split; [exact EXEC2 |].
    split; [exact AGREE2 |].
    split.
    + (* The destination register is never PC, so PC is unchanged. *)
      rewrite PCEQ1. rewrite Pregmap.gso by discriminate. reflexivity.
    + rewrite PCEQ2. rewrite Pregmap.gso by discriminate. reflexivity.
Qed.

(* Multi-register floating-point load under register agreement.
   If [rs1] and [rs2] agree on every register except [PC] and the load
   succeeds from [rs1] with result [rs1'], then it also succeeds from [rs2]
   with some [rs2'] such that:
   - [rs1'] and [rs2'] still agree on every register except [PC];
   - the load leaves [PC] untouched on both sides. *)
Lemma exec_load_multi_f_match: forall l ofs base rs1 rs2 m rs1',
  (forall r, r <> PC -> rs1 r = rs2 r) ->
  exec_load_multi_f base ofs l rs1 m = Some rs1' ->
  exists rs2',
    exec_load_multi_f base ofs l rs2 m = Some rs2' /\
    (forall r, r <> PC -> rs1' r = rs2' r) /\
    rs1' PC = rs1 PC /\
    rs2' PC = rs2 PC.
Proof.
  induction l as [| [reg chunk] rest IHrest];
    intros ofs base rs1 rs2 m rs1' AGREE EXEC; simpl in *.
  - (* Empty list: the register set is returned unchanged. *)
    inv EXEC.
    exists rs2. repeat split; auto.
  - (* One load, then the remaining entries by induction. *)
    destruct (Mem.loadv chunk m (Val.add base (Vint (Int.repr ofs)))) as [val|] eqn:LOAD;
      [| discriminate].
    (* Writing the same value into the same register preserves agreement. *)
    assert (AGREE': forall p, p <> PC -> (rs1 # reg <- val) p = (rs2 # reg <- val) p).
    { intros p NPC. unfold Pregmap.set.
      destruct (PregEq.eq p reg); [reflexivity | apply AGREE; assumption]. }
    destruct (IHrest _ _ _ _ _ _ AGREE' EXEC) as (rs2' & EXEC2 & AGREE2 & PCEQ1 & PCEQ2).
    exists rs2'.
    split; [exact EXEC2 |].
    split; [exact AGREE2 |].
    split.
    + (* The destination register is never PC, so PC is unchanged. *)
      rewrite PCEQ1. rewrite Pregmap.gso by discriminate. reflexivity.
    + rewrite PCEQ2. rewrite Pregmap.gso by discriminate. reflexivity.
Qed.

(* Multi-register integer store under register agreement.
   If [rs1] and [rs2] agree on every register except [PC], a store that
   succeeds from [rs1] with final memory [m'] succeeds from [rs2] with the
   same final memory. *)
Lemma exec_store_multi_i_match: forall l ofs base rs1 rs2 m m',
  (forall r, r <> PC -> rs1 r = rs2 r) ->
  exec_store_multi_i base ofs l rs1 m = Some m' ->
  exec_store_multi_i base ofs l rs2 m = Some m'.
Proof.
  induction l as [| [reg chunk] rest IHrest];
    intros ofs base rs1 rs2 m m' AGREE EXEC; simpl in *.
  - (* Empty list: nothing is stored. *)
    exact EXEC.
  - (* One store, then the remaining entries by induction. *)
    destruct (Mem.storev chunk m (Val.add base (Vint (Int.repr ofs))) (rs1 reg))
      as [mid|] eqn:STORE; [| discriminate].
    (* The stored register is not PC, so both register sets hold the same value. *)
    rewrite <- (AGREE reg) by discriminate.
    rewrite STORE.
    apply IHrest with rs1; auto.
Qed.

(* Multi-register floating-point store under register agreement.
   If [rs1] and [rs2] agree on every register except [PC], a store that
   succeeds from [rs1] with final memory [m'] succeeds from [rs2] with the
   same final memory. *)
Lemma exec_store_multi_f_match: forall l ofs base rs1 rs2 m m',
  (forall r, r <> PC -> rs1 r = rs2 r) ->
  exec_store_multi_f base ofs l rs1 m = Some m' ->
  exec_store_multi_f base ofs l rs2 m = Some m'.
Proof.
  induction l as [| [reg chunk] rest IHrest];
    intros ofs base rs1 rs2 m m' AGREE EXEC; simpl in *.
  - (* Empty list: nothing is stored. *)
    exact EXEC.
  - (* One store, then the remaining entries by induction. *)
    destruct (Mem.storev chunk m (Val.add base (Vint (Int.repr ofs))) (rs1 reg))
      as [mid|] eqn:STORE; [| discriminate].
    (* The stored register is not PC, so both register sets hold the same value. *)
    rewrite <- (AGREE reg) by discriminate.
    rewrite STORE.
    apply IHrest with rs1; auto.
Qed.

(* Simulation of a single load.
   If [exec_load_aux] succeeds on the source state and the source and target
   states match at offset [n], then [Asm.exec_load] succeeds on the target
   state and the resulting states match at offset [n + 1]. *)
Lemma load_aux_preserved n rs1 m1 rs1' m1' rs2 m2 (rd: dreg) chk a: forall
  (BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
  (MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
  (HLOAD: exec_load_aux chk a rd rs1 m1 = Next rs1' m1'),
  exists (rs2' : regset) (m2' : mem), Asm.exec_load chk a rd rs2 m2 = Next rs2' m2'
  /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2').
Proof.
  intros.
  unfold exec_load_aux, Asm.exec_load in *.
  inversion MATCHI; subst.
  (* A failing load contradicts HLOAD. *)
  destruct (Mem.loadv _ _ _); try congruence.
  inversion HLOAD. repeat econstructor.
  - (* Agreement on registers other than PC. *)
    apply nextinstr_agree_but_pc; intros.
    destruct (PregEq.eq r rd); subst; repeat rewrite Pregmap.gss;
    repeat rewrite Pregmap.gso; auto.
  - (* PC of the target is PC of the source shifted by n + 1. *)
    apply ptrofs_nextinstr_agree. auto.
    destruct rd; auto.
Qed.

(* Simulation of a single load handled by [exec_load_pi_aux] on the source
   side and [Asm.exec_load_pi] on the target side (destination [rd], extra
   register [ra]).
   If the source execution succeeds and the states match at offset [n], the
   target execution succeeds and the results match at offset [n + 1]. *)
Lemma load_pi_aux_preserved n rs1 m1 rs1' m1' rs2 m2 (rd ra: dreg) chk a: forall
  (BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
  (MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
  (HLOAD: exec_load_pi_aux chk a rd ra rs1 m1 = Next rs1' m1'),
  exists (rs2' : regset) (m2' : mem), Asm.exec_load_pi chk a rd ra rs2 m2 = Next rs2' m2'
  /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2').
Proof.
  intros.
  unfold exec_load_pi_aux, Asm.exec_load_pi in *.
  inversion MATCHI; subst.
  (* Replace a non-PC register read from rs2 by the same read from rs1. *)
  rewrite <- AG; try congruence.
  destruct (Mem.loadv _ _ _); try congruence.
  inversion HLOAD. repeat econstructor.
  - (* Agreement on registers other than PC: case analysis on whether the
       register is rd, ra, or some other register. *)
    apply nextinstr_agree_but_pc; intros;
    destruct (PregEq.eq r rd); subst; unfold undef_flags.
    + rewrite !Pregmap.gss. auto.
    + destruct r; auto; rewrite Pregmap.gso; auto;
      symmetry; rewrite Pregmap.gso; auto;
      try destruct (PregEq.eq d ra); try rewrite e;
      try rewrite !Pregmap.gss; auto;
      rewrite !Pregmap.gso; try rewrite AG; auto.
  - (* PC of the target is PC of the source shifted by n + 1. *)
    apply ptrofs_nextinstr_agree. auto.
    destruct rd; auto.
Qed.

(* Simulation of a single load handled by [exec_load_pd_aux] on the source
   side and [Asm.exec_load_pd] on the target side (destination [rd], extra
   register [ra]).
   If the source execution succeeds and the states match at offset [n], the
   target execution succeeds and the results match at offset [n + 1]. *)
Lemma load_pd_aux_preserved n rs1 m1 rs1' m1' rs2 m2 (rd ra: dreg) chk a: forall
  (BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
  (MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
  (HLOAD: exec_load_pd_aux chk a rd ra rs1 m1 = Next rs1' m1'),
  exists (rs2' : regset) (m2' : mem), Asm.exec_load_pd chk a rd ra rs2 m2 = Next rs2' m2'
  /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2').
Proof.
  intros.
  unfold exec_load_pd_aux, Asm.exec_load_pd in *.
  inversion MATCHI; subst.
  (* A failing load contradicts HLOAD. *)
  destruct (Mem.loadv _ _ _); try congruence.
  inversion HLOAD. repeat econstructor.
  - (* Agreement on registers other than PC: case analysis on whether the
       register is rd, ra, or some other register. *)
    apply nextinstr_agree_but_pc; intros;
    destruct (PregEq.eq r rd); subst; unfold undef_flags.
    + rewrite !Pregmap.gss. auto.
    + destruct r; auto; rewrite Pregmap.gso; auto;
      symmetry; rewrite Pregmap.gso; auto;
      try destruct (PregEq.eq d ra); try rewrite e;
      try rewrite !Pregmap.gss; auto;
      rewrite !Pregmap.gso; try rewrite AG; auto.
  - (* PC of the target is PC of the source shifted by n + 1. *)
    apply ptrofs_nextinstr_agree. auto.
    destruct rd; auto.
Qed.

(* Simulation of a double load (two registers, addresses [a] and [a + 4]).
   Precondition HTHUMB: either Thumb mode is enabled, or [rd2] is the
   register given by [ls_double_next_reg rd1].
   If the source execution succeeds and the states match at offset [n], then
   [Asm.exec_load_double] succeeds on the target and the results match at
   offset [n + 1]. *)
Lemma load_double_aux_preserved n rs1 m1 rs1' m1' rs2 m2 (rd1 rd2: ireg) chk1 chk2 a: forall
  (BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
  (MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
  (HTHUMB: thumb tt = true \/ ls_double_next_reg rd1 = Some rd2)
  (HLOAD: exec_load_double_aux chk1 chk2 a (Val.add a (Vint (Int.repr 4))) rd1 rd2 rs1 m1 = Next rs1' m1'),
  exists (rs2' : regset) (m2' : mem), Asm.exec_load_double chk1 chk2 a rd1 rd2 rs2 m2 = Next rs2' m2'
  /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2').
Proof.
  intros.
  unfold exec_load_double, exec_load_double_aux in *.
  inversion MATCHI; subst.
  (* Both alternatives of HTHUMB go through the same chain: make the register
     validity check of the target reduce to true, rule out failing loads,
     then prove the PC goal with ptrofs_nextinstr_agree and the register
     agreement goal by comparing the queried register with rd2, then rd1. *)
  destruct HTHUMB; unfold ls_double_valid_regs; rewrite H;
  try (destruct rd2 eqn:R2; rewrite orb_true_r); try rewrite orb_true_l; simpl;
  repeat destruct (Mem.loadv _ _ _); try congruence;
  inversion HLOAD;
  repeat econstructor;
  try (apply ptrofs_nextinstr_agree; auto);
  apply nextinstr_agree_but_pc; intros;
  destruct (PregEq.eq r rd2); subst; repeat rewrite Pregmap.gss; auto;
  rewrite Pregmap.gso; auto; symmetry; rewrite Pregmap.gso; auto;
  destruct (PregEq.eq r rd1); subst; repeat rewrite Pregmap.gss; auto;
  rewrite Pregmap.gso; auto; symmetry; rewrite Pregmap.gso; auto.
Qed.

(* Simulation of a double load handled by [exec_load_pi_double_aux] on the
   source side and [Asm.exec_load_pi_double] on the target side (destinations
   [rd1] and [rd2], extra register [ra]), assuming Thumb mode is enabled.
   If the source execution succeeds and the states match at offset [n], the
   target execution succeeds and the results match at offset [n + 1]. *)
Lemma load_pi_double_aux_preserved n rs1 m1 rs1' m1' rs2 m2 (rd1 rd2 ra: ireg)
                                   chk1 chk2 addr_new: forall
  (BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
  (MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
  (HTHUMB: thumb tt = true)
  (HLOAD: exec_load_pi_double_aux chk1 chk2 addr_new rd1 rd2 ra rs1 m1 = Next rs1' m1'),
  exists (rs2' : regset) (m2' : mem),
    Asm.exec_load_pi_double chk1 chk2 addr_new rd1 rd2 ra rs2 m2 = Next rs2' m2'
    /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2').
Proof.
  intros.
  unfold exec_load_pi_double_aux, Asm.exec_load_pi_double in *.
  (* EQR: agreement on registers other than PC. *)
  inversion MATCHI as [n0 r1 mx1 r2 mx2 EQM EQR EQPC]; subst.
  rewrite HTHUMB; simpl.
  (* Replace non-PC register reads from rs2 by reads from rs1. *)
  rewrite <- !EQR; try discriminate.
  repeat destruct (Mem.loadv _ _ _); try congruence.
  inversion HLOAD. repeat (econstructor; eauto).
  (* Second goal: the PC relation. *)
  2: { apply ptrofs_nextinstr_agree; subst; auto. }
  (* First goal: agreement on registers other than PC, by case analysis on
     the queried register. *)
  apply nextinstr_agree_but_pc; intros r0 HrPC; unfold undef_flags.
  destruct r0 as [d|c|].
  - (* DR d *)
    (* Compare d with rd2, then rd1, then ra; otherwise fall back on EQR. *)
    destruct (PregEq.eq (DR d) (DR (IR rd2))) as [Heq2|Hne2].
    + rewrite Heq2. rewrite !Pregmap.gss. reflexivity.
    + rewrite (Pregmap.gso _ _ Hne2). symmetry.
      rewrite (Pregmap.gso _ _ Hne2). symmetry.
      destruct (PregEq.eq (DR d) (DR (IR rd1))) as [Heq1|Hne1].
      * rewrite Heq1. rewrite !Pregmap.gss. reflexivity.
      * rewrite (Pregmap.gso _ _ Hne1). symmetry.
        rewrite (Pregmap.gso _ _ Hne1). symmetry.
        destruct (PregEq.eq (DR d) (DR (IR ra))) as [Heqa|Hnea].
        -- rewrite Heqa. rewrite !Pregmap.gss. reflexivity.
        -- rewrite (Pregmap.gso _ _ Hnea). symmetry.
           rewrite (Pregmap.gso _ _ Hnea). symmetry.
           apply EQR. discriminate.
  - (* CR c *) reflexivity.
  - (* PC *) congruence.
Qed.

(* Simulation of a double load whose source side performs the two loads in
   the opposite order: HLOAD runs [exec_load_double_aux] with [chk2], address
   [a + 4] and [rd2] first, then [chk1], address [a] and [rd1], while the
   target runs [Asm.exec_load_double chk1 chk2 a rd1 rd2].
   Requires [rd2] to be the register given by [ls_double_next_reg rd1].
   If the source execution succeeds and the states match at offset [n], the
   target execution succeeds and the results match at offset [n + 1]. *)
Lemma load_double_reverse_aux_preserved n rs1 m1 rs1' m1' rs2 m2 (rd1 rd2: ireg) chk1 chk2 a: forall
  (BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
  (MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
  (HREGS: ls_double_next_reg rd1 = Some rd2)
  (HLOAD: exec_load_double_aux chk2 chk1 (Val.add a (Vint (Int.repr 4))) a rd2 rd1 rs1 m1 = Next rs1' m1'),
  exists (rs2' : regset) (m2' : mem), Asm.exec_load_double chk1 chk2 a rd1 rd2 rs2 m2 = Next rs2' m2'
  /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2').
Proof.
  intros.
  unfold exec_load_double, exec_load_double_aux in *.
  inversion MATCHI; subst.
  (* Single chain: make the register validity check reduce to true, rule out
     failing loads, prove the PC goal with ptrofs_nextinstr_agree, then prove
     register agreement by enumerating rd1 (HREGS then fixes rd2) and
     comparing the queried register with rd1 and rd2. *)
  unfold ls_double_valid_regs; rewrite HREGS;
  try (destruct rd2 eqn:R2; rewrite orb_true_r); try rewrite orb_true_l; simpl;
  repeat destruct (Mem.loadv _ _ _); try congruence;
  inversion HLOAD;
  repeat econstructor;
  try (apply ptrofs_nextinstr_agree; auto);
  apply nextinstr_agree_but_pc; intros;
  destruct rd1 eqn:R1; unfold ls_double_next_reg in *; try congruence;
  destruct (PregEq.eq r rd1); destruct (PregEq.eq r rd2); try congruence;
  subst; repeat rewrite Pregmap.gss;
  try (rewrite Pregmap.gso; auto; rewrite Pregmap.gss; auto);
  rewrite !Pregmap.gso; auto; try congruence; rewrite AG; auto.
Qed.

(* Simulation of a single store.
   If [exec_store_aux] succeeds on the source state and the source and target
   states match at offset [n], then [Asm.exec_store] succeeds on the target
   state and the resulting states match at offset [n + 1]. *)
Lemma store_aux_preserved n rs1 m1 rs1' m1' rs2 m2 (v: dreg) chk a: forall
  (BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
  (MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
  (HSTORE: exec_store_aux chk a v rs1 m1 = Next rs1' m1'),
  exists (rs2' : regset) (m2' : mem), Asm.exec_store chk a v rs2 m2 = Next rs2' m2'
  /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2').
Proof.
  intros.
  unfold exec_store_aux, Asm.exec_store in *.
  (* EQR: agreement on registers other than PC. *)
  inversion MATCHI as [n0 r1 mx1 r2 mx2 EQM EQR EQPC]; subst.
  (* The stored register is read from rs1 instead of rs2. *)
  rewrite <- EQR; try discriminate.
  destruct (Mem.storev _ _ _ _); try congruence.
  inversion HSTORE; auto. repeat (econstructor; eauto).
  - apply nextinstr_agree_but_pc. subst. auto.
  - apply ptrofs_nextinstr_agree; subst; auto.
Qed.

(* Simulation of a single store handled by [exec_store_pi_aux] on the source
   side and [Asm.exec_store_pi] on the target side (stored register [v],
   extra register [ra]).
   If the source execution succeeds and the states match at offset [n], the
   target execution succeeds and the results match at offset [n + 1]. *)
Lemma store_pi_aux_preserved n rs1 m1 rs1' m1' rs2 m2 (v ra: dreg) chk a: forall
  (BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
  (MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
  (HSTORE: exec_store_pi_aux chk a v ra rs1 m1 = Next rs1' m1'),
  exists (rs2' : regset) (m2' : mem), Asm.exec_store_pi chk a v ra rs2 m2 = Next rs2' m2'
  /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2').
Proof.
  intros.
  unfold exec_store_pi_aux, Asm.exec_store_pi in *.
  (* EQR: agreement on registers other than PC. *)
  inversion MATCHI as [n0 r1 mx1 r2 mx2 EQM EQR EQPC]; subst.
  (* Replace non-PC register reads from rs2 by reads from rs1. *)
  rewrite <- !EQR; try discriminate.
  destruct (Mem.storev _ _ _ _); try congruence.
  inversion HSTORE; auto. repeat (econstructor; eauto).
  - (* Agreement on registers other than PC: only ra needs a case split. *)
    apply nextinstr_agree_but_pc. subst.
    intros. unfold undef_flags.
    destruct r; auto;
    try destruct (PregEq.eq d ra); try rewrite e;
    try rewrite !Pregmap.gss; auto;
    rewrite !Pregmap.gso; auto.
  - (* PC of the target is PC of the source shifted by n + 1. *)
    apply ptrofs_nextinstr_agree; subst; auto.
Qed.

(* Simulation of a single store handled by [exec_store_pd_aux] on the source
   side and [Asm.exec_store_pd] on the target side (stored register [v],
   extra register [ra]).
   If the source execution succeeds and the states match at offset [n], the
   target execution succeeds and the results match at offset [n + 1]. *)
Lemma store_pd_aux_preserved n rs1 m1 rs1' m1' rs2 m2 (v ra: dreg) chk a: forall
  (BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
  (MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
  (HSTORE: exec_store_pd_aux chk a v ra rs1 m1 = Next rs1' m1'),
  exists (rs2' : regset) (m2' : mem), Asm.exec_store_pd chk a v ra rs2 m2 = Next rs2' m2'
  /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2').
Proof.
  intros.
  unfold exec_store_pd_aux, Asm.exec_store_pd in *.
  (* EQR: agreement on registers other than PC. *)
  inversion MATCHI as [n0 r1 mx1 r2 mx2 EQM EQR EQPC]; subst.
  (* Replace non-PC register reads from rs2 by reads from rs1. *)
  rewrite <- !EQR; try discriminate.
  destruct (Mem.storev _ _ _ _); try congruence.
  inversion HSTORE; auto. repeat (econstructor; eauto).
  - (* Agreement on registers other than PC: only ra needs a case split. *)
    apply nextinstr_agree_but_pc. subst.
    intros. unfold undef_flags.
    destruct r; auto;
    try destruct (PregEq.eq d ra); try rewrite e;
    try rewrite !Pregmap.gss; auto;
    rewrite !Pregmap.gso; auto.
  - (* PC of the target is PC of the source shifted by n + 1. *)
    apply ptrofs_nextinstr_agree; subst; auto.
Qed.

(* Simulation of a double store handled by [exec_store_pi_double_aux] on the
   source side and [Asm.exec_store_pi_double] on the target side (stored
   registers [v1] and [v2], extra register [ra]), assuming Thumb mode is
   enabled.
   If the source execution succeeds and the states match at offset [n], the
   target execution succeeds and the results match at offset [n + 1]. *)
Lemma store_pi_double_aux_preserved n rs1 m1 rs1' m1' rs2 m2 (v1 v2 ra: ireg)
                                    chk1 chk2 addr_new: forall
  (BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
  (MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
  (HTHUMB: thumb tt = true)
  (HSTORE: exec_store_pi_double_aux chk1 chk2 addr_new v1 v2 ra rs1 m1 = Next rs1' m1'),
  exists (rs2' : regset) (m2' : mem),
    Asm.exec_store_pi_double chk1 chk2 addr_new v1 v2 ra rs2 m2 = Next rs2' m2'
    /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2').
Proof.
  intros.
  unfold exec_store_pi_double_aux, Asm.exec_store_pi_double in *.
  (* EQR: agreement on registers other than PC. *)
  inversion MATCHI as [n0 r1 mx1 r2 mx2 EQM EQR EQPC]; subst.
  rewrite HTHUMB; simpl.
  (* Replace non-PC register reads from rs2 by reads from rs1. *)
  rewrite <- !EQR; try discriminate.
  (* Two stores; a failure of either contradicts HSTORE. *)
  destruct (Mem.storev _ _ _ _); try congruence.
  destruct (Mem.storev _ _ _ _); try congruence.
  inversion HSTORE. repeat (econstructor; eauto).
  (* Second goal: the PC relation. *)
  2: { apply ptrofs_nextinstr_agree; subst; auto. }
  (* First goal: agreement on registers other than PC. *)
  apply nextinstr_agree_but_pc; intros r0 HrPC; unfold undef_flags.
  destruct r0 as [d|c|].
  - (* DR d *)
    (* Only ra is written among the data registers. *)
    destruct (PregEq.eq (DR d) (DR (IR ra))) as [Heqa|Hnea].
    + rewrite Heqa. rewrite !Pregmap.gss. reflexivity.
    + rewrite (Pregmap.gso _ _ Hnea). symmetry.
      rewrite (Pregmap.gso _ _ Hnea). symmetry.
      apply EQR. discriminate.
  - (* CR c *) reflexivity.
  - (* PC *) congruence.
Qed.

(* Simulation of a double store (two registers, addresses [a] and [a + 4]).
   Precondition HTHUMB: either Thumb mode is enabled, or [v2] is the register
   given by [ls_double_next_reg v1].
   If the source execution succeeds and the states match at offset [n], then
   [Asm.exec_store_double] succeeds on the target and the results match at
   offset [n + 1]. *)
Lemma store_double_aux_preserved n rs1 m1 rs1' m1' rs2 m2 (v1 v2: ireg) chk1 chk2 a: forall
  (BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
  (MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
  (HTHUMB: thumb tt = true \/ ls_double_next_reg v1 = Some v2)
  (HSTORE: exec_store_double_aux chk1 chk2 a (Val.add a (Vint (Int.repr 4))) v1 v2 rs1 m1 = Next rs1' m1'),
  exists (rs2' : regset) (m2' : mem), Asm.exec_store_double chk1 chk2 a v1 v2 rs2 m2 = Next rs2' m2'
  /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2').
Proof.
  intros.
  unfold exec_store_double_aux, exec_store_double in *.
  (* EQR: agreement on registers other than PC. *)
  inversion MATCHI as [n0 r1 mx1 r2 mx2 EQM EQR EQPC]; subst.
  (* Both alternatives of HTHUMB go through the same chain: make the register
     validity check reduce to true, read the stored registers from rs1,
     rule out failing stores, then close the register agreement and PC
     goals. *)
  destruct HTHUMB; unfold ls_double_valid_regs; rewrite H;
  try (destruct v2 eqn:R2; rewrite orb_true_r); try rewrite orb_true_l; simpl;
  rewrite <- !EQR; try discriminate;
  repeat destruct (Mem.storev _ _ _ _); try congruence;
  inversion HSTORE; auto; repeat (econstructor; eauto);
  try (apply nextinstr_agree_but_pc; subst; auto);
  apply ptrofs_nextinstr_agree; subst; auto.
Qed.

(* For a 4-aligned offset [i], the 4-byte ranges starting at [i] and at
   [i + 4] (addition taken in Ptrofs, hence possibly wrapping around) do not
   overlap: either [i + 4] lies at or above the end of the first range, or
   the range starting at [i + 4] ends at or below [i]. *)
Lemma conseq_addr32_other i:
  forall
  (Halign: (4 | Ptrofs.unsigned i)),
  Ptrofs.unsigned i + 4 <=
  Ptrofs.unsigned (Ptrofs.add i (Ptrofs.of_int (Int.repr 4))) \/
  Ptrofs.unsigned (Ptrofs.add i (Ptrofs.of_int (Int.repr 4))) + 4 <=
  Ptrofs.unsigned i.
Proof.
  Local Transparent Archi.ptr64.
  (* 4294967292 is singled out: it is the case closed by computation on the
     right disjunct; every other aligned offset is shown not to wrap. *)
  destruct (Ptrofs.eq_dec i (Ptrofs.repr 4294967292)).
  + subst. right. vm_compute. discriminate.
  + left.
    (* No wrap-around: the Ptrofs addition coincides with addition on Z. *)
    assert (Ptrofs.unsigned i + 4 =
            Ptrofs.unsigned (Ptrofs.add i (Ptrofs.of_int (Int.repr 4)))).
    { rewrite Ptrofs.add_unsigned.
      replace (Ptrofs.unsigned (Ptrofs.of_int (Int.repr 4)))
         with (Int.unsigned (Int.repr 4))
           by (symmetry; apply Ptrofs.agree32_of_int; auto).
      change (Int.unsigned (Int.repr 4)) with 4.
      symmetry. apply Ptrofs.unsigned_repr.
      (* Zdivide_interval: an aligned offset below the modulus is at most
         modulus - 4. *)
      exploit (Zdivide_interval (Ptrofs.unsigned i) Ptrofs.modulus 4);
      unfold Ptrofs.modulus, Ptrofs.wordsize, Ptrofs.max_unsigned;
      unfold Wordsize_Ptrofs.wordsize, two_power_nat.
      lia. apply Ptrofs.unsigned_range. auto.
      exists (two_p (Ptrofs.zwordsize - 2)). reflexivity.
      assert (Ptrofs.unsigned i <> 4294967292).
      { intro x. apply n. symmetry. rewrite <- Ptrofs.repr_unsigned. congruence. }
      simpl. lia. }
    lia.
  Local Opaque Archi.ptr64.
Qed.

(* Variant of conseq_addr32_other without the alignment hypothesis: it
   instead assumes that [Ptrofs.unsigned i + 4] does not exceed
   [Ptrofs.max_unsigned]. Under that bound the Ptrofs addition does not wrap,
   so the left disjunct always holds. *)
Lemma conseq_addr32_other_al1 i:
  Ptrofs.unsigned i + 4 <= Ptrofs.max_unsigned ->
  Ptrofs.unsigned i + 4 <=
  Ptrofs.unsigned (Ptrofs.add i (Ptrofs.of_int (Int.repr 4))) \/
  Ptrofs.unsigned (Ptrofs.add i (Ptrofs.of_int (Int.repr 4))) + 4 <=
  Ptrofs.unsigned i.
Proof.
  Local Transparent Archi.ptr64.
  intro Hbound. left.
  (* The Ptrofs addition coincides with addition on Z. *)
  assert (Ptrofs.unsigned i + 4 =
          Ptrofs.unsigned (Ptrofs.add i (Ptrofs.of_int (Int.repr 4)))).
  { rewrite Ptrofs.add_unsigned.
    replace (Ptrofs.unsigned (Ptrofs.of_int (Int.repr 4)))
       with (Int.unsigned (Int.repr 4))
         by (symmetry; apply Ptrofs.agree32_of_int; auto).
    change (Int.unsigned (Int.repr 4)) with 4.
    symmetry. apply Ptrofs.unsigned_repr. generalize (Ptrofs.unsigned_range i). lia. }
  lia.
  Local Opaque Archi.ptr64.
Qed.

(* Commutation of two stores at addresses [a] and [a + 4].
   If both chunks satisfy [ls_double_valid_chunk] and each store succeeds on
   its own from [m] (giving [m1] and [m2] respectively), then performing the
   first store after the second gives the same result as performing the
   second store after the first. *)
Lemma conseq_storev_storev_other chk1 chk2 a v1 v2 m m1 m2:
  forall
  (DCHK: ls_double_valid_chunk chk1 && ls_double_valid_chunk chk2 = true),
  Mem.storev chk1 m a v1 = Some m1 ->
  Mem.storev chk2 m (Val.add a (Vint (Int.repr 4))) v2 = Some m2 ->
  Mem.storev chk1 m2 a v1 = Mem.storev chk2 m1 (Val.add a (Vint (Int.repr 4))) v2.
Proof.
  Local Transparent Archi.ptr64.
  intros. unfold ls_double_valid_chunk in *.
  (* Only pointer addresses can be stored to. *)
  destruct a; simpl in *; try congruence.
  exploit Mem.store_valid_access_3. eexact H. intros [Hperm [Halign1 Halign2]].
  (* Enumerate the chunk pairs allowed by DCHK and commute the stores with
     Mem.store_store_other; the cases closed on this line are those where
     conseq_addr32_other applies. *)
  destruct chk1, chk2; simpl in *; try congruence; clear DCHK;
  erewrite Mem.store_store_other; eauto; simpl;
  try (right; apply conseq_addr32_other; auto; fail).
  (* Remaining Mint32al1 cases: ofs = 0xFFFFFFFC handled separately *)
  all: right;
    destruct (Ptrofs.eq_dec i (Ptrofs.repr 4294967292));
    [ subst; right; vm_compute; discriminate
    | apply conseq_addr32_other_al1;
      assert (Ptrofs.unsigned i <> 4294967292)
        by (intro x; apply n; symmetry; rewrite <- Ptrofs.repr_unsigned; congruence);
      (* From Hperm (store validity): ofs+3 has Writable permission,
         so ofs <= 4294967292. Combined with <>, ofs <= 4294967291. *)
      (* NOTE(review): HP3 is only posed and never applied to a range proof,
         so lia appears to have just Ptrofs.unsigned_range and the
         disequality above to work with. Confirm that this step really
         establishes HPBOUND. *)

      assert (HPBOUND: Ptrofs.unsigned i <= 4294967292);
      [ simpl in Hperm;
        pose proof (Hperm (Ptrofs.unsigned i + 3)) as HP3;
        simpl in HP3;
        pose proof (Ptrofs.unsigned_range i);
        lia
      | ];
      unfold Ptrofs.max_unsigned, Ptrofs.modulus, Ptrofs.wordsize,
             Wordsize_Ptrofs.wordsize, two_power_nat;
      lia ].
  Local Opaque Archi.ptr64.
Qed.

(* Reordering of a successful pair of stores.
   If storing [v2] at [a + 4] and then [v1] at [a] succeeds from [m] with
   final memory [m'], then storing [v1] at [a] first and [v2] at [a + 4]
   second also succeeds from [m] and yields the same [m'].
   Both chunks must satisfy [ls_double_valid_chunk]. *)
Lemma conseq_storev_storev_other' chk1 chk2 a v1 v2 m m1 m':
  forall
  (DCHK: ls_double_valid_chunk chk1 && ls_double_valid_chunk chk2 = true),
  Mem.storev chk2 m (Val.add a (Vint (Int.repr 4))) v2 = Some m1 /\
  Mem.storev chk1 m1 a v1 = Some m' ->
  exists m2,
  Mem.storev chk1 m a v1 = Some m2 /\
  Mem.storev chk2 m2 (Val.add a (Vint (Int.repr 4))) v2 = Some m'.
Proof.
  (* NOTE(review): unlike the neighbouring lemmas, there is no matching
     Local Opaque Archi.ptr64 before Qed. Confirm whether this is intended,
     since later proofs are then checked with Archi.ptr64 transparent. *)
  Local Transparent Archi.ptr64.
  intros. destruct H as [S1 S2].
  destruct (Mem.storev chk1 m a v1) eqn:S3.
  - (* The first store succeeds on m: commute with the previous lemma. *)
    exploit conseq_storev_storev_other; eauto.
    intros. rewrite H in S2.
    exists m0. split; auto.
  - (* The first store fails on m: contradiction, because the access is
       valid in m1 (S2) and validity transfers back from m1 to m (S1). *)
    destruct a; simpl in *; try congruence.
    remember S1 as A1. clear HeqA1.
    apply Mem.store_valid_access_3 in A1.
    remember S2 as A2. clear HeqA2.
    apply Mem.store_valid_access_3 in A2.
    eapply Mem.store_valid_access_2 in A2; try apply S1.
    destruct (Mem.valid_access_store m chk1 b (Ptrofs.unsigned i) v1); congruence.
Qed.

(* Simulation of a double store whose source side performs the two stores in
   the opposite order: HSTORE runs [exec_store_double_aux] with [chk2],
   address [a + 4] and [v2] first, then [chk1], address [a] and [v1], while
   the target runs [Asm.exec_store_double chk1 chk2 a v1 v2].
   Precondition HTHUMB: either Thumb mode is enabled, or [v2] is the register
   given by [ls_double_next_reg v1]. Both chunks must satisfy
   [ls_double_valid_chunk] so that the two stores can be swapped.
   If the source execution succeeds and the states match at offset [n], the
   target execution succeeds and the results match at offset [n + 1]. *)
Lemma store_double_reverse_aux_preserved n rs1 m1 rs1' m1' rs2 m2 (v1 v2: ireg) chk1 chk2 a: forall
  (BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
  (MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
  (HTHUMB: thumb tt = true \/ ls_double_next_reg v1 = Some v2)
  (HSTORE: exec_store_double_aux chk2 chk1 (Val.add a (Vint (Int.repr 4))) a v2 v1 rs1 m1 = Next rs1' m1')
  (DCHK: ls_double_valid_chunk chk1 && ls_double_valid_chunk chk2 = true),
  exists (rs2' : regset) (m2' : mem), Asm.exec_store_double chk1 chk2 a v1 v2 rs2 m2 = Next rs2' m2'
  /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2').
Proof.
  intros.
  unfold exec_store_double_aux, exec_store_double in *.
  (* EQR: agreement on registers other than PC. *)
  inversion MATCHI as [n0 r1 mx1 r2 mx2 EQM EQR EQPC]; subst.
  unfold ls_double_valid_regs.
  (* Single chain for both alternatives of HTHUMB: make the register validity
     check reduce to true, name the two source stores S2 and S1, swap them
     with conseq_storev_storev_other' to obtain the target order S1' then
     S2', and finally close the register agreement and PC goals. *)
  destruct HTHUMB as [HCOND|HCOND]; rewrite HCOND;
  try (destruct (PregEq.eq v2 v2); try congruence;
       simpl; rewrite orb_true_r); simpl;
  destruct (Mem.storev chk2 _ _ _) eqn:S2; try congruence;
  destruct (Mem.storev chk1 _ _ _) eqn:S1; try congruence;
  inv HSTORE;
  exploit conseq_storev_storev_other'; try split; eauto;
  intros (m3 & S1' & S2');
  do 2 eexists; find_rwrt_ag; erewrite S1', S2';
  repeat (econstructor; eauto);
  try (apply nextinstr_agree_but_pc; auto);
  apply ptrofs_nextinstr_agree; auto.
Qed.

Lemma exec_basic_simulation:
  forall tf n rs1 m1 rs1' m1' rs2 m2 bi tbi
  (BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
  (BASIC: exec_basic ge bi rs1 m1 = Next rs1' m1')
  (MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
  (TRANSBI: basic_to_instruction bi = OK tbi),
  exists rs2' m2', Asm.exec_instr tge tf tbi
                                  rs2 m2 = Next rs2' m2'
                   /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2').
Proof.
  intros.
  destruct bi.
  (* PArith *)
  - repeat (destruct i; simpl in *);
    try (destruct sumbool_rec; try congruence);
    monadInv TRANSBI; repeat hdmatchinv;
    inversion MATCHI; inversion BASIC; subst;
    repeat eexists.
    all: tryif (lazymatch goal with
                | |- context [compare_int_sbc] => idtac
                end)
         then (
           try (eapply ptrofs_nextinstr_agree; subst; eauto);
           (apply nextinstr_agree_but_pc; intros r NPC;
                unfold compare_int_sbc;
                destruct so; cbn -[Pregmap.set])
         ) else (
           repeat unfold arith_uf_fn, undef_flags, nextinstr_nf,
                         eval_shift_op, eval_testcond,
                         compare_float, compare_float32, compare_int, compare_int_test,
                         compare_int_sbc, compare_int_add;
           repeat find_rwrt_ag;
           repeat desmatch;
           try (eapply ptrofs_nextinstr_agree; subst; eauto);
           try (apply nextinstr_agree_but_pc; intros;
           repeat assign_once)
         ).
    (* For each Psbcs sub-case (6 from destruct so), rewrite rs1 X to rs2 X via AG, then peel registers *)
    all: rewrite ?(AG r1) by congruence;
         rewrite ?(AG CC) by congruence;
         try (rewrite (AG _) by congruence).
    all: destruct (PregEq.eq r rd) as [Hrd|Hrd];
         [ subst; rewrite ! Pregmap.gss; reflexivity | ];
         rewrite Pregmap.gso by congruence;
         symmetry; rewrite Pregmap.gso by congruence; symmetry;
         destruct (PregEq.eq r CV) as [Hcv|Hcv];
         [ subst; rewrite ! Pregmap.gss; reflexivity | ];
         rewrite Pregmap.gso by congruence;
         symmetry; rewrite Pregmap.gso by congruence; symmetry;
         destruct (PregEq.eq r CC) as [Hcc|Hcc];
         [ subst; rewrite ! Pregmap.gss; reflexivity | ];
         rewrite Pregmap.gso by congruence;
         symmetry; rewrite Pregmap.gso by congruence; symmetry;
         destruct (PregEq.eq r CZ) as [Hcz|Hcz];
         [ subst; rewrite ! Pregmap.gss; reflexivity | ];
         rewrite Pregmap.gso by congruence;
         symmetry; rewrite Pregmap.gso by congruence; symmetry;
         destruct (PregEq.eq r CN) as [Hcn|Hcn];
         [ subst; rewrite ! Pregmap.gss; reflexivity | ];
         rewrite Pregmap.gso by congruence;
         symmetry; rewrite Pregmap.gso by congruence; symmetry;
         apply AG; congruence.
  (* PLoad *)
  - Local Transparent Archi.ptr64.
    assert (forall a, Ptrofs.agree32 (Ptrofs.of_int a) a) as Hptr.
    { apply Ptrofs.agree32_of_int; reflexivity. }
    assert (forall a i,
            (Val.add (Val.add a (Vint i)) (Vint (Int.repr 4)) =
             Val.add a (Vint (Int.add i (Int.repr 4))))) as Hadd.
    { intros. unfold Val.add. destruct a; try congruence; simpl.
      - rewrite Int.add_assoc. reflexivity.
      - rewrite Ptrofs.add_assoc. repeat f_equal.
        rewrite <- (Ptrofs.agree32_of_int_eq _ (Int.add i (Int.repr 4))). reflexivity.
        apply Ptrofs.agree32_add; auto. }
    repeat destruct ld.
    1-22:
      monadInv TRANSBI;
      inversion BASIC; inversion MATCHI;
      subst; simpl;
      try destruct o;
      try eapply load_aux_preserved in H0;
      try eapply load_pi_aux_preserved in H0;
      try eapply load_pd_aux_preserved in H0;
      eauto; find_rwrt_ag.
    + simpl in TRANSBI. unfold gen_load_double in TRANSBI.
      desifH TRANSBI; try congruence.
      desifH TRANSBI.
      (* thumb mode *)
      * desifH TRANSBI; try congruence.
        apply andb_prop in Heqb1. destruct Heqb1 as [_ Heqb1].
        apply Int.same_if_eq in Heqb1.
        monadInv TRANSBI.
        inversion BASIC. inversion MATCHI.
        subst. simpl.
        rewrite <- Hadd in H0.
        eapply load_double_aux_preserved in H0; eauto.
        find_rwrt_ag.
      (* arm mode *)
      * unfold ls_double_constraints, ls_pair_reg in TRANSBI.
        destruct rd1, rd2;
        repeat (desifH TRANSBI; try apply Int.same_if_eq in Heqb1; subst;
                try clear Heqb1; try congruence);
        simpl in TRANSBI; try congruence;
        monadInv TRANSBI;
        inversion BASIC; inversion MATCHI;
        subst; simpl;
        rewrite <- Hadd in H0;
        try eapply load_double_aux_preserved in H0;
        try eapply load_double_reverse_aux_preserved in H0;
        try congruence; eauto;
        find_rwrt_ag.
    + (* Pclddi_pi *)
      simpl in TRANSBI. unfold gen_load_pi_double in TRANSBI.
      desifH TRANSBI; try congruence.
      desifH TRANSBI; try congruence.
      monadInv TRANSBI.
      inversion BASIC; inversion MATCHI; subst; simpl.
      eapply load_pi_double_aux_preserved in H0; eauto.
      find_rwrt_ag.
    + (* Pcldm *)
      simpl in TRANSBI, BASIC.
      inversion TRANSBI; subst; clear TRANSBI.
      destruct (Asm.ldm_iregs_wf ra l) eqn:WF; try discriminate BASIC.
      destruct (exec_load_multi_i rs1#ra 0 l rs1 m1) as [rsX|] eqn:E; try discriminate.
      inversion BASIC; subst; clear BASIC.
      inversion MATCHI; subst.
      destruct (exec_load_multi_i_match l 0 (rs1 ra) rs1 rs2 m2 rs1' AG E)
        as (rs2' & E2 & A & PC1 & PC2).
      exists (Asm.nextinstr rs2'). exists m2.
      split.
      * simpl. rewrite WF.
        replace (rs2 ra) with (rs1 ra) by (apply AG; discriminate).
        rewrite E2. reflexivity.
      * constructor.
        -- reflexivity.
        -- apply nextinstr_agree_but_pc; auto.
        -- rewrite PC1. apply ptrofs_nextinstr_agree; auto. rewrite <- PC2 in AGPC. exact AGPC.
    + (* Pcvldm *)
      simpl in TRANSBI, BASIC.
      inversion TRANSBI; subst; clear TRANSBI.
      destruct (Asm.vldm_fregs_wf l) eqn:WF; try discriminate BASIC.
      destruct (exec_load_multi_f rs1#ra 0 l rs1 m1) as [rsX|] eqn:E; try discriminate.
      inversion BASIC; subst; clear BASIC.
      inversion MATCHI; subst.
      destruct (exec_load_multi_f_match l 0 (rs1 ra) rs1 rs2 m2 rs1' AG E)
        as (rs2' & E2 & A & PC1 & PC2).
      exists (Asm.nextinstr rs2'). exists m2.
      split.
      * simpl. rewrite WF.
        replace (rs2 ra) with (rs1 ra) by (apply AG; discriminate).
        rewrite E2. reflexivity.
      * constructor.
        -- reflexivity.
        -- apply nextinstr_agree_but_pc; auto.
        -- rewrite PC1. apply ptrofs_nextinstr_agree; auto. rewrite <- PC2 in AGPC. exact AGPC.
 (* PStore *)
  - assert (forall a, Ptrofs.agree32 (Ptrofs.of_int a) a) as Hptr.
    { apply Ptrofs.agree32_of_int; reflexivity. }
    assert (forall a i,
            (Val.add (Val.add a (Vint i)) (Vint (Int.repr 4)) =
             Val.add a (Vint (Int.add i (Int.repr 4))))) as Hadd.
    { intros. unfold Val.add. destruct a; try congruence; simpl.
      - rewrite Int.add_assoc. reflexivity.
      - rewrite Ptrofs.add_assoc. repeat f_equal.
        rewrite <- (Ptrofs.agree32_of_int_eq _ (Int.add i (Int.repr 4))). reflexivity.
        apply Ptrofs.agree32_add; auto. }
    repeat destruct st.
    1-16:
      monadInv TRANSBI;
      inversion BASIC; inversion MATCHI;
      subst; simpl;
      try destruct o;
      try eapply store_aux_preserved in H0;
      try eapply store_pi_aux_preserved in H0;
      try eapply store_pd_aux_preserved in H0;
      eauto; find_rwrt_ag.
    + simpl in TRANSBI. unfold gen_store_double in TRANSBI.
      desifH TRANSBI; try congruence.
      desifH TRANSBI.
      (* thumb mode *)
      * repeat desifH TRANSBI; try congruence.
        apply Int.same_if_eq in Heqb1.
        monadInv TRANSBI.
        inversion BASIC. inversion MATCHI. subst. simpl.
        rewrite <- Hadd in H0.
        eapply store_double_aux_preserved in H0; eauto.
        find_rwrt_ag.
        rewrite andb_comm in Heqb.
        apply Int.same_if_eq in Heqb2.
        monadInv TRANSBI.
        inversion BASIC. inversion MATCHI. subst. simpl.
        rewrite <- Hadd in H0.
        eapply store_double_reverse_aux_preserved in H0; eauto.
        find_rwrt_ag.
      (* arm mode *)
      * unfold ls_double_constraints, ls_pair_reg in TRANSBI.
        destruct rs0, rs3; try congruence;
        repeat (desifH TRANSBI; try apply Int.same_if_eq in Heqb1; subst;
                try clear Heqb1; try congruence);
        simpl in TRANSBI; try congruence;
        monadInv TRANSBI;
        inversion BASIC; inversion MATCHI;
        subst; simpl;
        rewrite <- Hadd in H0;
        try eapply store_double_aux_preserved in H0;
        try eapply store_double_reverse_aux_preserved in H0;
        try congruence; eauto;
        find_rwrt_ag;
        destruct chk1, chk2; simpl; simpl in *;
        try congruence; reflexivity.
    + (* Pcstdi_pi *)
      simpl in TRANSBI. unfold gen_store_pi_double in TRANSBI.
      desifH TRANSBI; try congruence.
      desifH TRANSBI; try congruence.
      monadInv TRANSBI.
      inversion BASIC; inversion MATCHI; subst; simpl.
      eapply store_pi_double_aux_preserved in H0; eauto.
      find_rwrt_ag.
    + (* Pcstm *)
      simpl in TRANSBI, BASIC.
      inversion TRANSBI; subst tbi; clear TRANSBI.
      destruct (Asm.stm_iregs_wf ra l) eqn:WF; try discriminate BASIC.
      destruct (exec_store_multi_i rs1#ra 0 l rs1 m1) as [m1''|] eqn:E; try discriminate.
      inversion BASIC; subst rs1' m1'; clear BASIC.
      inversion MATCHI; subst.
      pose proof (exec_store_multi_i_match l 0 (rs1 ra) rs1 rs2 m2 m1'' AG E) as E2.
      exists (Asm.nextinstr rs2). exists m1''.
      split.
      * simpl. rewrite WF.
        replace (rs2 ra) with (rs1 ra) by (apply AG; discriminate).
        rewrite E2. reflexivity.
      * constructor.
        -- reflexivity.
        -- apply nextinstr_agree_but_pc; auto.
        -- apply ptrofs_nextinstr_agree; auto.
    + (* Pcvstm *)
      simpl in TRANSBI, BASIC.
      inversion TRANSBI; subst tbi; clear TRANSBI.
      destruct (Asm.vstm_fregs_wf l) eqn:WF; try discriminate BASIC.
      destruct (exec_store_multi_f rs1#ra 0 l rs1 m1) as [m1''|] eqn:E; try discriminate.
      inversion BASIC; subst rs1' m1'; clear BASIC.
      inversion MATCHI; subst.
      pose proof (exec_store_multi_f_match l 0 (rs1 ra) rs1 rs2 m2 m1'' AG E) as E2.
      exists (Asm.nextinstr rs2). exists m1''.
      split.
      * simpl. rewrite WF.
        replace (rs2 ra) with (rs1 ra) by (apply AG; discriminate).
        rewrite E2. reflexivity.
      * constructor.
        -- reflexivity.
        -- apply nextinstr_agree_but_pc; auto.
        -- apply ptrofs_nextinstr_agree; auto.
  (* PMemcpy *)
  - Local Opaque PregEq.eq.
    destruct cp; simpl in BASIC;
    destruct mas, mad; monadInv TRANSBI; simpl;
    unfold exec_memcpy_aux in BASIC; simpl in BASIC;
    inversion MATCHI; subst;
    unfold Asm.exec_memcpy;
    unfold mcpy_rs_use_src, mcpy_rs_use_addr, mcpy_rs in *;
    try (destruct (PregEq.eq rad tr); simpl in *; try congruence);
    try (destruct (PregEq.eq rad tr1), (PregEq.eq rad tr2); simpl in *; try congruence);
    try (destruct (PregEq.eq ras rad); simpl in *; try congruence);
    try (desif; try congruence);
    find_rwrt_ag;
    try rewrite Pregmap.gss in *; desmatch; try congruence;
    try rewrite Pregmap.gss in *;
    try rewrite !Pregmap.gso in *; try congruence;
    find_rwrt_ag;
    repeat (desmatch; try congruence);
    do 2 eexists; split; auto;
    inversion BASIC;
    econstructor; auto; simpl;
    try (apply ptrofs_nextinstr_agree; eauto);
    apply nextinstr_agree_but_pc;
    intros; unfold undef_flags; repeat assign_once.
    Local Transparent PregEq.eq.
  (* Pallocframe *)
  - monadInv TRANSBI. simpl. simpl in BASIC. inversion MATCHI; subst.
    destruct sz eqn:EQSZ;
    destruct Mem.alloc eqn:EQALLOC;
    rewrite <- AG; try congruence;
    destruct Mem.store eqn:EQSTOR;
    inversion BASIC;
    eexists; eexists; split; auto;
    econstructor; auto;
    try (eapply ptrofs_nextinstr_agree; eauto);
    apply nextinstr_agree_but_pc; intros;
    repeat assign_once.
  (* Pfreeframe *)
  - monadInv TRANSBI. simpl. simpl in BASIC. inversion MATCHI; subst.
    destruct sz eqn:EQSZ;
    rewrite <- AG; try congruence;
    destruct Mem.loadv eqn:EQLOAD;
    destruct (rs1 SP) eqn:EQRS1SP;
    try (destruct Mem.free eqn:EQFREE);
    inversion BASIC;
    eexists; eexists; split; auto;
    econstructor; auto;
    try (eapply ptrofs_nextinstr_agree; eauto);
    apply nextinstr_agree_but_pc; intros;
    assign_once.
  (* Ploadsymbol *)
  - monadInv TRANSBI;
    inversion BASIC. inversion MATCHI.
    subst. simpl. repeat eexists.
    apply nextinstr_agree_but_pc. intros.
    rewrite symbol_addresses_preserved.
    assign_once.
    eapply ptrofs_nextinstr_agree; eauto.
  (* Pnop *)
  - monadInv TRANSBI;
    inversion BASIC. inversion MATCHI.
    subst. simpl. repeat eexists.
    apply nextinstr_agree_but_pc. apply AG.
    eapply ptrofs_nextinstr_agree; eauto.
  (* Pci_rel_offset *)
  - monadInv TRANSBI;
    inversion BASIC. inversion MATCHI.
    subst. simpl. repeat eexists.
    apply nextinstr_agree_but_pc. apply AG.
    eapply ptrofs_nextinstr_agree; eauto.
  (* Psdiv *)
  - monadInv TRANSBI. simpl. simpl in BASIC. inversion MATCHI; subst.
    subst. simpl.
    find_rwrt_ag.
    destruct (Val.divs (rs1 r1) (rs1 r2));
    destruct (Archi.hardware_idiv tt); auto;
    try congruence;
    inversion BASIC; repeat eexists;
    try (eapply ptrofs_nextinstr_agree; eauto);
    apply nextinstr_agree_but_pc; intros;
    repeat assign_once.
  (* Pudiv *)
  - monadInv TRANSBI. simpl. simpl in BASIC. inversion MATCHI; subst.
    simpl. find_rwrt_ag.
    destruct (Val.divu (rs1 r1) (rs1 r2));
    destruct (Archi.hardware_idiv tt); auto;
    try congruence;
    inversion BASIC; repeat eexists;
    try (eapply ptrofs_nextinstr_agree; eauto);
    apply nextinstr_agree_but_pc; intros;
    repeat assign_once.
  (* Pmemcpyf64 *)
  - revert BASIC. monadInv TRANSBI. simpl. inv MATCHI.
    unfold exec_memcpy_aux, Asm.exec_memcpy,
           mcpy_rs_use_src, mcpy_rs_use_addr, mcpy_rs.
    find_rwrt_ag; rewrite !Pregmap.gss.
    repeat (desmatch; try congruence). intros.
    do 2 eexists. split. auto.
    inversion BASIC.
    econstructor; auto; simpl;
    try (apply ptrofs_nextinstr_agree; eauto).
    apply nextinstr_agree_but_pc.
    intros. unfold undef_flags. repeat assign_once.
Qed.

(** Locating the translation of a basic instruction in the generated code.

    Assume [f] is the internal function at block [b], [bb] is the basic
    block found at position [Ptrofs.unsigned ofs] in [fn_blocks f], the
    blocks of [f] unfold to the Asm code [tc], and [n] is a valid index
    into [body bb].  Then the [n]-th basic instruction [bi] of the body
    translates (via [basic_to_instruction]) to an Asm instruction [i], and
    [i] is found in [tc] at position
    [Ptrofs.unsigned ofs + list_length_z (header bb) + n]. *)
Lemma find_basic_instructions b ofs f bb tc n: forall
  (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
  (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb)
  (UNFOLD: unfold (fn_blocks f) = OK tc)
  (BOUND: 0 <= n < list_length_z (body bb)),
  exists (i : Asm.instruction) (bi : basic),
     list_nth_z (body bb) n = Some bi
  /\ basic_to_instruction bi = OK i
  /\ Asm.find_instr (Ptrofs.unsigned ofs
                     + (list_length_z (header bb))
                     + n) tc
                     = Some i.
Proof.
  (* Restate the bound on [n] over natural numbers. *)
  intros; assert ((Z.to_nat n) < length (body bb))%nat.
  { rewrite Nat2Z.inj_lt, <- list_length_z_nat, Z2Nat.id; try lia. }
  (* The unfolded code provided by [internal_functions_unfold] must be [tc]. *)
  exploit internal_functions_unfold; eauto.
  intros (tc' & FINDtf & TRANStf & _).
  assert (tc' = tc) by congruence; subst.
  (* Look up the instruction at index [header length + n] inside the block;
     the side condition is that this index lies within [size bb]. *)
  exploit (find_instr_bblock (list_length_z (header bb) + n)). eauto. eauto.
  { unfold size. split.
    - rewrite list_length_z_nat; lia.
    - repeat (rewrite list_length_z_nat). repeat (rewrite Nat2Z.inj_add). lia. }
  intros (i & NTH & FIND_INSTR).
  exists i; intros.
  (* Case analysis on where the index falls; only the middle case is
     consistent with the bounds. *)
  inv NTH.
  - (* absurd *) apply list_nth_z_range in H0; lia.
  - exists bi.
    rewrite Z.add_simpl_l in H0.
    rewrite Z.add_assoc in FIND_INSTR.
    intuition.
  - (* absurd *) rewrite bblock_size_aux in H1;
    rewrite H0 in H1; simpl in H1; repeat rewrite list_length_z_nat in H1; lia.
Qed.

(** Arithmetic side lemma: if the block [bb] placed at offset [ofs] fits
    below [Ptrofs.max_unsigned], and [a :: li] is no longer than [body bb]
    (hypothesis [BDYLENPOS]), then the quantity
    [list_length_z (header bb) + list_length_z (body bb) - list_length_z (a :: li)]
    lies in the range [0 .. Ptrofs.max_unsigned]. *)
Lemma header_body_tail_bound: forall (a: basic) (li: list basic) bb ofs
  (BOUNDBB : Ptrofs.unsigned ofs + size bb <= Ptrofs.max_unsigned)
  (BDYLENPOS : 0 <= list_length_z (body bb) - list_length_z (a :: li) <
              list_length_z (body bb)),
0 <= list_length_z (header bb) + list_length_z (body bb) - list_length_z (a :: li) <=
Ptrofs.max_unsigned.
Proof.
  intros.
  (* Collect the numeric facts that the final [lia] needs. *)
  assert (HBBPOS: list_length_z (header bb) >= 0) by eapply list_length_z_pos.
  assert (HBBSIZE: list_length_z (header bb) < size bb) by eapply header_size_lt_block_size.
  assert (OFSBOUND: 0 <= Ptrofs.unsigned ofs <= Ptrofs.max_unsigned) by eapply Ptrofs.unsigned_range_2.
  assert (BBSIZE: size bb <= Ptrofs.max_unsigned) by lia.
  (* Express [size bb] through [list_length_z] so it can be compared with the goal. *)
  unfold size in BBSIZE.
  rewrite !Nat2Z.inj_add in BBSIZE.
  rewrite <- !list_length_z_nat in BBSIZE.
  lia.
Qed.

(** Simulation of the execution of a non-empty tail [li] of the body of [bb].

    The source state [(State rs m)] has its PC at [(b, ofs)], the start of
    block [bb] in function [f].  It is related to the Asm state [s2] by
    [match_internal] at position
    [list_length_z (header bb) + list_length_z (body bb) - list_length_z li],
    i.e. the index of the first instruction of [li] within the block.
    If [exec_body] on [li] yields [(rs', m')], then [s2] performs one or more
    silent Asm steps to a state [s2'] related to [(State rs' m')] by
    [match_internal] at position [size bb - length_opt (exit bb)].

    The proof is by induction on [li]; each basic instruction is matched by
    exactly one [Asm.exec_step_internal] step. *)
Lemma exec_body_simulation_plus_gen li: forall b ofs f bb rs m s2 rs' m'
  (BLI: is_tail li (body bb))
  (ATPC: rs PC = Vptr b ofs)
  (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
  (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb)
  (NEMPTY_BODY: li <> nil)
  (MATCHI: match_internal ((list_length_z (header bb)) + (list_length_z (body bb)) - (list_length_z li)) (State rs m) s2)
  (BODY: exec_body ge li rs m = Next rs' m'),
  exists s2', plus Asm.step tge s2 E0 s2'
             /\ match_internal (size bb - (Z.of_nat (length_opt (exit bb)))) (State rs' m') s2'.
Proof.
  (* The [nil] case contradicts [NEMPTY_BODY]. *)
  induction li as [|a li]; simpl; try congruence.
  intros.
  (* The index of [a] in [body bb] is within range, since [a :: li] is a
     non-empty tail of the body. *)
  assert (BDYLENPOS: 0 <= (list_length_z (body bb) - list_length_z (a::li)) < list_length_z (body bb)). {
    assert (Z.of_nat O < list_length_z (a::li) <= list_length_z (body bb)); try lia.
    rewrite !list_length_z_nat; split.
    - rewrite <- Nat2Z.inj_lt. simpl. lia.
    - rewrite <- Nat2Z.inj_le; eapply is_tail_bound; eauto.
  }
  (* Find the Asm instruction [tbi] that is the translation of [a]. *)
  exploit internal_functions_unfold; eauto.
  intros (tc & FINDtf & TRANStf & _).
  exploit find_basic_instructions; eauto.
  intros (tbi & (bi & (NTHBI & TRANSBI & FIND_INSTR))).
  exploit is_tail_list_nth_z; eauto.
  rewrite NTHBI; simpl.
  intros X; inversion X; subst; clear X NTHBI.
  destruct (exec_basic _ _ _ _) eqn:EXEC_BASIC; try congruence.
  (* destruct s as (rs1 & m1); simpl in *. *)
  destruct s2 as (rs2 & m2); simpl in *.
  assert (BOUNDBBMAX: Ptrofs.unsigned ofs + size bb <= Ptrofs.max_unsigned)
  by (eapply size_of_blocks_bounds; eauto).
  exploit header_body_tail_bound; eauto. intros BDYTAIL.
  (* Simulate the single basic instruction [a]. *)
  exploit exec_basic_simulation; eauto.
  intros (rs_next' & m_next' & EXEC_INSTR & MI_NEXT).
  exploit exec_basic_dont_move_PC; eauto. intros AGPC.
  inversion MI_NEXT as [A B C D E M_NEXT_AGREE RS_NEXT_AGREE ATPC_NEXT PC_OFS_NEXT RS RS'].
  subst A. subst B. subst C. subst D. subst E.
  rewrite ATPC in AGPC. symmetry in AGPC, ATPC_NEXT.
  inv MATCHI. symmetry in AGPC0.
  rewrite ATPC in AGPC0.
  unfold Val.offset_ptr in AGPC0.
  simpl in FIND_INSTR.
  (* Execute internal step. *)
  exploit (Asm.exec_step_internal tge b); eauto.
  {
    (* The instruction lookup position computed with [Ptrofs] arithmetic
       coincides with the integer position used in [FIND_INSTR]. *)
    rewrite Ptrofs.add_unsigned.
    repeat (rewrite Ptrofs.unsigned_repr); try lia.
    2: {
      assert (BOUNDOFS: 0 <= Ptrofs.unsigned ofs <= Ptrofs.max_unsigned)
      by eapply Ptrofs.unsigned_range_2.
      assert (list_length_z (body bb) <= size bb) by eapply body_size_le_block_size.
      assert (list_length_z (header bb) <= 1). { eapply size_header; eauto. }
      lia. }
    try rewrite list_length_z_nat; try split;
    simpl; rewrite <- !list_length_z_nat;
    replace (Ptrofs.unsigned ofs + (list_length_z (header bb) + list_length_z (body bb) -
      list_length_z (a :: li))) with (Ptrofs.unsigned ofs + list_length_z (header bb) +
      (list_length_z (body bb) - list_length_z (a :: li))) by lia;
    try assumption; try lia. }
  (* This is our STEP hypothesis. *)
  intros STEP_NEXT.
  destruct li as [|a' li]; simpl in *.
  - (* case of a single instruction in li: this is our base case in the induction *)
    inversion BODY; subst.
    eexists; split.
    + apply plus_one. eauto.
    + constructor; auto.
      rewrite ATPC_NEXT.
      apply f_equal.
      apply f_equal.
      rewrite bblock_size_aux, list_length_z_cons; simpl.
      lia.
  - (* inductive case: apply the induction hypothesis to the remaining
       instructions, then prepend the step performed above *)
    exploit (IHli b ofs f bb r m_next' (State rs_next' m_next')); congruence || eauto.
    + (* the remaining list is still a tail of [body bb] *)
      exploit is_tail_app_def; eauto.
      intros (l3 & EQ); rewrite EQ.
      exploit (is_tail_app_right (l3 ++ a::nil)).
      rewrite <- app_assoc; simpl; eauto.
    + (* [match_internal] holds at the next position *)
      constructor; auto.
      rewrite ATPC_NEXT.
      apply f_equal.
      apply f_equal.
      rewrite! list_length_z_cons; simpl.
      lia.
    + intros (s2' & LAST_STEPS & LAST_MATCHS).
      eexists. split; eauto.
      eapply plus_left'; eauto.
Qed.

(** Simulation of the whole (non-empty) body of [bb]: instance of
    [exec_body_simulation_plus_gen] with [li := body bb].  The matching
    position on entry is then just [list_length_z (header bb)]. *)
Lemma exec_body_simulation_plus b ofs f bb rs m s2 rs' m': forall
  (ATPC: rs PC = Vptr b ofs)
  (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
  (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb)
  (NEMPTY_BODY: body bb <> nil)
  (MATCHI: match_internal (list_length_z (header bb)) (State rs m) s2)
  (BODY: exec_body ge (body bb) rs m = Next rs' m'),
  exists s2', plus Asm.step tge s2 E0 s2'
             /\ match_internal (size bb - (Z.of_nat (length_opt (exit bb)))) (State rs' m') s2'.
Proof.
  intros.
  exploit exec_body_simulation_plus_gen; eauto.
  - (* a list is a tail of itself *)
    constructor.
  - (* header + body - body simplifies to header *)
    replace (list_length_z (header bb) + list_length_z (body bb) - list_length_z (body bb)) with (list_length_z (header bb)); auto.
    lia.
Qed.

(** Variant of [exec_body_simulation_plus] without the non-emptiness
    hypothesis on the body: the Asm side performs zero or more silent steps
    ([star] instead of [plus]). *)
Lemma exec_body_simulation_star b ofs f bb rs m s2 rs' m': forall
  (ATPC: rs PC = Vptr b ofs)
  (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
  (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb)
  (MATCHI: match_internal (list_length_z (header bb)) (State rs m) s2)
  (BODY: exec_body ge (body bb) rs m = Next rs' m'),
  exists s2', star Asm.step tge s2 E0 s2'
             /\ match_internal (size bb - (Z.of_nat (length_opt (exit bb)))) (State rs' m') s2'.
Proof.
  intros.
  destruct (body bb) eqn: Hbb.
  - (* empty body: no step is taken, and the exit position equals the
       header length *)
    simpl in BODY. inv BODY.
    eexists. split.
    eapply star_refl; eauto.
    assert (EQ: (size bb - Z.of_nat (length_opt (exit bb))) = list_length_z (header bb)).
    { rewrite bblock_size_aux. rewrite Hbb; unfold list_length_z; simpl. lia. }
    rewrite EQ; eauto.
  - (* non-empty body: reuse the [plus] version and weaken to [star] *)
    exploit exec_body_simulation_plus; congruence || eauto.
    { rewrite Hbb; eauto. }
    intros (s2' & PLUS & MATCHI').
    eexists; split; eauto.
    eapply plus_star; eauto.
Qed.

(** Looking up an index [n] that is at least the length of [l] returns
    [None]. *)
Lemma list_nth_z_range_exceeded A (l : list A) n:
  n >= list_length_z l ->
  list_nth_z l n = None.
Proof.
  intros N.
  remember (list_nth_z l n) as opt eqn:H. symmetry in H.
  destruct opt; auto.
  (* A successful lookup would force [n] into range, contradicting [N]. *)
  exploit list_nth_z_range; eauto. lia.
Qed.

(** If [lbl] is a label of block [a] (per [is_label]) and the header of [a]
    has length at most 1, then the header is exactly the singleton
    [lbl :: nil]. *)
Lemma label_in_header_list lbl a:
  is_label lbl a = true -> list_length_z (header a) <= 1 -> header a = lbl :: nil.
Proof.
  intros.
  eapply is_label_correct_true in H.
  destruct (header a).
  - (* empty header: membership is impossible *)
    eapply in_nil in H. contradiction.
  - (* non-empty header: the length bound forces the tail [l0] to be empty *)
    rewrite list_length_z_cons in H0.
    assert (list_length_z l0 >= 0) by eapply list_length_z_pos.
    assert (list_length_z l0 = 0) by lia.
    rewrite list_length_z_nat in H2.
    assert (Datatypes.length l0 = 0%nat) by lia.
    eapply length_zero_iff_nil in H3. subst.
    unfold In in H. destruct H.
    + subst; eauto.
    + destruct H.
Qed.

(** The Asm instruction obtained by translating a basic instruction with
    [basic_to_instruction] is never a label: [Asm.is_label lbl x = false]
    for every [lbl].

    The proof is a brute-force case analysis over the constructors of the
    basic instruction and the translation helpers unfolded below. *)
Lemma no_label_in_basic_inst: forall a lbl x,
  basic_to_instruction a = OK x -> Asm.is_label lbl x = false.
Proof.
  intros.
  destruct a;
  simpl in *;
  repeat destruct i;
  try (destruct sumbool_rec; try congruence);
  repeat destruct ld; repeat destruct st;
  simpl in *;
  unfold gen_load_double, gen_store_double, gen_load_pi_double,
         gen_store_pi_double, ls_double_constraints in H;
  try (destruct rs1, rs2; repeat desifH H; simpl in H; try discriminate);
  try (destruct rd1, rd2; repeat desifH H; simpl in H; try discriminate);
  try (destruct ns, nd; try discriminate);
  try monadInv H;
  try hdmatchinv;
  simpl; reflexivity.
Qed.

(** Searching for [lbl] skips over the translated body and exit of a block.

    If [bdy] unfolds to [c2], then searching [lbl] from position [z] in
    [(c2 ++ unfold_exit ex) ++ c1] gives the same result as searching in
    [c1] from position [z] shifted by the length of [bdy] plus
    [length_opt ex]. *)
Lemma label_pos_body bdy: forall c1 c2 z ex lbl
  (HUNF: unfold_body bdy = OK c2),
  Asm.label_pos lbl (z + Z.of_nat ((Datatypes.length bdy) + length_opt ex)) c1 = Asm.label_pos lbl (z) ((c2 ++ unfold_exit ex) ++ c1).
Proof.
  induction bdy.
  - (* empty body: only the optional exit remains to be skipped *)
    intros. monadInv HUNF. simpl in *.
    destruct ex eqn:EQEX.
    + simpl in *. unfold Asm.is_label. destruct c; simpl; try congruence.
      destruct i; simpl; try congruence.
    + simpl in *. ring_simplify (z + 0). auto.
  - (* non-empty body: the head instruction is not a label, by
       [no_label_in_basic_inst]; conclude with the induction hypothesis *)
    intros. simpl in *. monadInv HUNF. simpl in *.
    erewrite no_label_in_basic_inst; eauto. rewrite <- IHbdy; eauto.
    erewrite Zpos_P_of_succ_nat.
    apply f_equal2; auto. lia.
Qed.

(** Block-level restatement of [label_pos_body]: searching [lbl] in [x0]
    from position [z + size a] equals searching from position
    [z + list_length_z (header a)] in the translated body and exit of [a]
    followed by [x0]. *)
Lemma asm_label_pos_header: forall z a x0 x1 lbl
  (HUNF: unfold_body (body a) = OK x1),
  Asm.label_pos lbl (z + size a) x0 =
  Asm.label_pos lbl (z + list_length_z (header a)) ((x1 ++ unfold_exit (exit a)) ++ x0).
Proof.
  intros.
  (* Split [size a] into header length plus (body length + exit length). *)
  unfold size.
  rewrite <- Nat.add_assoc. rewrite Nat2Z.inj_add.
  rewrite list_length_z_nat.
  replace (z + (Z.of_nat (Datatypes.length (header a)) + Z.of_nat (Datatypes.length (body a) + length_opt (exit a)))) with (z + Z.of_nat (Datatypes.length (header a)) + Z.of_nat (Datatypes.length (body a) + length_opt (exit a))) by lia.
  eapply (label_pos_body (body a) x0 x1 (z + Z.of_nat (Datatypes.length (header a))) (exit a) lbl). auto.
Qed.

(** A non-empty label list [l0 :: l1] of length at most 1 has an empty
    tail [l1]. *)
Lemma header_size_cons_nil: forall (l0: label) (l1: list label)
  (HSIZE: list_length_z (l0 :: l1) <= 1),
  l1 = nil.
Proof.
  intros.
  (* The [nil] case is immediate; in the [cons] case the length is at
     least 2, which contradicts [HSIZE]. *)
  destruct l1; try congruence. rewrite !list_length_z_cons in HSIZE.
  assert (list_length_z l1 >= 0) by eapply list_length_z_pos.
  assert (list_length_z l1 + 1 + 1 >= 2) by lia.
  assert (2 <= 1) by lia. contradiction H1. lia.
Qed.

(** Label positions are preserved by unfolding: if the list of blocks [bbs]
    unfolds to the Asm code [c], then [label_pos] on [bbs] and
    [Asm.label_pos] on [c] agree for every label [lbl] and start
    position [z]. *)
Lemma label_pos_preserved_gen bbs: forall lbl c z
  (HUNF: unfold bbs = OK c),
  label_pos lbl z bbs = Asm.label_pos lbl z c.
Proof.
  induction bbs.
  - intros. simpl in *. inversion HUNF. simpl. reflexivity.
  - intros. simpl in *. monadInv HUNF. unfold unfold_bblock in EQ.
    destruct (zle _ _); try congruence. monadInv EQ.
    destruct (is_label _ _) eqn:EQLBL.
    + (* [lbl] labels the current block: its header is [lbl :: nil] *)
      erewrite label_in_header_list; eauto.
      simpl in *. destruct (peq lbl lbl); try congruence.
    + (* [lbl] is not a label of the current block: skip the whole block
         and use the induction hypothesis on the remaining blocks *)
      erewrite IHbbs. eauto.
      rewrite (asm_label_pos_header z a x0 x1 lbl); auto.
      unfold is_label in *.
      destruct (header a).
      * replace (z + list_length_z (@nil label)) with (z); eauto.
        unfold list_length_z. simpl. lia.
      * eapply header_size_cons_nil in l as HL1.
        subst. simpl in *. destruct (in_dec _ _); try congruence.
        simpl in *.
        destruct (peq _ _); try intuition congruence.
      * inversion EQ0. auto.
Qed.

(** Function-level corollary of [label_pos_preserved_gen]: if [f] translates
    to [tf], label positions in [fn_blocks f] and in [Asm.fn_code tf]
    coincide. *)
Lemma label_pos_preserved f lbl z tf: forall
  (FINDF: transf_function f = OK tf),
  label_pos lbl z (fn_blocks f) = Asm.label_pos lbl z (Asm.fn_code tf).
Proof.
  intros.
  eapply label_pos_preserved_gen.
  (* Extract the [unfold] equation from the definition of [transf_function]. *)
  unfold transf_function in FINDF. monadInv FINDF.
  destruct zlt; try congruence. inversion EQ0. eauto.
Qed.

(** Preservation of [goto_label].

    The source performs [goto_label] on the register set [incrPC v rs1],
    while the target performs [Asm.goto_label] directly on [rs2], where
    [(State rs1 m1)] and [(State rs2 m2)] are related by [match_internal]
    at position [size bb - 1].  If the source jump succeeds, so does the
    target jump, and the resulting states are related by [match_states]. *)
Lemma goto_label_preserved bb rs1 m1 rs1' m1' rs2 m2 lbl f tf v: forall
  (FINDF: transf_function f = OK tf)
  (BOUNDED: size bb <= Ptrofs.max_unsigned)
  (MATCHI: match_internal (size bb - 1) (State rs1 m1) (State rs2 m2))
  (HGOTO: goto_label f lbl (incrPC v rs1) m1 = Next rs1' m1'),
  exists (rs2' : regset) (m2' : mem), Asm.goto_label tf lbl rs2 m2 = Next rs2' m2'
  /\ match_states (State rs1' m1') (State rs2' m2').
Proof.
  intros.
  unfold goto_label, Asm.goto_label in *.
  (* Both sides look up the same label position. *)
  rewrite <- (label_pos_preserved f); auto.
  inversion MATCHI as [n0 r1 mx1 r2 mx2 EQM EQR EQPC]; subst.
  destruct label_pos; try congruence.
  destruct (incrPC v rs1 PC) eqn:INCRPC; try congruence.
  inversion HGOTO; auto. repeat (econstructor; eauto).
  rewrite <- EQPC.
  unfold incrPC in *.
  rewrite !Pregmap.gss in *.
  destruct (rs1 PC) eqn:EQRS1; simpl in *; try congruence.
  (* The two register sets coincide once PC is overwritten; this is shown
     pointwise through functional extensionality. *)
  replace (rs2 # PC <- (Vptr b0 (Ptrofs.repr z))) with
  ((rs1 # PC <- (Vptr b0 (Ptrofs.add i0 v))) # PC <- (Vptr b (Ptrofs.repr z))); auto.
  eapply functional_extensionality. intros.
  destruct (PregEq.eq x PC); subst.
  rewrite !Pregmap.gss. congruence.
  rewrite !Pregmap.gso; auto.
Qed.

(** Falling through to the next instruction.

    With [(State rs1 m1)] and [(State rs2 m2)] related by [match_internal]
    at position [size bb - 1], advancing the source PC by [size bb]
    ([incrPC]) corresponds to [Asm.nextinstr] on the target, and the
    resulting states are related by [match_states].  Note that the
    hypothesis [NEXT] is stated with the memory [m2]. *)
Lemma next_inst_incr_pc_preserved bb rs1 m1 rs1' m1' rs2 m2 f tf: forall
  (FINDF: transf_function f = OK tf)
  (BOUNDED: size bb <= Ptrofs.max_unsigned)
  (MATCHI: match_internal (size bb - 1) (State rs1 m1) (State rs2 m2))
  (NEXT: Next (incrPC (Ptrofs.repr (size bb)) rs1) m2 = Next rs1' m1'),
  exists (rs2' : regset) (m2' : mem),
  Next (Asm.nextinstr rs2) m2 = Next rs2' m2'
  /\ match_states (State rs1' m1') (State rs2' m2').
Proof.
  intros; simpl in *; unfold incrPC in NEXT;
  inv MATCHI;
  assert (size bb >= 1) by eapply bblock_size_pos;
  assert (0 <= size bb - 1 <= Ptrofs.max_unsigned) by lia;
  inversion NEXT; subst;
  eexists; eexists; split; eauto.
  (* Key equality between the two register sets, proved pointwise. *)
  assert (rs1 # PC <- (Val.offset_ptr (rs1 PC) (Ptrofs.repr (size bb))) = Asm.nextinstr rs2). {
    unfold Pregmap.set. apply functional_extensionality.
    intros x. destruct (PregEq.eq x PC).
    -- (* at PC: offset (size bb - 1) followed by offset 1 is offset (size bb) *)
       unfold Asm.nextinstr. rewrite <- AGPC.
       rewrite Val.offset_ptr_assoc. rewrite Ptrofs.add_unsigned.
       rewrite (Ptrofs.unsigned_repr (size bb - 1)); try lia.
       rewrite Ptrofs.unsigned_one.
       replace (size bb - 1 + 1) with (size bb) by lia.
       rewrite e. rewrite Pregmap.gss.
       reflexivity.
    -- (* other registers are unchanged by [nextinstr] and agree *)
       eapply nextinstr_agree_but_pc; eauto. }
       rewrite H1. econstructor.
Qed.

(** Setting PC to the value of an integer register [r] yields the same
    register set on both sides: on the target directly from [rs2], on the
    source after PC was first advanced by [size bb].  The intermediate PC
    update on the source is overwritten and therefore irrelevant. *)
Lemma pc_reg_overwrite: forall (r: ireg) rs1 m1 rs2 m2 bb
  (MATCHI: match_internal (size bb - 1) (State rs1 m1) (State rs2 m2)),
  rs2 # PC <- (rs2 r) =
  (rs1 # PC <- (Val.offset_ptr (rs1 PC) (Ptrofs.repr (size bb)))) # PC <-
  (rs1 r).
Proof.
  intros.
  (* Pointwise comparison of the two register maps. *)
  unfold Pregmap.set; apply functional_extensionality.
  intros x; destruct (PregEq.eq x PC) as [X | X]; try discriminate;
  inv MATCHI; rewrite AG; congruence.
Qed.

(** Register-set equality used for branch-and-link instructions.

    Assume [rs1] and [rs2] agree on every register except PC, and
    [rs2 PC] is [rs1 PC] offset by [size bb - 1].  Then storing the
    incremented source PC into [IR14] and setting PC to [v] gives the same
    register set as storing [rs2 PC] offset by one into [IR14] and setting
    PC to [v]. *)
Lemma eq_size_bb_ofs_one: forall bb rs1 rs2 v
  (SIZE: size bb <= Ptrofs.max_unsigned)
  (BBPOS : size bb >= 1)
  (AG: forall r : preg, r <> PC -> rs1 r = rs2 r)
  (AGPC: Val.offset_ptr (rs1 PC) (Ptrofs.repr (size bb - 1)) = rs2 PC),
  ((incrPC (Ptrofs.repr (size bb)) rs1) # IR14 <-
   (incrPC (Ptrofs.repr (size bb)) rs1 PC)) # PC <- v =
  (rs2 # IR14 <- (Val.offset_ptr (rs2 PC) (Ptrofs.one))) # PC <- v.
Proof.
  intros.
  unfold incrPC, Pregmap.set. simpl.
  apply functional_extensionality. intros x. repeat desif; auto.
  (* Remaining case: the value written to IR14.  Offsetting by
     (size bb - 1) and then by one equals offsetting by (size bb). *)
  rewrite <- AGPC.
  rewrite Val.offset_ptr_assoc. unfold Ptrofs.one.
  rewrite Ptrofs.add_unsigned. rewrite Ptrofs.unsigned_repr;
  try lia. rewrite Ptrofs.unsigned_repr; try lia.
  rewrite Z.sub_add; reflexivity.
Qed.

(** Simulation of a control-flow instruction.

    The source executes [cfi] with [exec_cfi] on the register set whose PC
    has been advanced by [size bb]; the target executes the translated
    instruction [cf_instruction_to_instruction cfi] with [Asm.exec_instr]
    on [rs2], where the two states are related by [match_internal] at
    position [size bb - 1].  If the source execution succeeds, the target
    execution succeeds too and the resulting states satisfy
    [match_states].

    The proof proceeds by case analysis on [cfi]; each case is introduced
    by a comment naming the instruction. *)
Lemma exec_cfi_simulation:
  forall bb f tf rs1 m1 rs1' m1' rs2 m2 cfi
  (SIZE: size bb <= Ptrofs.max_unsigned)
  (FINDF: transf_function f = OK tf)
  (CFI: exec_cfi ge f cfi (incrPC (Ptrofs.repr (size bb)) rs1) m1 = Next rs1' m1')
  (MATCHI: match_internal (size bb - 1) (State rs1 m1) (State rs2 m2)),
  exists rs2' m2', Asm.exec_instr tge tf (cf_instruction_to_instruction cfi)
                                  rs2 m2 = Next rs2' m2'
                   /\ match_states (State rs1' m1') (State rs2' m2').
Proof.
  intros.
  assert (BBPOS: size bb >= 1) by eapply bblock_size_pos.
  destruct cfi; inv CFI; simpl.
  (* Pb *)
  - exploit goto_label_preserved; eauto.
  (* Pbc *)
  - inv MATCHI.
    (* The condition reads only non-PC registers, on which both sides
       agree; then either jump or fall through. *)
    unfold eval_testcond in *. destruct c;
    erewrite !incrPC_agree_but_pc in H0; try rewrite <- !AG; try congruence;
    desmatch; try (simpl in H0; discriminate);
    desif;
    try (eapply goto_label_preserved; eauto; econstructor; eauto);
    try (eapply next_inst_incr_pc_preserved; eauto; econstructor; eauto).
  (* Pcbz *)
  - rename H0 into HCBZ.
    assert (MATCHI' := MATCHI). inv MATCHI.
    unfold eval_testzero, eval_branch in *.
    erewrite incrPC_agree_but_pc in HCBZ; try congruence.
    destruct (Val.mxcmpu_bool Ceq (rs1 r) (Vint Int.zero)) eqn:HTZ;
    try (simpl in HCBZ; discriminate).
    destruct b; simpl in *.
    + (* comparison true: jump to the label *)
      edestruct (goto_label_preserved bb) as (rs2' & m2' & HGL2 & MS); eauto.
      eexists; eexists; split.
      * simpl. rewrite <- AG; try congruence. rewrite HTZ. rewrite HGL2. reflexivity.
      * exact MS.
    + (* comparison false: fall through *)
      inv HCBZ.
      edestruct (next_inst_incr_pc_preserved bb) as (rs2' & m2' & HN & MS); eauto.
      inv HN.
      eexists; eexists; split.
      * simpl. rewrite <- AG; try congruence. rewrite HTZ. reflexivity.
      * exact MS.
  (* Pcbnz *)
  - rename H0 into HCBNZ.
    assert (MATCHI' := MATCHI). inv MATCHI.
    unfold eval_testzero, eval_neg_branch in *.
    erewrite incrPC_agree_but_pc in HCBNZ; try congruence.
    destruct (Val.mxcmpu_bool Ceq (rs1 r) (Vint Int.zero)) eqn:HTZ;
    try (simpl in HCBNZ; discriminate).
    destruct b; simpl in *.
    + (* comparison true: fall through *)
      inv HCBNZ.
      edestruct (next_inst_incr_pc_preserved bb) as (rs2' & m2' & HN & MS); eauto.
      inv HN.
      eexists; eexists; split.
      * simpl. rewrite <- AG; try congruence. rewrite HTZ. reflexivity.
      * exact MS.
    + (* comparison false: jump to the label *)
      edestruct (goto_label_preserved bb) as (rs2' & m2' & HGL2 & MS); eauto.
      eexists; eexists; split.
      * simpl. rewrite <- AG; try congruence. rewrite HTZ. rewrite HGL2. reflexivity.
      * exact MS.
  (* Pbsymb *)
  - eexists. eexists. split. eauto.
    assert ((incrPC (Ptrofs.repr (size bb)) rs1) # PC <-
            (Genv.symbol_address ge id Ptrofs.zero) =
            rs2 # PC <- (Genv.symbol_address tge id Ptrofs.zero)) as EQRS.
    { unfold incrPC, Pregmap.set. rewrite symbol_addresses_preserved. inv MATCHI.
      apply functional_extensionality. intros x. destruct (PregEq.eq x PC); auto. }
    rewrite EQRS; inv MATCHI; reflexivity.
  (* Pblsymb *)
  - eexists. eexists. split. eauto.
    rewrite symbol_addresses_preserved.
    inv MATCHI.
    erewrite eq_size_bb_ofs_one; eauto.
    reflexivity.
  (* Pbreg *)
  - eexists. eexists. split. eauto.
    unfold incrPC. rewrite Pregmap.gso; try congruence.
    erewrite pc_reg_overwrite; eauto.
    inv MATCHI. reflexivity.
  (* Pblreg *)
  - eexists. eexists. split. eauto.
    inv MATCHI.
    rewrite (eq_size_bb_ofs_one bb rs1 rs2); auto.
    unfold incrPC. rewrite <- AGPC.
    rewrite Pregmap.gso; try congruence.
    rewrite AG; congruence.
  (* Pbtbl *)
  - inv MATCHI.
    rewrite <- AG; try congruence.
    assert (incrPC (Ptrofs.repr (size bb)) rs1 r = rs1 r) as REQ.
    { unfold incrPC. rewrite Pregmap.gso; congruence. }
    rewrite REQ in H0. destruct (rs1 r); try discriminate. desmatch; try discriminate.
    unfold Asm.goto_label, goto_label in *.
    rewrite <- (label_pos_preserved f); auto.
    desmatch; try discriminate.
    destruct ((incrPC (Ptrofs.repr (size bb)) rs1) # IR14 <- Vundef PC)
    eqn:INCPC; try congruence.
    inversion H0.
    repeat (econstructor; eauto).
    rewrite Pregmap.gso; try congruence.
    rewrite <- AGPC.
    unfold incrPC in *.
    destruct (rs1 PC) eqn:EQRS1; simpl in *; try discriminate.
    (* Both register sets coincide after IR14 is undefined and PC is
       overwritten; this is shown pointwise. *)
    replace ((rs2 # IR14 <- Vundef) # PC <- (Vptr b0 (Ptrofs.repr z))) with (((rs1 # PC <- (Vptr b0 (Ptrofs.add i1 (Ptrofs.repr (size bb))))) # IR14 <-
      Vundef) # PC <- (Vptr b (Ptrofs.repr z))); auto.
    apply functional_extensionality. intros.
    destruct (PregEq.eq x PC); subst.
    + rewrite Pregmap.gso in INCPC; try congruence.
      rewrite Pregmap.gss in INCPC.
      rewrite !Pregmap.gss in *. congruence.
    + rewrite Pregmap.gso; auto.
      rewrite (Pregmap.gso (i := x) (j := PC)); auto.
      destruct (PregEq.eq x IR14); subst.
      * rewrite !Pregmap.gss; auto.
      * rewrite !Pregmap.gso; auto.
Qed.

(** The last position of a block, [size bb - 1], is never an index into
    the header: looking it up in [header bb] returns [None]. *)
Lemma last_instruction_cannot_be_label bb:
  list_nth_z (header bb) (size bb - 1) = None.
Proof.
  (* The header length is at most [size bb - 1]. *)
  assert (list_length_z (header bb) <= size bb - 1). {
    rewrite bblock_size_aux. generalize (bblock_size_aux_pos bb). lia.
  }
  (* A successful lookup would put [size bb - 1] strictly below the header
     length, which is impossible. *)
  remember (list_nth_z (header bb) (size bb - 1)) as label_opt; destruct label_opt; auto;
  exploit list_nth_z_range; eauto; lia.
Qed.

(** If the source PC is the pointer [(b, ofs)] and the states are related
    by [match_internal] at position [size bb - 1], then the target PC is
    the pointer [(b, ofs + (size bb - 1))] (addition in [Ptrofs]). *)
Lemma pc_ptr_exec_step: forall ofs bb b rs1 m1 rs2 m2
  (ATPC: rs1 PC = Vptr b ofs)
  (MATCHI: match_internal (size bb - 1) (State rs1 m1) (State rs2 m2)),
  rs2 PC = Vptr b (Ptrofs.add ofs (Ptrofs.repr (size bb - 1))).
Proof.
  intros; inv MATCHI. rewrite <- AGPC; rewrite ATPC; unfold Val.offset_ptr; eauto.
Qed.

(** Converts an instruction lookup stated with integer arithmetic,
    at [Ptrofs.unsigned ofs + (size bb - 1)], into the same lookup stated
    with [Ptrofs] arithmetic, at
    [Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr (size bb - 1)))].
    The bound [BOUNDOFS] guarantees that the machine addition does not
    wrap around. *)
Lemma find_instr_ofs_somei: forall ofs bb f tc asmi rs1 m1 rs2 m2
  (BOUNDOFS : Ptrofs.unsigned ofs + size bb <= Ptrofs.max_unsigned)
  (FIND_INSTR : Asm.find_instr (Ptrofs.unsigned ofs + (size bb - 1)) tc =
                Some (asmi))
  (MATCHI : match_internal (size bb - 1) (State rs1 m1) (State rs2 m2)),
  Asm.find_instr (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr (size bb - 1))))
    (Asm.fn_code {| Asm.fn_sig := fn_sig f; Asm.fn_code := tc |}) =
  Some (asmi).
Proof.
  intros; simpl.
  (* Reduce the goal to [FIND_INSTR]; what remains is the equality of the
     two positions. *)
  replace (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr (size bb - 1))))
          with (Ptrofs.unsigned ofs + (size bb - 1)); try assumption.
  generalize (bblock_size_pos bb); generalize (Ptrofs.unsigned_range_2 ofs); intros.
  unfold Ptrofs.add.
  rewrite Ptrofs.unsigned_repr. rewrite Ptrofs.unsigned_repr; try lia.
  rewrite Ptrofs.unsigned_repr; lia.
Qed.

(** Evaluation of a single builtin argument is preserved when moving from
    [rs1] to [rs2], provided the two register sets agree on every register
    other than PC.  The argument [a1] is mapped with [map_builtin_arg DR]
    on the target side; the memory [m] and the global environment [tge]
    are the same on both sides. *)
Lemma eval_builtin_arg_match: forall rs1 rs2 m a1 b1
  (AG: forall r: preg, r <> PC -> rs1 r = rs2 r)
  (EVAL: eval_builtin_arg tge (fun r: dreg => rs1 r) (rs1 SP) m a1 b1),
  eval_builtin_arg tge rs2 (rs2 SP) m (map_builtin_arg DR a1) b1.
Proof.
  (* Induction on the evaluation derivation; every register read is
     transported through [AG]. *)
  intros; induction EVAL; simpl in *;
  try rewrite AG; try rewrite AG in EVAL; try discriminate; try congruence; eauto with barg;
  econstructor;
  rewrite <- AG; try discriminate; auto;
  rewrite AG; try discriminate; auto.
Qed.

(** List version of [eval_builtin_arg_match]: evaluation of a list of
    builtin arguments is preserved between two states related by
    [match_internal] at position [size bb - 1]. *)
Lemma eval_builtin_args_match: forall bb rs1 m1 rs2 m2 args vargs
  (MATCHI: match_internal (size bb - 1) (State rs1 m1) (State rs2 m2))
  (EVAL: eval_builtin_args tge (fun r: dreg => rs1 r) (rs1 SP) m1 args vargs),
  eval_builtin_args tge rs2 (rs2 SP) m2 (map (map_builtin_arg DR) args) vargs.
Proof.
  intros; inv MATCHI.
  induction EVAL; subst.
  - (* empty argument list *)
    econstructor.
  - (* head argument by [eval_builtin_arg_match], tail by induction *)
    econstructor.
    + eapply eval_builtin_arg_match; eauto.
    + eauto.
Qed.

(** Two register sets that agree everywhere except possibly at PC become
    equal once PC is set to the same value [v] in both. *)
Lemma pc_both_sides: forall (rs _rs: regset) v
  (AG : forall r : preg, r <> PC -> rs r = _rs r),
  rs # PC <- v = _rs # PC <- v.
Proof.
  (* Pointwise equality via functional extensionality. *)
  intros; unfold Pregmap.set; apply functional_extensionality; intros y.
  destruct (PregEq.eq y PC); try rewrite AG; eauto.
Qed.

(** [set_res] preserves agreement outside PC: if [rs] and [_rs] agree on
    every register other than PC, then after [set_res res vres] they still
    agree on any register [r] different from PC. *)
Lemma set_buitin_res_sym res: forall vres rs _rs r
  (NPC: r <> PC)
  (AG : forall r : preg, r <> PC -> rs r = _rs r),
  set_res res vres rs r = set_res res vres _rs r.
Proof.
  induction res; simpl; intros; unfold Pregmap.set; try rewrite AG; eauto.
Qed.

(** If [rs] and [_rs] agree on every register other than PC, then applying
    [set_res res vres] to each and afterwards setting PC to equal values
    [v1 = v2] yields equal register sets. *)
Lemma set_builtin_res_dont_move_pc_gen res: forall vres rs _rs v1 v2
  (HV: v1 = v2)
  (AG : forall r : preg, r <> PC -> rs r = _rs r),
  (set_res res vres rs) # PC <- v1 =
  (set_res res vres _rs) # PC <- v2.
Proof.
  (* Re-generalize the variables so that the induction hypothesis is
     universally quantified over them. *)
  intros. rewrite HV. generalize res vres rs _rs AG v2.
  clear res vres rs _rs AG v1 v2 HV.
  induction res.
  - simpl; intros. apply pc_both_sides; intros.
    unfold Pregmap.set; try rewrite AG; eauto.
  - simpl; intros; apply pc_both_sides; eauto.
  - (* composite result: use the induction hypothesis for the second
       component; agreement after the first component follows from
       [set_buitin_res_sym] *)
    simpl; intros.
    erewrite IHres2; eauto; intros.
    eapply set_buitin_res_sym; eauto.
Qed.

(** Setting a builtin result whose registers have been mapped through [DR]
    never modifies PC. *)
Lemma set_builtin_map_not_pc (res: builtin_res dreg): forall vres rs,
  set_res (map_builtin_res DR res) vres rs PC = rs PC.
Proof.
  induction res.
  - (* single register: a [DR] register is distinct from PC *)
    intros; simpl. unfold Pregmap.set. destruct (PregEq.eq PC x); try congruence.
  - intros; simpl; congruence.
  - (* composite result: apply both induction hypotheses *)
    intros; simpl in *. rewrite IHres2. rewrite IHres1. reflexivity.
Qed.

(** [undef_regs] over the list [map preg_of rl] preserves agreement outside
    PC: if [rs] and [_rs] agree on every register other than PC, the
    results still agree on any register [r] different from PC. *)
Lemma undef_reg_preserved (rl: list mreg): forall rs _rs r
  (NPC: r <> PC)
  (AG : forall r : preg, r <> PC -> rs r = _rs r),
  undef_regs (map preg_of rl) rs r = undef_regs (map preg_of rl) _rs r.
Proof.
  induction rl.
  - simpl; auto.
  - (* The induction hypothesis is applied to the register sets updated at
       [preg_of a]; agreement of those updated sets is re-established
       pointwise. *)
    simpl; intros. erewrite IHrl; eauto.
    intros. unfold Pregmap.set. destruct (PregEq.eq r0 (preg_of a)); try rewrite AG; eauto.
Qed.

(** A register [r] that differs from every element of [rl] is left
    unchanged by [undef_regs rl]. *)
Lemma undef_regs_other:
  forall r rl rs,
  (forall r', In r' rl -> r <> r') ->
  undef_regs rl rs r = rs r.
Proof.
  induction rl; simpl; intros. auto.
  rewrite IHrl by auto. rewrite Pregmap.gso; auto.
Qed.

(** Simulation of the exit of a basic block when that exit is present.

    Assuming the block [bb] is found at offset [ofs] of function [f],
    that [exit bb <> None], that the Asm state [s2] matches the Asmblock
    state [(State rs m)] at position
    [size bb - Z.of_nat (length_opt (exit bb))], and that [exec_exit]
    takes [(rs, m)] to [(rs', m')] with trace [t], the Asm semantics can
    go from [s2] to [(State rs' m')] in one or more steps ([plus]). *)
Lemma exec_exit_simulation_plus b ofs f bb s2 t rs m rs' m': forall
  (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
  (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb)
  (NEMPTY_EXIT: exit bb <> None)
  (MATCHI: match_internal (size bb - Z.of_nat (length_opt (exit bb))) (State rs m) s2)
  (EXIT: exec_exit ge f (Ptrofs.repr (size bb)) rs m (exit bb) t rs' m')
  (ATPC: rs PC = Vptr b ofs),
  plus Asm.step tge s2 t (State rs' m').
Proof.
  intros.
  exploit internal_functions_unfold; eauto.
  intros (tc & FINDtf & TRANStf & _).
  (* Look at the instruction at the last position of the block,
     [size bb - 1]; the side condition is that this index is within
     the block. *)
  exploit (find_instr_bblock (size bb - 1)); eauto.
  { generalize (bblock_size_pos bb). lia. }
  intros (i' & NTH & FIND_INSTR).
  (* Three cases for where position [size bb - 1] falls: header, body,
     or exit. *)
  inv NTH.
  + (* Header case: ruled out by [last_instruction_cannot_be_label]. *)
    rewrite last_instruction_cannot_be_label in *. discriminate.
  + (* Body case: with a non-empty exit the index simplifies to
       [list_length_z (body bb)], which is out of range for the body. *)
    destruct (exit bb) as [ctrl |] eqn:NEMPTY_EXIT'. 2: { contradiction. }
    rewrite bblock_size_aux in *. rewrite NEMPTY_EXIT' in *. simpl in *.
    replace (list_length_z (header bb) + list_length_z (body bb) + 1 - 1 -
       list_length_z (header bb)) with (list_length_z (body bb)) in H by lia.
    rewrite list_nth_z_range_exceeded in H; try lia. discriminate.
  + (* Exit case: first record that the block fits below
       [Ptrofs.max_unsigned]; these bounds feed the later [lia] and
       [eauto] calls. *)
    assert (Ptrofs.unsigned ofs + size bb <= Ptrofs.max_unsigned).
    { eapply size_of_blocks_bounds; eauto. }
    assert (size bb <= Ptrofs.max_unsigned).
    { generalize (Ptrofs.unsigned_range_2 ofs); lia. }
    destruct cfi.
    (* control flow instruction *)
    * destruct s2.
      rewrite H in EXIT. (* exit bb is a cfi *)
      inv EXIT.
      rewrite H in MATCHI. simpl in MATCHI.
      exploit internal_functions_translated; eauto.
      rewrite FINDtf.
      intros (tf & FINDtf' & TRANSf). inversion FINDtf'; subst; clear FINDtf'.
      exploit exec_cfi_simulation; eauto.
      (* extract exec_cfi_simulation's conclusion as separate hypotheses *)
      intros (rs2' & m2' & EXECI & MATCHS); rewrite MATCHS.
      (* A single internal Asm step suffices. *)
      apply plus_one.
      eapply Asm.exec_step_internal; eauto.
      - eapply pc_ptr_exec_step; eauto.
      - eapply find_instr_ofs_somei; eauto.
    (* builtin *)
    * destruct s2.
      rewrite H in EXIT.
      rewrite H in MATCHI. simpl in MATCHI.
      simpl in FIND_INSTR.
      inversion EXIT.
      (* A single builtin Asm step suffices. *)
      apply plus_one.
      (* Transport the external call and the evaluation of the builtin
         arguments from [ge] to [tge]. *)
      eapply external_call_symbols_preserved in H10; try (apply senv_preserved).
      eapply eval_builtin_args_preserved in H6; try (apply symbols_preserved).
      eapply Asm.exec_step_builtin; eauto.
      - eapply pc_ptr_exec_step; eauto.
      - eapply find_instr_ofs_somei; eauto.
      - eapply eval_builtin_args_match; eauto.
      - inv MATCHI; eauto.
      - (* Final register sets coincide: Asmblock advances [PC] by
           [size bb] via [incrPC], Asm advances it by one via
           [Asm.nextinstr]. *)
        inv MATCHI.
        unfold Asm.nextinstr, incrPC.
        (* Both ways of advancing [PC] yield the same pointer; [AGPC]
           (obtained from inverting [MATCHI]) links the two [PC]s. *)
        assert (HPC: Val.offset_ptr (rs PC) (Ptrofs.repr (size bb))
                   = Val.offset_ptr (r PC) Ptrofs.one).
        { rewrite <- AGPC. rewrite ATPC. unfold Val.offset_ptr.
          rewrite Ptrofs.add_assoc. unfold Ptrofs.add.
          assert (BBPOS: size bb >= 1) by eapply bblock_size_pos.
          rewrite (Ptrofs.unsigned_repr (size bb - 1)); try lia.
          rewrite Ptrofs.unsigned_one.
          replace (size bb - 1 + 1) with (size bb) by lia.
          reflexivity. }
        apply set_builtin_res_dont_move_pc_gen.
        -- (* Agreement on [PC]: strip the builtin-result and
              [undef_regs] updates, then conclude with [HPC]. *)
           erewrite !set_builtin_map_not_pc.
           erewrite !undef_regs_other.
           rewrite HPC; auto.
           (* Side conditions of [undef_regs_other]: every membership
              case is closed by [discriminate]. *)
           all: intros; simpl in *; destruct H3 as [HR14 | HDES];
                subst; try discriminate; exploit list_in_map_inv; eauto;
                intros [mr [A B]]; subst; discriminate.
        -- (* Agreement on the other registers, via
              [undef_reg_preserved] and a case split on [IR14]. *)
           intros. eapply undef_reg_preserved; eauto.
           intros. destruct (PregEq.eq IR14 r1); subst.
           rewrite !Pregmap.gss; reflexivity.
           rewrite !Pregmap.gso; try congruence. rewrite AG; congruence.
Qed.

(** Simulation of the exit of a basic block, whether or not an exit is
    present.

    Same hypotheses as [exec_exit_simulation_plus] except that
    [exit bb] may be [None]; accordingly the conclusion is a [star]
    (zero or more Asm steps) from [s2] to [(State rs' m')]. *)
Lemma exec_exit_simulation_star b ofs f bb s2 t rs m rs' m': forall
  (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
  (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb)
  (MATCHI: match_internal (size bb - Z.of_nat (length_opt (exit bb))) (State rs m) s2)
  (EXIT: exec_exit ge f (Ptrofs.repr (size bb)) rs m (exit bb) t rs' m')
  (ATPC: rs PC = Vptr b ofs),
  star Asm.step tge s2 t (State rs' m').
Proof.
  intros.
  destruct (exit bb) eqn: Hex.
  - (* An exit is present: reuse the [plus] version and weaken it to
       [star]. *)
    eapply plus_star.
    eapply exec_exit_simulation_plus; try rewrite Hex; congruence || eauto.
  - (* No exit: no Asm step is needed. It remains to show that the Asm
       register set [rs2] (from inverting [MATCHI]) already equals the
       Asmblock result [incrPC (Ptrofs.repr (size bb)) rs]. *)
    inv MATCHI.
    inv EXIT.
    assert (X: rs2 = incrPC (Ptrofs.repr (size bb)) rs). {
      unfold incrPC. unfold Pregmap.set.
        (* Register sets are functions: compare them pointwise. *)
        apply functional_extensionality. intros x.
        destruct (PregEq.eq x PC) as [X|].
        - (* At [PC]: use [AGPC]; the offset [size bb - 0] is [size bb]. *)
          rewrite X. rewrite <- AGPC. simpl.
          replace (size bb - 0) with (size bb) by lia. reflexivity.
        - (* Elsewhere: use the agreement [AG]. *)
          rewrite AG; try assumption. reflexivity.
    }
    destruct X.
    subst; eapply star_refl; eauto.
Qed.

(** Executing one whole basic block in Asmblock is simulated by at
    least one Asm step.  The proof chains the simulations of the three
    parts of the block: header, body, then exit. *)
Lemma exec_bblock_simulation b ofs f bb t rs m rs' m': forall
  (ATPC: rs PC = Vptr b ofs)
  (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
  (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb)
  (EXECBB: exec_bblock ge f bb rs m t rs' m'),
  plus Asm.step tge (State rs m) t (State rs' m').
Proof.
  intros.
  (* Split the block execution into its body part and its exit part. *)
  destruct EXECBB as (rs_body & m_body & EXEC_BODY & EXEC_EXIT).
  (* Header: zero or more Asm steps. *)
  exploit exec_header_simulation; eauto.
  intros (st_hdr & STEPS_HDR & MATCH_HDR).
  eapply star_plus_trans; traceEq || eauto.
  (* At least one of body / exit is non-empty; that part supplies the
     mandatory step of the [plus]. *)
  destruct (bblock_non_empty bb) as [BODY_NONEMPTY | EXIT_NONEMPTY].
  - (* body bb <> nil: [plus] from the body, [star] from the exit *)
    exploit exec_body_simulation_plus; eauto.
    intros (st_body & STEPS_BODY & MATCH_BODY).
    eapply plus_star_trans; traceEq || eauto.
    eapply exec_exit_simulation_star; eauto.
    erewrite <- exec_body_dont_move_PC; eauto.
  - (* exit bb <> None: [star] from the body, [plus] from the exit *)
    exploit exec_body_simulation_star; eauto.
    intros (st_body & STEPS_BODY & MATCH_BODY).
    eapply star_plus_trans; traceEq || eauto.
    eapply exec_exit_simulation_plus; eauto.
    erewrite <- exec_body_dont_move_PC; eauto.
Qed.

(** Every Asmblock step from [s] to [s'] is simulated by one or more
    Asm steps between the very same states. *)
Lemma step_simulation s t s':
  Asmblock.step ge s t s' -> plus Asm.step tge s t s'.
Proof.
  intros BLOCK_STEP.
  (* In both cases, first fetch the translated function definition and
     invert its translation. *)
  inv BLOCK_STEP; simpl; exploit functions_translated; eauto;
  intros (tfd & FIND_TFD & TRANS_FD);
  monadInv TRANS_FD.
  - (* internal step: simulate the whole basic block *)
    eapply exec_bblock_simulation; eauto.
  - (* external step: exactly one Asm step, after moving the external
       call from [ge] to [tge] *)
    apply plus_one.
    exploit external_call_symbols_preserved; eauto. apply senv_preserved.
    intros EXTCALL_TGE.
    eapply Asm.exec_step_external; eauto.
Qed.

(** Forward simulation from the Asmblock semantics of [prog] to the Asm
    semantics of [tprog], built with [forward_simulation_plus] on top of
    [step_simulation]. *)
Lemma transf_program_correct:
  forward_simulation (Asmblock.semantics prog) (Asm.semantics tprog).
Proof.
  eapply forward_simulation_plus.
  - (* public symbols are preserved *)
    apply senv_preserved.
  - (* initial states *)
    eexact transf_initial_states.
  - (* final states *)
    eexact transf_final_states.
  - (* step case: after unfolding [match_states] and substituting, the
       matching target state is found by [eauto] and the steps come
       from [step_simulation]. *)
    unfold match_states.
    simpl; intros; subst; eexists; split; eauto.
    eapply step_simulation; eauto.
Qed.

End PRESERVATION.

End Asmblock_PRESERVATION.

Local Open Scope linking_scope.

(** The chain of passes composed in this file, each given by its
    [match_prog] relation, in order: [Machblockgenproof],
    [Asmblockgenproof], [PostpassSchedulingproof], and finally
    [Asmblock_PRESERVATION] (defined above in this file). *)
Definition block_passes :=
      mkpass Machblockgenproof.match_prog
  ::: mkpass Asmblockgenproof.match_prog
  ::: mkpass PostpassSchedulingproof.match_prog
  ::: mkpass Asmblock_PRESERVATION.match_prog
  ::: pass_nil _.

(** Relation between a source program and the generated [Asm.program]:
    the composition of the [match_prog] relations listed in
    [block_passes]. *)
Definition match_prog := pass_match (compose_passes block_passes).

(** A successful run of [Asmgen.transf_program] produces a program
    related to its input by [match_prog]. *)
Lemma transf_program_match:
  forall p tp, Asmgen.transf_program p = OK tp -> match_prog p tp.
Proof.
  intros p tp H.
  unfold Asmgen.transf_program in H.
  (* Name the Machblock program, then expose the monadic binds that
     sit under [Compopts.time]. *)
  remember (Machblockgen.transf_program p) as mbp.
  unfold Compopts.time in *.
  (* Each [bind_inversion] peels off one bind and yields one
     intermediate program: [abp], then [abp']. *)
  apply bind_inversion in H. destruct H as [abp [HABG REST1]].
  apply bind_inversion in REST1. destruct REST1 as [abp' [HPSS HTRANSF]].
  unfold match_prog; simpl.
  (* Supply the intermediate program of each pass of [block_passes] in
     turn, and justify it with that pass's own [transf_program_match]. *)
  exists mbp; split. apply Machblockgenproof.transf_program_match; auto.
  exists abp; split. apply Asmblockgenproof.transf_program_match; auto.
  exists abp'; split. apply PostpassSchedulingproof.transf_program_match; auto.
  exists tp; split. apply Asmblock_PRESERVATION.transf_program_match; auto. auto.
Qed.

(** * Return Address Offset *)

(** Return-address-offset predicate at the Mach level, obtained by
    applying [Machblockgenproof.Mach_return_address_offset] to
    [Asmblockgenproof0.return_address_offset]. *)
Definition return_address_offset: Mach.function -> Mach.code -> ptrofs -> Prop :=
  Machblockgenproof.Mach_return_address_offset (Asmblockgenproof0.return_address_offset).

(** For any call instruction [Mach.Mcall sg ros] followed by [c] inside
    the code of [f], some return address offset [ra] satisfies
    [return_address_offset f c ra]. *)
Lemma return_address_exists:
  forall f sg ros c, is_tail (Mach.Mcall sg ros :: c) f.(Mach.fn_code) ->
  exists ra, return_address_offset f c ra.
Proof.
  intros f sg ros c TAIL.
  unfold return_address_offset.
  (* Reduce to the Machblock-level existence lemma... *)
  eapply Machblockgenproof.Mach_return_address_exists; eauto.
  (* ...whose remaining premise is the Asmblock-level existence lemma. *)
  intros; eapply Asmblockgenproof.return_address_exists; eauto.
Qed.

(** End-to-end semantic preservation, from a Mach program [prog] to the
    generated Asm program [tprog], assuming they are related by
    [match_prog]. *)
Section PRESERVATION.

Variable prog: Mach.program.
Variable tprog: Asm.program.
Hypothesis TRANSF: match_prog prog tprog.
(* Global environments of both programs; the proof script below does
   not mention them by name. *)
Let ge := Genv.globalenv prog.
Let tge := Genv.globalenv tprog.

(** Forward simulation from the Mach semantics of [prog] (parameterized
    by [return_address_offset]) to the Asm semantics of [tprog],
    obtained by composing the forward simulations of the four passes
    listed in [block_passes]. *)
Theorem transf_program_correct:
  forward_simulation (Mach.semantics return_address_offset prog) (Asm.semantics tprog).
Proof.
  (* Unfold the composed relation and invert it repeatedly to expose
     the intermediate programs and the per-pass [match_prog] facts. *)
  unfold match_prog in TRANSF. simpl in TRANSF.
  inv TRANSF. inv H. inv H1. inv H. inv H2. inv H. inv H3. inv H.
  
  (* Machblock generation *)
  eapply compose_forward_simulations.
  { exploit Machblockgenproof.transf_program_correct; eauto. }

  (* Asmblock generation *)
  eapply compose_forward_simulations.
  { apply Asmblockgenproof.transf_program_correct; eauto. }

  (* Postpass scheduling *)
  eapply compose_forward_simulations.
  { apply PostpassSchedulingproof.transf_program_correct; eauto. }

  (* Asmblock to Asm *)
  apply Asmblock_PRESERVATION.transf_program_correct; eauto.
Qed.

End PRESERVATION.

(** Global [TransfLink] instance for [match_prog], obtained from
    [pass_match_link] applied to the composition of [block_passes]. *)
#[global] Instance TransfAsm: TransfLink match_prog := pass_match_link (compose_passes block_passes).


(** Small module that re-exports this file's [return_address_offset]
    under the qualified name [Asmgenproof0.return_address_offset]. *)
Module Asmgenproof0.

(* The right-hand side refers to the [return_address_offset] defined
   earlier in this file, outside this module. *)
Definition return_address_offset := return_address_offset.

End Asmgenproof0.