Module Memcpy


Semantic model of memcpy/memmove operations

Require Import Coqlib Maps.
Require Intv.
Require Import Integers.
Require Import Values.
Require Import Memory.
Require Import Lia.
Require Import AST.
Require Import Decidableplus.
Require Import OptionMonad.

Local Open Scope list_scope.
Local Open Scope option_monad_scope.

Raw version of memmove without alignment and overlap checks memmove and memcpy below depends on two memory rm and wm, even if most of the time we have rm = wm. Actually, rm and wm are distinct only for the semantics of symbolic values (in BTL). - rm contain the "read-after-write" dependencies on memory - wm contain the "write-after-write" dependencies on memory


Definition memmove (sz: positive) bdst odst bsrc osrc rm wm : option mem :=
   SOME bytes <- Mem.loadbytes rm bsrc osrc (Zpos sz) IN
   Mem.storebytes wm bdst odst bytes.

Lemma memmove_align sz bdst odst bsrc osrc rm m1 m2
  (MEMCPY: memmove sz bdst odst bsrc osrc rm m1 = Some m2)
  : Mem.mem_align m2 = Mem.mem_align m1.
Proof.
  revert MEMCPY; unfold memmove. simplify_option.
  intros; erewrite Mem.storebytes_align; eauto.
Qed.

Lemma memmove_perm_preserv sz bdst odst bsrc osrc rm m1 m2
  (MEMCPY: memmove sz bdst odst bsrc osrc rm m1 = Some m2)
  b ofs k p:
  Mem.perm m1 b ofs k p <-> Mem.perm m2 b ofs k p.
Proof.
  revert MEMCPY; unfold memmove. simplify_option.
  intros _ STORE; split.
  - eapply Mem.perm_storebytes_1; eauto.
  - eapply Mem.perm_storebytes_2; eauto.
Qed.

Lemma memmove_range_perm_preserv sz bdst odst bsrc osrc rm m1 m2
  (MEMCPY: memmove sz bdst odst bsrc osrc rm m1 = Some m2)
  b hi lo k p:
  Mem.range_perm m1 b hi lo k p <-> Mem.range_perm m2 b hi lo k p.
Proof.
  intros; unfold Mem.range_perm at 1. setoid_rewrite memmove_perm_preserv; eauto.
  reflexivity.
Qed.

Lemma memmove_range_perm sz bdst odst bsrc osrc rm m1 m2
  (MEMCPY: memmove sz bdst odst bsrc osrc rm m1 = Some m2):
     Mem.range_perm rm bsrc osrc (osrc + (Zpos sz)) Cur Readable
  /\ Mem.range_perm m1 bdst odst (odst + (Zpos sz)) Cur Writable.
Proof.
  revert MEMCPY; unfold memmove. simplify_option.
  split.
  - eapply Mem.loadbytes_range_perm; eauto.
  - intros ofs RANGE. exploit Mem.storebytes_range_perm; eauto.
    erewrite Mem.loadbytes_length, Z2Nat.id; eauto with zarith.
Qed.

Lemma range_perm_memmove sz bdst odst bsrc osrc rm m1
  (READ: Mem.range_perm rm bsrc osrc (osrc + Zpos sz) Cur Readable)
  (WRITE: Mem.range_perm m1 bdst odst (odst + Zpos sz) Cur Writable):
  exists m2, memmove sz bdst odst bsrc osrc rm m1 = Some m2.
Proof.
  unfold memmove.
  exploit Mem.range_perm_loadbytes; eauto.
  intros (bytes & LOAD); rewrite LOAD.
  destruct (Mem.range_perm_storebytes m1 bdst odst bytes) as [m2 EQ2]; eauto.
  erewrite Mem.loadbytes_length, Z2Nat.id; eauto with zarith.
Qed.

Definition disjoint (b0: block) ofs0 sz0 b1 ofs1 sz1: Prop :=
  b0 <> b1 \/ ofs0 + sz0 <= ofs1 \/ ofs1 + sz1 <= ofs0.

Lemma disjoint_Intv b0 o0 sz0 b1 o1 sz1:
  disjoint b0 o0 sz0 b1 o1 sz1 ->
  sz0 >= 0 ->
  sz1 >= 0 ->
  b0 <> b1 \/ Intv.disjoint (o0, o0 + sz0) (o1, o1 + sz1).
Proof.
  unfold disjoint. intros DISJ H0 H1.
  case DISJ. { left; auto. }
  right; intros i; unfold Intv.In; cbn. lia.
Qed.

Lemma disjoint_sym b0 o0 sz0 b1 o1 sz1:
  disjoint b0 o0 sz0 b1 o1 sz1 -> disjoint b1 o1 sz1 b0 o0 sz0.
Proof.
  unfold disjoint. lia.
Qed.

Lemma memmove_storebytes_diamond sz bdst odst bsrc osrc b o bytes rm wm m0 m1
  (MOVE: memmove sz bdst odst bsrc osrc rm wm = Some m0)
  (STORE: Mem.storebytes wm b o bytes = Some m1):
  let len := Z.of_nat (Datatypes.length bytes) in
  disjoint bdst odst (Zpos sz) b o len ->
  exists m2, Mem.storebytes m0 b o bytes = Some m2 /\ memmove sz bdst odst bsrc osrc rm m1 = Some m2.
Proof.
  cbn; intros DD.
  revert MOVE; unfold memmove. autodestruct.
  intros LB SB.
  destruct (Mem.range_perm_storebytes m1 bdst odst l) as (m2 & SB1).
  {
    unfold Mem.range_perm. intros; eapply Mem.perm_storebytes_1; eauto.
    eapply Mem.storebytes_range_perm; eauto.
  }
  exploit Mem.storebytes_storebytes_disjoint. eapply STORE. eapply SB1.
  { eapply disjoint_Intv; eauto with zarith.
    erewrite Mem.loadbytes_length, Z2Nat.id; eauto with zarith. }
  intros (m3 & SB0 & SB3).
  try_simplify_someHyps.
Qed.

Lemma memmove_store_diamond sz bdst odst bsrc osrc chk b o v rm wm m0 m1
  (MOVE: memmove sz bdst odst bsrc osrc rm wm = Some m0)
  (STORE: Mem.store chk wm b o v = Some m1):
  let len := size_chunk chk in
  disjoint bdst odst (Zpos sz) b o len ->
  exists m3, Mem.store chk m0 b o v = Some m3 /\ memmove sz bdst odst bsrc osrc rm m1 = Some m3.
Proof.
  cbn; intros.
  exploit Mem.store_storebytes; eauto.
  intros SB.
  exploit memmove_storebytes_diamond; eauto.
  { rewrite encode_val_length, <- size_chunk_conv; auto. }
  intros (m3 & SB0 & MOVE1).
  exploit Mem.store_valid_access_3; eauto. intros [? [? HBA]].
  exploit Mem.storebytes_store. eexact SB0. auto.
  { rewrite (memmove_align _ _ _ _ _ _ _ _ MOVE). exact HBA. }
  intros HS. eauto.
Qed.

Lemma memmove_diamond sz0 bdst0 odst0 bsrc0 osrc0 rm0 wm wm0 sz1 bdst1 odst1 bsrc1 osrc1 rm1 wm1
  (MOVE0: memmove sz0 bdst0 odst0 bsrc0 osrc0 rm0 wm = Some wm0)
  (MOVE1: memmove sz1 bdst1 odst1 bsrc1 osrc1 rm1 wm = Some wm1):
  disjoint bdst0 odst0 (Zpos sz0) bdst1 odst1 (Zpos sz1) ->
  exists wm2, memmove sz1 bdst1 odst1 bsrc1 osrc1 rm1 wm0 = Some wm2 /\ memmove sz0 bdst0 odst0 bsrc0 osrc0 rm0 wm1 = Some wm2.
Proof.
  cbn; intros.
  revert MOVE1; unfold memmove at 1 2. autodestruct.
  intros LB SB.
  exploit memmove_storebytes_diamond; eauto.
  erewrite Mem.loadbytes_length, Z2Nat.id; eauto with zarith.
Qed.

memmove lemma with a single memory

Lemma memmove_extends sz bdst odst bsrc osrc m1 m2 m1'
  (MOVE: memmove sz bdst odst bsrc osrc m1 m1 = Some m2)
  (MEMEXT: Mem.extends m1 m1'):
  exists (m2' : mem),
    memmove sz bdst odst bsrc osrc m1' m1' = Some m2' /\
    Mem.extends m2 m2'.
Proof.
  revert MOVE; unfold memmove. autodestruct.
  intros; feapply Mem.loadbytes_extends.
  feapply Mem.storebytes_within_extends.
  try_simplify_someHyps.
Qed.

Lemma memmove_max_size sz bdst odst bsrc osrc m1 m2:
  memmove sz bdst odst bsrc osrc m1 m1 = Some m2 ->
  Zpos sz <= Ptrofs.max_unsigned - Z.max osrc odst.
Proof.
  intros; exploit memmove_range_perm; eauto.
  intros (PERM1 & PERM2).
  lapply (PERM1 (osrc + Zpos sz - 1)); try lia.
  lapply (PERM2 (odst + Zpos sz - 1)); try lia.
  unfold Mem.perm, Mem.perm_order'.
  do 2 autodestruct.
  intros BND1 BND2.
  lapply (Mem.only_representable_ofs m1 bsrc (osrc + Zpos sz - 1) Cur). 2: { intro X; simplify_option. }
  lapply (Mem.only_representable_ofs m1 bdst (odst + Zpos sz - 1) Cur). 2: { intro X; simplify_option. }
  try lia.
Qed.

Lemma memmove_skip sz b o m1 m2:
  memmove sz b o b o m1 m1 = Some m2 -> m2 = m1.
Proof.
  unfold memmove; autodestruct.
  Local Transparent Mem.loadbytes Mem.storebytes.
  unfold Mem.storebytes. autodestruct. intros _.
  intros LOAD. exploit Mem.loadbytes_length; eauto.
  intros SIZE. revert LOAD.
  unfold Mem.loadbytes. rewrite pred_dec_true.
  2: { intros ofs RANGE; eapply Mem.perm_implies.
       - eapply r.
         rewrite SIZE, Z2Nat.id by lia. eauto.
       - econstructor.
     }
  intros X; inv X.
  intros X; inv X.
  destruct m1; apply Mem.mem_same_proof_irr; cbn in *; auto.
  rewrite Mem.setN_getN_same, Maps.PXMap.set_get_id. auto.
Qed.

Lemma memmove_split_forward_aux (sz1 sz2: positive) bdst odst bsrc osrc m
 (OK: bsrc <> bdst \/ odst < osrc \/ osrc + Zpos (sz1 + sz2) <= odst):
   memmove (sz1 + sz2) bdst odst bsrc osrc m m =
     SOME m1 <- memmove sz1 bdst odst bsrc osrc m m IN
     memmove sz2 bdst (odst+Zpos sz1) bsrc (osrc+Zpos sz1) m1 m1.
Proof.
  unfold memmove.
  assert (DISJ: bsrc <> bdst \/ Intv.disjoint (osrc + Zpos sz1, osrc + Zpos sz1 + Zpos sz2) (odst, odst + Zpos sz1)). {
    intuition idtac; right; intros i; unfold Intv.In; cbn; lia.
  }
  clear OK.
  autodestruct; replace (Zpos (sz1+sz2)) with (Zpos sz1 + Zpos sz2) by lia;
  intros BIGLD.
  + exploit Mem.loadbytes_split; eauto with zarith.
    intros (bytes1 & bytes2 & LD1 & LD2 & EQ); subst.
    exploit Mem.loadbytes_length. eapply LD1.
    rewrite LD1. intros L1.
    destruct (Mem.storebytes _ _ _) eqn: BIGST.
    * exploit Mem.storebytes_split; eauto.
      rewrite L1, Z2Nat.id by lia.
      intros (m1 & ST1 & ST2).
      rewrite ST1.
      erewrite Mem.loadbytes_storebytes_disjoint; eauto with zarith.
      2: { rewrite L1, Z2Nat.id by lia; apply DISJ. }
      rewrite LD2; auto.
    * destruct (Mem.storebytes m bdst odst bytes1) eqn: ST1; auto.
      erewrite Mem.loadbytes_storebytes_disjoint; eauto with zarith.
      2: { rewrite L1, Z2Nat.id by lia; apply DISJ. }
      rewrite LD2; auto.
      destruct (Mem.storebytes _ bdst (odst + Zpos sz1) bytes2) eqn: ST2; auto.
      exploit Mem.storebytes_concat.
      - eapply ST1.
      - rewrite L1, Z2Nat.id by lia. eapply ST2.
      - simplify_option.
  + autodestruct; intros LD1.
    exploit Mem.loadbytes_length. eapply LD1.
    intros L1; autodestruct; intros ST1.
    erewrite Mem.loadbytes_storebytes_disjoint; eauto with zarith.
    2: { rewrite L1, Z2Nat.id by lia; apply DISJ. }
    autodestruct; intros LD2.
    exploit Mem.loadbytes_concat.
    { eapply LD1. }
    1-3: eauto with zarith.
    simplify_option.
Qed.

Lemma memmove_split_forward sz1 sz2 bdst odst bsrc osrc m
 (OK: bsrc <> bdst \/ odst <= osrc \/ osrc + Zpos (sz1 + sz2) <= odst):
   memmove (sz1 + sz2) bdst odst bsrc osrc m m =
     SOME m1 <- memmove sz1 bdst odst bsrc osrc m m IN
      memmove sz2 bdst (odst+Zpos sz1) bsrc (osrc+Zpos sz1) m1 m1.
Proof.
  destruct OK as [OK|OK]. 1: { apply memmove_split_forward_aux; auto. }
  destruct (Z.eq_dec osrc odst).
  + subst. destruct (Pos.eq_dec bsrc bdst).
    * (* A TEDIOUS CASE for proving SKIP ! *)
      subst. intros.
      destruct (memmove (sz1+sz2) _ _ _ _) eqn: BIG.
      -- feapply memmove_skip.
         intros; subst.
         exploit memmove_range_perm; eauto.
         intros (READ & WRITE).
         exploit range_perm_memmove.
         3: { intros (? & C1). rewrite C1.
              feapply memmove_skip.
              intros; subst.
              exploit range_perm_memmove.
              3: { intros (? & C2). rewrite C2.
                   feapply memmove_skip.
                   congruence.
              }
              ++ intros ? ?; apply READ; try lia.
              ++ intros ? ?; apply WRITE; try lia.
           }
        ++ intros ? ?; apply READ; try lia.
        ++ intros ? ?; apply WRITE; try lia.
     -- destruct (memmove sz1 _ _ _ _) eqn: C1; auto.
        exploit memmove_range_perm; eauto.
        intros (READ1 & WRITE1).
        feapply memmove_skip. intros; subst.
        destruct (memmove sz2 _ _ _ _) eqn: C2; auto.
        exploit memmove_range_perm; eauto.
        intros (READ2 & WRITE2).
        exploit memmove_range_perm; eauto.
        exploit range_perm_memmove.
        3: { intros (? & C). rewrite C in BIG. congruence. }
        ++ intros ofs ?. destruct (ofs <? odst + Zpos sz1) eqn: CASE.
           ** apply READ1; try lia.
           ** apply READ2; try lia.
        ++ intros ofs ?; destruct (ofs <? odst + Zpos sz1) eqn: CASE.
           ** apply WRITE1; try lia.
           ** apply WRITE2; try lia.
    * intros; apply memmove_split_forward_aux; auto.
  + intros; apply memmove_split_forward_aux; auto.
    lia.
Qed.

Lemma memmove_split_backward_aux (sz1 sz2: positive) bdst odst bsrc osrc m
 (OK: bsrc <> bdst \/ osrc < odst \/ odst + Zpos (sz1 + sz2) <= osrc):
   memmove (sz1 + sz2) bdst odst bsrc osrc m m =
     SOME m1 <- memmove sz2 bdst (odst+Zpos sz1) bsrc (osrc+Zpos sz1) m m IN
     memmove sz1 bdst odst bsrc osrc m1 m1.
Proof.
  unfold memmove.
  assert (DISJ1: Intv.disjoint (odst + Z.pos sz1, odst + Z.pos sz1 + Z.pos sz2) (odst, odst + Z.pos sz1)). {
    intros i; unfold Intv.In; cbn; lia.
  }
  assert (DISJ2: bsrc <> bdst \/ Intv.disjoint (osrc, osrc + Z.pos sz1) (odst + Z.pos sz1, odst + Z.pos sz1 + Z.pos sz2)). {
    destruct OK. left; auto. right.
    intros i. unfold Intv.In; cbn. lia.
  }
  clear OK.
  autodestruct; replace (Zpos (sz1+sz2)) with (Zpos sz1 + Zpos sz2) by lia; intros BIGLD.
  + exploit Mem.loadbytes_split; eauto with zarith.
    intros (bytes1 & bytes2 & LD1 & LD2 & EQ); subst.
    exploit Mem.loadbytes_length. eapply LD1.
    exploit Mem.loadbytes_length. eapply LD2.
    rewrite LD2. intros L2 L1.
    destruct (Mem.storebytes _ _ _) eqn: BIGST.
    * exploit Mem.storebytes_split; eauto.
      rewrite L1, Z2Nat.id by lia.
      intros (m1 & ST1 & ST2).
      exploit Mem.storebytes_storebytes_disjoint.
      - eapply ST1.
      - eapply ST2.
      - rewrite L1, L2, !Z2Nat.id by lia. right. eapply DISJ1.
      - intros (m2 & ST1' & ST2'). rewrite ST1'.
        erewrite Mem.loadbytes_storebytes_disjoint; eauto with zarith.
        ++ erewrite LD1; eauto.
        ++ rewrite L2, Z2Nat.id by lia. apply DISJ2.
    * destruct (Mem.storebytes m bdst _ bytes2) eqn: ST1'; auto.
      erewrite Mem.loadbytes_storebytes_disjoint; eauto with zarith.
      2: { rewrite L2, Z2Nat.id by lia. apply DISJ2. }
      rewrite LD1; auto.
      destruct (Mem.storebytes _ bdst _ bytes1) eqn: ST2'; auto.
      exploit Mem.storebytes_storebytes_disjoint.
      - eapply ST1'.
      - eapply ST2'.
      - rewrite L1, L2, !Z2Nat.id by lia. right. eapply Intv.disjoint_sym, DISJ1.
      - intros (m2 & ST1 & ST2).
        exploit Mem.storebytes_concat.
        ++ eapply ST1.
        ++ rewrite L1, Z2Nat.id by lia. eapply ST2.
        ++ simplify_option.
  + autodestruct; intros LD1.
    exploit Mem.loadbytes_length. eapply LD1.
    intros L1; autodestruct; intros ST1.
    erewrite Mem.loadbytes_storebytes_disjoint; eauto with zarith.
    2: { rewrite L1, Z2Nat.id by lia. apply DISJ2. }
    autodestruct; intros LD2.
    exploit Mem.loadbytes_concat.
    2: apply LD1.
    1-3: eauto with zarith.
    simplify_option.
Qed.

Lemma memmove_split_backward sz1 sz2 bdst odst bsrc osrc m
 (OK: bsrc <> bdst \/ osrc <= odst \/ odst + Zpos (sz1 + sz2) <= osrc):
   memmove (sz1 + sz2) bdst odst bsrc osrc m m =
     SOME m1 <- memmove sz2 bdst (odst+Zpos sz1) bsrc (osrc+Zpos sz1) m m IN
     memmove sz1 bdst odst bsrc osrc m1 m1.
Proof.
  destruct OK as [OK|OK]. 1: { apply memmove_split_backward_aux; auto. }
  destruct (Z.eq_dec osrc odst).
  + subst. destruct (Pos.eq_dec bsrc bdst).
    * (* A TEDIOUS CASE for proving SKIP ! *)
      subst. intros.
      destruct (memmove (sz1+sz2) _ _ _ _) eqn: BIG.
      -- feapply memmove_skip.
         intros; subst.
         exploit memmove_range_perm; eauto.
         intros (READ & WRITE).
         exploit range_perm_memmove.
         3: { intros (? & C1). rewrite C1.
              feapply memmove_skip.
              intros; subst.
              exploit range_perm_memmove.
              3: { intros (? & C2). rewrite C2.
                   feapply memmove_skip.
                   congruence.
              }
              ++ intros ? ?; apply READ; try lia.
              ++ intros ? ?; apply WRITE; try lia.
           }
        ++ intros ? ?; apply READ; try lia.
        ++ intros ? ?; apply WRITE; try lia.
     -- destruct (memmove sz2 _ _ _ _) eqn: C1; auto.
        exploit memmove_range_perm; eauto.
        intros (READ1 & WRITE1).
        feapply memmove_skip. intros; subst.
        destruct (memmove sz1 _ _ _ _) eqn: C2; auto.
        exploit memmove_range_perm; eauto.
        intros (READ2 & WRITE2).
        exploit memmove_range_perm; eauto.
        exploit range_perm_memmove.
        3: { intros (? & C). rewrite C in BIG. congruence. }
        ++ intros ofs ?. destruct (ofs <? odst + Zpos sz1) eqn: CASE.
           ** apply READ2; try lia.
           ** apply READ1; try lia.
        ++ intros ofs ?; destruct (ofs <? odst + Zpos sz1) eqn: CASE.
           ** apply WRITE2; try lia.
           ** apply WRITE1; try lia.
    * intros; apply memmove_split_backward_aux; auto.
  + intros; apply memmove_split_backward_aux; auto.
    lia.
Qed.

memcpy size and alignment checks

Definition is_align (al: Z) : Prop := al = 1 \/ al = 2 \/ al = 4 \/ al = 8.

Definition mlal: nat := 3.

Lemma is_align_two_power_nat al:
  is_align al -> exists n, (n <= mlal)%nat /\ al = two_power_nat n.
Proof.
  unfold two_power_nat, mlal.
  destruct 1 as [ -> | [ -> | [ -> | -> ] ] ]; cbn.
  - exists (0%nat); cbn; lia.
  - exists (1%nat); cbn; lia.
  - exists (2%nat); cbn; lia.
  - exists (3%nat); cbn; lia.
Qed.

Lemma two_power_nat_is_align l2: (l2 <= mlal)%nat -> is_align (two_power_nat l2).
Proof.
  unfold mlal. intros BND.
  assert (OK: (l2 = 0 \/ l2 = 1 \/ l2 = 2 \/ l2 = 3)%nat) by lia.
  unfold is_align.
  repeat (destruct OK as [REW | OK]; [rewrite REW; left; reflexivity | right]).
  rewrite OK. reflexivity.
Qed.

Lemma is_align_pos z: is_align z -> 1 <= z.
Proof.
  intros [ -> | [ -> | [ -> | -> ]]]; auto with zarith.
Qed.

Lemma is_align_Zpos z: is_align z -> z = Zpos (Z.to_pos z).
Proof.
  intros H; rewrite Z2Pos.id; auto. apply is_align_pos in H. lia.
Qed.

Lemma two_power_nat_Zpos n: Zpos (Z.to_pos (two_power_nat n)) = two_power_nat n.
Proof.
  rewrite Z2Pos.id; auto. specialize (two_power_nat_pos n). lia.
Qed.

Global Opaque mlal.

Lemma is_align_le_divides:
  forall al1 al2, is_align al1 -> is_align al2 ->
      al1 <= al2 <-> (al1 | al2).
Proof.
  intros al1 al2 AL1 AL2.
  feapply is_align_two_power_nat. clear AL2.
  feapply is_align_two_power_nat. clear AL1.
  subst.
  symmetry; apply two_power_nat_div_le.
Qed.

Lemma is_align_divides:
  forall n al, is_align al -> 1 <= n -> (n | al) -> is_align n.
Proof.
  intros n al ALIGN DIV.
  eapply is_align_two_power_nat in ALIGN.
  destruct ALIGN as (m & BND & ->).
  intros; exploit two_power_nat_div; eauto with zarith.
  intros (k & EQ). rewrite EQ in *.
  apply two_power_nat_is_align.
  etransitivity. 2: apply BND.
  rewrite <- two_power_nat_div_iff; auto.
Qed.

No strict overlap ( = nondestructive memcpy)
Definition noover (sz: positive) (bdst: block) (odst: Z) (bsrc: block) (osrc: Z) : Prop :=
  (bsrc <> bdst \/ osrc = odst \/ osrc + Zpos sz <= odst \/ odst + Zpos sz <= osrc).

Lemma Z_divide_bound_eq a b: (a | b) -> 0 < b <= a -> a = b.
Proof.
  intros; assert (X: a <= b). { apply Z.divide_pos_le; auto with zarith. }
  lia.
Qed.

Lemma Z_divide_le_reduce a b c x:
  0 < a <= x ->
  (x | a) ->
  (x | b) ->
  (x | c) ->
  b < c ->
  b + a <= c.
Proof.
  intros H Ha Hb Hc H0; exploit (@Z_divide_bound_eq x a); eauto.
  intros; subst. clear Ha.
  destruct Hb as (x & Hb).
  destruct Hc as (y & Hc). subst.
  replace (x * a + a) with ((x+1)*a). 2: lia.
  exploit Zmult_lt_reg_r; eauto. 1: lia.
  intros; apply Zmult_le_compat_r. 2: lia.
  lia.
Qed.

Lemma noover_trivial sz al bdst odst bsrc osrc:
   Zpos sz <= al ->
   (al | Zpos sz) ->
   (al | osrc) ->
   (al | odst) ->
   noover sz bdst odst bsrc osrc.
Proof.
  intros. unfold noover. right. destruct (Z.compare osrc odst) eqn: CMP.
  - left; apply Z.compare_eq; auto.
  - right; left. eapply Z_divide_le_reduce; eauto. lia.
  - right; right. eapply Z_divide_le_reduce; eauto. lia.
    apply Z.compare_gt_iff. auto.
Qed.

Lemma noover_full sz al bdst odst bsrc osrc:
   (Zpos sz > al -> noover sz bdst odst bsrc osrc) ->
   (al | Zpos sz) ->
   (al | osrc) ->
   (al | odst) ->
   (noover sz bdst odst bsrc osrc).
Proof.
  intros ? ? ? NOOVER. case (zle (Zpos sz) al); auto.
  intros; eapply noover_trivial; eauto.
Qed.

Global Hint Resolve noover_full: core.

the current memory chunk of sz bytes and with al can be split into: - a memcpy of ofs bytes with alignment al (which can be itself be split with intermediate alignment between al and bal) - a memcpy sz - ofs - ((sz - ofs) mod bal) bytes with alignment bal. - a memcpy of (sz - ofs) mod bal bytes with al (which can be itself split...) - bmal max authorized alignment from base addresses (ie block alignment)
Definition best_align (sz: positive) (bmal: Z) (al odst osrc ofs bal: Z) : Prop :=
      is_align bal
   /\ 0 <= ofs <= bal - al
   /\ ofs + bal <= Zpos sz
   /\ (bal | osrc + ofs)
   /\ (bal | odst + ofs)
   /\ (bal | bmal)
   .

Alignment and aliasing precondition of memcpy
Definition memcpy_args_ok (chk: cpychunk) (bmal: Z) (bdst: block) (odst: Z) (bsrc: block) (osrc: Z) : Prop :=
      is_align (cpy_al chk)
   /\ (cpy_al chk | Zpos (cpy_sz chk))
   /\ (cpy_al chk | bmal)
   /\ (cpy_al chk | osrc)
   /\ (cpy_al chk | odst)
   /\ (cpy_mode chk = None -> noover (cpy_sz chk) bdst odst bsrc osrc)
   /\ if_Some (cpy_mode chk) (fun b => bsrc = bdst /\ if b then odst < osrc else osrc < odst)
   /\ if_Some (cpy_bal chk) (fun p => best_align (cpy_sz chk) bmal (cpy_al chk) odst osrc (fst p) (snd p))
   .

Lemma memcpy_al_divides_best_align chk bmal bdst odst bsrc osrc
  (OK: memcpy_args_ok chk bmal bdst odst bsrc osrc)
  ofs bal (BAL: cpy_bal chk = Some (ofs, bal))
  : (cpy_al chk | bal) /\ (cpy_al chk | ofs).
Proof.
  destruct OK as (ALA & _ & _ & SRC & _ & _ & _ & BOK).
  rewrite BAL in BOK. destruct BOK as (BALA & BND & _ & BSRC & _). cbn in *.
  assert (DIV1: cpy_al chk <= bal) by lia.
  erewrite is_align_le_divides in DIV1; eauto.
  split; auto.
  assert (ALPOS: 0 < cpy_al chk). {
    lapply (is_align_pos (cpy_al chk)); auto with zarith.
  }
  assert (BALPOS: 0 < bal). {
    lapply (is_align_pos bal); auto with zarith.
  }
  generalize BSRC.
  rewrite <- !Z.mod_divide; auto with zarith.
  intros X.
  assert (AUX: ((osrc + ofs) mod bal) mod (cpy_al chk) = 0). {
    rewrite X; reflexivity.
  }
  rewrite <- Zmod_div_mod, Z.add_mod in AUX; eauto with zarith.
  rewrite <- Z.mod_divide in SRC; auto with zarith.
  rewrite SRC in AUX; auto with zarith. cbn in AUX.
  rewrite Zmod_mod in AUX. auto.
Qed.

NB: in the concrete semantics, we can find a valid chunk for any size (and any src/dst).
Program Definition find_valid_chk sz bmal bdst odst bsrc osrc: { chk | cpy_sz chk = sz
                                                                  /\ memcpy_args_ok chk bmal bdst odst bsrc osrc } :=
  let mode :=
    if peq bdst bsrc then
      match Z.compare odst osrc with
      | Lt => Some true
      | Eq => None
      | Gt => Some false
      end
    else None
  in cpy mode sz 1 None.
Next Obligation.
  generalize Z.divide_1_l, Z.compare_eq.
  unfold memcpy_args_ok, noover; cbn.
  do 6 (constructor; cbn; auto).
  split. { simplify_option.
    right; left; symmetry; auto.
  }
  split; auto. {
    simplify_option; cbn;
    rewrite ?Zcompare_Gt_Lt_antisym, ?Z.compare_lt_iff in *;
    auto.
  }
Qed.


Lemma best_align_div_bmal sz bmal1 bmal2 al odst osrc ofs bal:
 best_align sz bmal1 al odst osrc ofs bal ->
  ((bmal1 | bmal2)) ->
 best_align sz bmal2 al odst osrc ofs bal.
Proof.
  intros (BALA & BND & ? & BSRC & ? & ?) DIV.
  repeat (constructor; auto).
  etransitivity. 2: eassumption. eauto.
Qed.

Lemma memcpy_args_div_bmal chk bmal1 bmal2 bdst odst bsrc osrc:
  memcpy_args_ok chk bmal1 bdst odst bsrc osrc ->
  ((bmal1 | bmal2)) ->
  memcpy_args_ok chk bmal2 bdst odst bsrc osrc.
Proof.
  intros (ALA & ? & ? & SRC & ? & ? & ? & BOK) DIV.
  repeat (constructor; auto).
  + etransitivity. 2: eassumption. eauto.
  + rewrite !if_Some_iff in *. intros; eapply best_align_div_bmal; eauto.
Qed.

memcpy with two memories (see explanation above for memmove) NB: this memcpy may be "destructive" like a memmove, but in a controlled way by the compiler !

Definition memcpy chk bmal bdst odst bsrc osrc rm wm: option mem :=
  ASSERT decide (memcpy_args_ok chk bmal bdst odst bsrc osrc) IN
  memmove (cpy_sz chk) bdst odst bsrc osrc rm wm
  .

Local Opaque is_align.

Lemma memcpy_align chk bmal bdst odst bsrc osrc rm m1 m2 :
  memcpy chk bmal bdst odst bsrc osrc rm m1 = Some m2 ->
  Mem.mem_align m2 = Mem.mem_align m1.
Proof.
  unfold memcpy.
  set (P:=memcpy_args_ok chk _ _ _ _ _).
  destruct (decide P) eqn: DEC; [ exploit (Decidable_sound P); eauto | try_simplify_someHyps ].
  intros; eapply memmove_align; eauto.
Qed.

Lemma memcpy_Some_equiv chk bmal bdst odst bsrc osrc rm m1 m2:
  memcpy chk bmal bdst odst bsrc osrc rm m1 = Some m2 <-> (memcpy_args_ok chk bmal bdst odst bsrc osrc /\ memmove (cpy_sz chk) bdst odst bsrc osrc rm m1 = Some m2).
Proof.
  unfold memcpy.
  set (P:=memcpy_args_ok chk _ _ _ _ _).
  split.
  - destruct (decide P) eqn: DEC; [ exploit (Decidable_sound P); eauto | try_simplify_someHyps ].
  - intros (OK & MOVE). rewrite (Decidable_complete P); auto.
Qed.

Lemma memcpy_Some_intros chk bmal bdst odst bsrc osrc rm m1 m2:
  memmove (cpy_sz chk) bdst odst bsrc osrc rm m1 = Some m2 ->
  memcpy_args_ok chk bmal bdst odst bsrc osrc ->
  memcpy chk bmal bdst odst bsrc osrc rm m1 = Some m2.
Proof.
  rewrite memcpy_Some_equiv.
  tauto.
Qed.

Lemma memcpy_None_equiv chk bmal bdst odst bsrc osrc rm m:
  memcpy chk bmal bdst odst bsrc osrc rm m = None <-> (memcpy_args_ok chk bmal bdst odst bsrc osrc -> memmove (cpy_sz chk) bdst odst bsrc osrc rm m = None).
Proof.
  unfold memcpy.
  set (P:=memcpy_args_ok chk _ _ _ _ _).
  split.
  - intros; rewrite (Decidable_complete P) in *; auto.
  - destruct (decide P) eqn: DEC; [ exploit (Decidable_sound P); eauto | try_simplify_someHyps ].
Qed.

Lemma memcpy_ok_move chk bmal bdst odst bsrc osrc rm m1:
  memcpy_args_ok chk bmal bdst odst bsrc osrc ->
  memcpy chk bmal bdst odst bsrc osrc rm m1 = memmove (cpy_sz chk) bdst odst bsrc osrc rm m1.
Proof.
  destruct memcpy eqn: MEMCPY.
  + rewrite memcpy_Some_equiv in MEMCPY. intuition congruence.
  + rewrite memcpy_None_equiv in MEMCPY. intuition congruence.
Qed.

Lemma perm_memcpy_iff [chk bmal bdst odst bsrc osrc rm m1 m2]:
  memcpy chk bmal bdst odst bsrc osrc rm m1 = Some m2 ->
  forall (b' : Values.block) (ofs' : Z) (k : perm_kind) (p : permission),
  Mem.perm m1 b' ofs' k p <-> Mem.perm m2 b' ofs' k p.
Proof.
  rewrite memcpy_Some_equiv. intros (OK & MOVE). intros; eapply memmove_perm_preserv; eauto.
Qed.

Lemma range_perm_memcpy_iff [chk bmal bdst odst bsrc osrc rm m1 m2]:
  memcpy chk bmal bdst odst bsrc osrc rm m1 = Some m2 ->
  forall (b' : Values.block) (lo hi: Z) (k : perm_kind) (p : permission),
  Mem.range_perm m1 b' lo hi k p <-> Mem.range_perm m2 b' lo hi k p.
Proof.
  intros; unfold Mem.range_perm. setoid_rewrite perm_memcpy_iff at 1; eauto.
  reflexivity.
Qed.

Lemma memcpy_store_diamond cchk bmal bdst odst bsrc osrc chk b o v rm m m0 m1
  (MCPY: memcpy cchk bmal bdst odst bsrc osrc rm m = Some m0)
  (STORE: Mem.store chk m b o v = Some m1):
  let len := size_chunk chk in
  disjoint bdst odst (Zpos (cpy_sz cchk)) b o len ->
  exists m3, Mem.store chk m0 b o v = Some m3 /\ memcpy cchk bmal bdst odst bsrc osrc rm m1 = Some m3.
Proof.
  intros. rewrite memcpy_Some_equiv in MCPY.
  destruct MCPY as (OK & MOVE).
  rewrite memcpy_ok_move; auto.
  eapply memmove_store_diamond; eauto.
Qed.

Lemma memcpy_diamond chk0 bmal0 bdst0 odst0 bsrc0 osrc0 rm0 wm wm0 chk1 bmal1 bdst1 odst1 bsrc1 osrc1 rm1 wm1
  (MCPY0: memcpy chk0 bmal0 bdst0 odst0 bsrc0 osrc0 rm0 wm = Some wm0)
  (MCPY1: memcpy chk1 bmal1 bdst1 odst1 bsrc1 osrc1 rm1 wm = Some wm1):
  disjoint bdst0 odst0 (Zpos (cpy_sz chk0)) bdst1 odst1 (Zpos (cpy_sz chk1)) ->
  exists wm2, memcpy chk1 bmal1 bdst1 odst1 bsrc1 osrc1 rm1 wm0 = Some wm2 /\ memcpy chk0 bmal0 bdst0 odst0 bsrc0 osrc0 rm0 wm1 = Some wm2.
Proof.
  intros. rewrite !memcpy_Some_equiv in *.
  destruct MCPY0 as (OK0 & MOVE0).
  destruct MCPY1 as (OK1 & MOVE1).
  rewrite !memcpy_ok_move; auto.
  eapply memmove_diamond; eauto.
Qed.


Lemma memcpy_skip chk bmal b o m1 m2:
  memcpy chk bmal b o b o m1 m1 = Some m2 -> m2 = m1.
Proof.
  intros H; rewrite memcpy_Some_equiv in H.
  destruct H as (_ & H).
  eapply memmove_skip; eauto.
Qed.


Import ListNotations.

Definition bmal_of (align: block -> Z) (bdst bsrc: block) : Z :=
  Z.gcd (align bdst) (align bsrc).

Lemma bmal_of_preserv m1 m2 bdst odst bsrc osrc:
  (forall b ofs k p, Mem.perm m1 b ofs k p -> (Mem.block_align m1 b | Mem.block_align m2 b)) ->
  (Mem.perm m1 bsrc osrc Cur Readable) ->
  (Mem.perm m1 bdst odst Cur Writable) ->
  (bmal_of (Mem.block_align m1) bdst bsrc | bmal_of (Mem.block_align m2) bdst bsrc).
Proof.
  intros PRESERV SRC DST.
  set (a := bmal_of (Mem.block_align m1) bdst bsrc).
  unfold bmal_of in *.
  apply Z.gcd_greatest; etransitivity; eauto.
  - eapply Z.gcd_divide_l.
  - eapply Z.gcd_divide_r.
Qed.


Lemma memmove_memcpy_args_preserv chk bdst odst bsrc osrc m1 bytes m2 m0:
  memcpy_args_ok chk (bmal_of (Mem.block_align m1) bdst bsrc) bdst odst bsrc osrc ->
  Mem.loadbytes m1 bsrc osrc (Z.pos (cpy_sz chk)) = Some bytes ->
  Mem.storebytes m1 bdst odst bytes = Some m0 ->
  (forall b ofs k p, Mem.perm m1 b ofs k p -> (Mem.block_align m1 b | Mem.block_align m2 b)) ->
  memcpy_args_ok chk (bmal_of (Mem.block_align m2) bdst bsrc) bdst odst bsrc osrc.
Proof.
  intros;
  eapply memcpy_args_div_bmal; eauto.
  eapply bmal_of_preserv; eauto.
  + eapply Mem.loadbytes_range_perm; eauto.
    split. reflexivity. lia.
  + eapply Mem.storebytes_range_perm; eauto.
    split. reflexivity.
    erewrite Mem.loadbytes_length; eauto. cbn. rewrite positive_nat_Z.
    lia.
Qed.

Definition memcpyv (chk: cpychunk) (align: block -> Z) (args: list val) (rm wm: mem) : option mem :=
  match args with
  | [Vptr bdst odst;Vptr bsrc osrc] => memcpy chk (bmal_of align bdst bsrc) bdst (Ptrofs.unsigned odst) bsrc (Ptrofs.unsigned osrc) rm wm
  | _ => None
  end.

Lemma memcpyv_perm_preserv chk align vargs rm m1 m2
  (MEMCPY: memcpyv chk align vargs rm m1 = Some m2)
  b ofs k p:
  Mem.perm m1 b ofs k p <-> Mem.perm m2 b ofs k p.
Proof.
  revert MEMCPY; unfold memcpyv; repeat autodestruct.
  intros; eapply perm_memcpy_iff; eauto.
Qed.

Lemma memcpyv_align chk align vargs rm m1 m2
  (MEMCPY: memcpyv chk align vargs rm m1 = Some m2):
  Mem.mem_align m2 = Mem.mem_align m1.
Proof.
  revert MEMCPY; unfold memcpyv; repeat autodestruct.
  intros. eapply memcpy_align; eauto.
Qed.

Definition memcpy_run (chk: cpychunk) (args: list val) (m: mem) : option mem :=
  match args with
  | [Vptr bdst odst;Vptr bsrc osrc] => memcpy chk (bmal_of (Mem.block_align m) bdst bsrc) bdst (Ptrofs.unsigned odst) bsrc (Ptrofs.unsigned osrc) m m
  | _ => None
  end.

Local Opaque Ptrofs.max_unsigned Decidable_witness.

Lemma memcpy_extends chk vargs m1 m2 m1' vargs'
  (MEMCPY: memcpy_run chk vargs m1 = Some m2)
  (MEMEXT: Mem.extends m1 m1')
  (ARGLESS: Val.lessdef_list vargs vargs'):
  exists (m2' : mem),
    memcpy_run chk vargs' m1' = Some m2' /\
    Mem.extends m2 m2'.
Proof.
  revert MEMCPY; unfold memcpy_run, memcpy; do 6 autodestruct.
  intros; subst.
  inv ARGLESS. inv H1. inv H3. inv H1. inv H4.
  erewrite (decide_impl EQ).
  + rewrite EQ.
    feapply memmove_extends.
  + intros. revert MEMCPY.
    unfold memmove; autodestruct.
    intros; eapply memmove_memcpy_args_preserv; eauto.
    intros. exploit Mem.mi_base_align; eauto.
    eapply Mem.mext_inj; eauto.
    - unfold inject_id; eauto.
    - unfold Mem.block_align; intros (_ & ?); auto.
Qed.

Lemma memcpy_perm_preserv chk vargs m1 m2
  (MEMCPY: memcpy_run chk vargs m1 = Some m2)
  b ofs k p:
  Mem.perm m1 b ofs k p <-> Mem.perm m2 b ofs k p.
Proof.
  exploit memcpyv_perm_preserv; eauto.
Qed.

Lemma memcpy_perm_preserv_valid chk vargs m1 m2
  (MEMCPY: memcpy_run chk vargs m2 = None)
  (PERM_PRESERV: forall b ofs k p, Mem.perm m1 b ofs k p -> Mem.perm m2 b ofs k p)
  (ALIGN_PRESERV: Mem.block_align m2 = Mem.block_align m1)
  : memcpy_run chk vargs m1 = None.
Proof.
  revert MEMCPY; unfold memcpy_run. repeat autodestruct. intros; subst.
  rewrite !memcpy_None_equiv in *. intros; lapply MEMCPY; auto.
  clear MEMCPY H. intros MEMCPY.
  destruct (memmove _ _ _ _ _ m1) eqn: C1; auto.
  exploit memmove_range_perm; eauto.
  intros (READ & WRITE).
  exploit range_perm_memmove.
  3: { intros (? & C). rewrite C in MEMCPY. congruence. }
  1-2: (intros ? ?; eauto).
  rewrite ALIGN_PRESERV; auto.
Qed.

Lemma memcpy_perm_preserv_valid_equiv chk vargs m1 m2
  (PERM_PRESERV: forall b ofs k p, Mem.perm m1 b ofs k p <-> Mem.perm m2 b ofs k p)
  (ALIGN_PRESERV: Mem.block_align m2 = Mem.block_align m1):
  memcpy_run chk vargs m1 <> None <-> memcpy_run chk vargs m2 <> None.
Proof.
  destruct (memcpy_run chk vargs m1) eqn: H1.
  + destruct (memcpy_run chk vargs m2) eqn: H2; intuition try congruence.
    erewrite memcpy_perm_preserv_valid in H1; eauto.
    eapply PERM_PRESERV.
  + erewrite memcpy_perm_preserv_valid; eauto; intuition try congruence.
    eapply PERM_PRESERV; auto.
Qed.

Realignment


Definition set_bal chk bmal odst osrc :=
  let base_al := al_Z mlal bmal in
  let gal := al_Z (al_Z (al_pos base_al (cpy_sz chk)) odst) osrc in
  let al := two_power_nat gal in
  let log2_sz := bpos_log2 base_al (cpy_sz chk) in
  let bal := two_power_nat (al_Z log2_sz (odst - osrc)) in
  let rem := osrc mod bal in
  let ofs := if zeq rem 0 then 0 else bal - rem in
  let obal :=
   if zle bal al then None
   else if zle (bal+ofs) (Zpos (cpy_sz chk))
   then Some (ofs, bal)
   else if zle (4*al) bal
   then let bal := Z.div2 bal in
        let rem := osrc mod bal in
        Some (if zeq rem 0 then 0 else bal - rem, bal)
   else None
  in cpy (cpy_mode chk) (cpy_sz chk) al obal.

Local Opaque two_power_nat Z.add Z.mul al_Z al_pos bpos_log2.

Lemma memcpy_args_ok_set_bal chk xbmal bmal bdst odst bsrc osrc:
  memcpy_args_ok chk bmal bdst odst bsrc osrc ->
  (xbmal | bmal) ->
  memcpy_args_ok (set_bal chk xbmal odst osrc) bmal bdst odst bsrc osrc.
Proof.
  intros (_ & SZ & MAXAL & _ & _ & NOOV & MAYOV & _) XBMAL.
  unfold memcpy_args_ok, set_bal. cbn.
  assert (POS: forall n, 0 < two_power_nat n). {
    intros; specialize (two_power_nat_pos n); auto with zarith.
  }
  assert (POS1: forall n, 1 <= two_power_nat n). {
    intros; specialize (POS n); auto with zarith.
  }
  set (base_al := al_Z mlal xbmal).
  assert (BASE_AL: (base_al <= mlal)%nat). {
    apply al_Z_bnd.
  }
  set (gal := al_Z (al_Z (al_pos base_al (cpy_sz chk)) odst) osrc).
  assert (GAL_BND: (gal <= base_al)%nat). {
    do 2 (etransitivity; [ apply al_Z_bnd | idtac ]).
    apply al_pos_bnd.
  }
  set (al := two_power_nat gal).
  assert (ALA: is_align al). {
    eapply two_power_nat_is_align; eauto with arith.
  }
  assert (ASRC: (al | osrc)). {
     apply al_Z_div.
  }
  set (log2_sz := bpos_log2 base_al (cpy_sz chk)).
  set (bal := two_power_nat (al_Z log2_sz (odst - osrc))).
  set (rem := osrc mod bal).
  set (ofs := if zeq rem 0 then 0 else bal - rem).
 (do 7 try split); auto.
  - eapply Z.divide_trans.
    2: eapply al_pos_div.
    apply two_power_nat_less_div.
    do 2 (etransitivity; [ apply al_Z_bnd | idtac ]); eauto with zarith.
  - etransitivity. eapply two_power_nat_less_div.
      etransitivity. apply al_Z_bnd.
        etransitivity. apply al_Z_bnd.
                       apply al_pos_bnd.
    etransitivity. eapply al_Z_div. auto.
  - eapply Z.divide_trans.
     2: eapply al_Z_div.
     apply two_power_nat_less_div.
     (etransitivity; [ apply al_Z_bnd | idtac ]); eauto with zarith.
  - case (zle bal al); cbn; auto.
    intros GT.
    assert (BAL: is_align bal). {
      eapply two_power_nat_is_align.
      etransitivity. apply al_Z_bnd.
      etransitivity. apply bpos_log2_bnd.
      auto.
    }
    assert (BZ: bal <> 0). {
      intros; specialize (POS (al_Z log2_sz (odst - osrc))); lia.
    }
    assert (AZ: al <> 0). {
      intros; specialize (POS gal); lia.
    }
    assert (BSZ: bal <= Zpos (cpy_sz chk)). {
      etransitivity. 2: eapply bpos_log2_le.
      apply two_power_nat_monotonic, al_Z_bnd.
    }
    assert (BDIV: odst mod bal = rem). {
      assert (X: (bal | odst - osrc)). {
        apply al_Z_div.
      }
      rewrite <- Z.mod_divide, Z_sub_mod_eq_zero in X; auto.
    }
    assert (ABAL: (al | bal)). {
      unfold bal, al in *. rewrite two_power_nat_div_le.
      lia.
    }
    assert (OFAL: (al | ofs)). {
      unfold ofs. autodestruct; intros _.
      + apply Z.divide_0_r.
      + apply Z.divide_sub_r; auto.
        unfold rem, al, bal in *.
        rewrite <- Z.mod_divide, <- Zmod_div_mod, Z.mod_divide;
        auto with zarith.
    }
    assert (OFBND: 0 <= ofs < bal). {
      unfold ofs. autodestruct.
      + unfold bal; auto with zarith.
      + intros _. lapply (Z.mod_pos_bound osrc bal). 2: apply POS.
        unfold rem in *. lia.
    }
    assert (OFBAL: ofs + al <= bal). {
      apply (Z_divide_le_reduce al ofs bal al); unfold al; lia || auto with zarith.
    }
    assert (BALD: forall x, rem = (x mod bal) -> (bal | x + ofs)). {
      unfold ofs. intros x EQx. autodestruct; intros _.
      * unfold bal in *; replace (x + 0) with x by (auto with zarith).
        rewrite <- Z.mod_divide; congruence.
      * replace (x + (bal - rem)) with ((x - rem) + bal) by lia.
        apply Z.divide_add_r; auto. 2: apply Z.divide_refl.
        unfold rem, bal in *.
        rewrite <- Z.mod_divide, EQx, Zminus_mod_idemp_r; auto with zarith.
        replace (x - x) with 0; auto. lia.
    }
    unfold best_align.
    assert (BAL_BMAL: (bal | bmal)). {
      etransitivity. eapply two_power_nat_less_div.
        etransitivity. apply al_Z_bnd.
                       apply bpos_log2_bnd.
        etransitivity. eapply al_Z_div. auto.
    }
    case (zle _ _).
    { intros OK; cbn; do 5 try (split; lia || auto). }
    intros _. case (zle _ _); cbn; auto.
    intros OK.
    set (bal0 := Z.div2 bal).
    set (rem0 := osrc mod bal0).
    set (ofs0 := if zeq rem0 0 then 0 else bal0 - rem0).
    destruct (al_Z log2_sz (odst - osrc)) as [|log2_bal0] eqn: LBAL0. {
      (* absurd case *)
      revert OK; unfold bal. rewrite two_power_nat_O.
      apply is_align_pos in ALA; lia.
    }
    assert (BAL0: bal0 = two_power_nat log2_bal0). {
     unfold bal0, bal. Transparent two_power_nat. cbn; auto.
     Local Opaque two_power_nat.
    }
    assert (BAL_BAL0: bal = 2 * bal0). {
      unfold bal. rewrite two_power_nat_S, <- BAL0; auto.
    }
    assert (BAL0_A: is_align bal0). {
      eapply is_align_divides. apply BAL. lia.
      rewrite BAL0. apply two_power_nat_less_div; auto with arith.
    }
    assert (BZ0: bal0 <> 0). {
      intros; specialize (POS log2_bal0); lia.
    }
    assert (BDIV0: odst mod bal0 = rem0). {
      assert (X: (bal0 | odst - osrc)). {
        etransitivity. 2: apply al_Z_div.
        revert BAL_BAL0. unfold bal. rewrite <- LBAL0.
        intros ->. apply Z.divide_factor_r.
      }
      rewrite <- Z.mod_divide, Z_sub_mod_eq_zero in X; auto.
    }
    assert (ABAL0: (al | bal0)). {
      eapply is_align_le_divides; eauto. lia.
    }
    assert (OFAL0: (al | ofs0)). { (* TODO: remove copy-paste *)
      unfold ofs0. autodestruct; intros _.
      + apply Z.divide_0_r.
      + apply Z.divide_sub_r; auto.
        unfold rem0, al in *.
        rewrite <- Z.mod_divide, <- Zmod_div_mod, Z.mod_divide;
        lia || auto.
    }
    assert (OFBND0: 0 <= ofs0 < bal0). {(* TODO: remove copy-paste *)
      unfold ofs0. autodestruct.
      + unfold bal; lia || auto.
      + intros _. lapply (Z.mod_pos_bound osrc bal0).
         2: rewrite BAL0; apply POS.
        unfold rem0 in *. lia.
    }
    assert (OFBAL0: ofs0 + al <= bal0). { (* TODO: remove copy-paste *)
      apply (Z_divide_le_reduce al ofs0 bal0 al); auto with zarith.
      unfold al; auto with zarith.
    }
    assert (BALD0: forall x, rem0 = (x mod bal0) -> (bal0 | x + ofs0)). { (* TODO: remove copy-paste *)
      unfold ofs0. intros x EQx. autodestruct; intros _.
      * rewrite BAL0; replace (x + 0) with x by (auto with zarith).
        rewrite <- Z.mod_divide; congruence.
      * replace (x + (bal0 - rem0)) with ((x - rem0) + bal0) by lia.
        apply Z.divide_add_r; auto. 2: apply Z.divide_refl.
        unfold rem0 in *. rewrite BAL0 in *.
        rewrite <- Z.mod_divide, EQx, Zminus_mod_idemp_r; auto with zarith.
        replace (x - x) with 0; auto with zarith.
    }
    do 5 try (split; lia || auto with zarith).
    etransitivity. 2: eapply BAL_BMAL.
    rewrite BAL_BAL0. auto with zarith.
Qed.
Global Opaque set_bal.


Lemma memcpy_set_bal chk bmal bdst odst bsrc osrc m m':
  memcpy_run chk [Vptr bdst odst; Vptr bsrc osrc] m = Some m' ->
  (bmal | bmal_of (Mem.block_align m) bdst bsrc) ->
  memcpy_run (set_bal chk bmal (Ptrofs.unsigned odst) (Ptrofs.unsigned osrc)) [Vptr bdst odst; Vptr bsrc osrc] m = Some m'.
Proof.
  cbn. rewrite memcpy_Some_equiv.
  intros (OK & MOVE) BMAL.
  rewrite memcpy_ok_move; cbn; auto.
  apply memcpy_args_ok_set_bal; auto.
Qed.




Axiom update_al_warning: Z -> Z -> bool.
Extract Constant update_al_warning => "RTLcommonaux.memcpy_update_al_warning".

Definition update_bal chk bmal odst osrc :=
  let opt := set_bal chk bmal odst osrc in
  if zle (cpy_al chk) (cpy_al opt)
  then opt
  else if update_al_warning (cpy_al chk) (cpy_al opt) then opt else chk.

Lemma memcpy_update_bal chk bmal bdst odst bsrc osrc m m':
  memcpy_run chk [Vptr bdst odst; Vptr bsrc osrc] m = Some m' ->
  (bmal | bmal_of (Mem.block_align m) bdst bsrc) ->
  memcpy_run (update_bal chk bmal (Ptrofs.unsigned odst) (Ptrofs.unsigned osrc)) [Vptr bdst odst; Vptr bsrc osrc] m = Some m'.
Proof.
  intros; unfold update_bal; destruct zle.
  - intros; apply memcpy_set_bal; auto.
  - intros; autodestruct. intros; apply memcpy_set_bal; auto.
Qed.


Rewriting lemmas to split memcpy with realignment

Lemma size_offset_representable sz ofs:
   Zpos sz <= Ptrofs.max_unsigned - Ptrofs.unsigned ofs ->
   Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr (Zpos sz))) = Ptrofs.unsigned ofs + (Zpos sz).
Proof.
  generalize (Ptrofs.unsigned_range ofs). intros.
  unfold Ptrofs.add.
  rewrite (Ptrofs.unsigned_repr (Zpos sz)), Ptrofs.unsigned_repr; auto.
  all: lia.
Qed.


Lemma Z_divide_add_lr n m p: (n | m + p) -> (n | m) -> (n | p).
Proof.
  intros.
  replace p with (m + p - m).
  - auto with zarith.
  - lia.
Qed.

Lemma Z_divide_add_ll n m p: (n | p + m) -> (n | m) -> (n | p).
Proof.
  intros.
  replace p with (p + m - m).
  - auto with zarith.
  - lia.
Qed.


Local Hint Resolve Z_divide_add_lr Z_divide_add_ll: zarith.

Definition atomic (sz: positive): cpychunk :=
  cpy None sz (Zpos sz) None.

Lemma memcpy_args_ok_atomic sz bmal bdst odst bsrc osrc:
  is_align (Zpos sz) ->
  (Zpos sz | odst) ->
  (Zpos sz | osrc) ->
  (Zpos sz | bmal) ->
  memcpy_args_ok (atomic sz) bmal bdst odst bsrc osrc.
Proof.
  intros ALIGN DST SRC.
  unfold memcpy_args_ok, atomic; cbn.
  specialize (Z.divide_refl (Zpos sz)).
  intuition (try congruence).
  eapply noover_trivial; eauto with zarith.
Qed.

Definition split chk ofs bal: positive * option cpychunk :=
  let al := Z.to_pos ofs in
  (al, match Zpos (cpy_sz chk) - ofs with
       | Z0 | Zneg _ => None
       | Zpos sz => Some (cpy (cpy_mode chk) sz (cpy_al chk) bal)
       end).

Definition select_split chk: positive * option cpychunk :=
  match cpy_bal chk with
  | None => split chk (cpy_al chk) None
  | Some (ofs, bal) as obal =>
    if zeq ofs 0 then
      let sz := Zpos (cpy_sz chk) in
      let new_obal :=
        if zle (2*bal) sz
        then obal
        else Some (0, two_power_nat (bpos_log2 mlal (Z.to_pos (sz - bal))))
      in split chk bal new_obal
    else
      let nal := two_power_nat (al_Z mlal ofs) in
      let nofs := ofs - nal in
      split chk nal (Some (nofs, bal))
    end.

Lemma select_split_atomic chk sz bmal bdst odst bsrc osrc:
  memcpy_args_ok chk bmal bdst odst bsrc osrc ->
  sz = fst (select_split chk) ->
  is_align (Zpos sz) /\ (Zpos sz | odst) /\ (Zpos sz | osrc) /\ (Zpos sz | bmal).
Proof.
  intros (ALIGN & ? & ? & ? & ? & _ & _ & BAL).
  unfold select_split; do 2 (try autodestruct); cbn in *.
  autodestruct; cbn in *.
  - (* [bal] splits *)
    intros _ ? ?; subst.
    destruct BAL as (BALIGN & ? & ? & ? & ? & ?).
    intros ?.
    replace z0 with (Zpos sz) in *. 2: {
       exploit is_align_Zpos. apply BALIGN. congruence.
    }
    rewrite !Z.add_0_r in *. auto.
  - (* [bal] not yet reached *)
    intros _ ? ?; subst.
    destruct BAL as (BALIGN & ? & ? & ? & ? & ?).
    set (lal := al_Z mlal z).
    intros EQ.
    replace (Zpos sz) with (two_power_nat lal) in *. 2: {
       rewrite EQ, two_power_nat_Zpos. lia.
    }
    clear sz EQ.
    apply is_align_two_power_nat in BALIGN.
    destruct BALIGN as (lbal & BNDlb & EQ).
    subst.
    assert (DIV1:(two_power_nat lal | two_power_nat lbal)). {
      rewrite two_power_nat_div_le.
      transitivity z. 2: { apply is_align_pos in ALIGN; lia. }
      apply Z.divide_pos_le. lia.
      apply al_Z_div.
    }
    do 3 (try split).
    + eapply two_power_nat_is_align.
      apply al_Z_bnd.
    + eapply Z_divide_add_ll.
      * eapply Z.divide_trans; eauto.
      * apply al_Z_div.
    + eapply Z_divide_add_ll.
      * eapply Z.divide_trans; eauto.
      * apply al_Z_div.
    + eapply Z.divide_trans; eauto.
  - (* [al] split *)
    exploit is_align_Zpos; eauto.
    intros EQ _ EQ0. rewrite <- EQ0, <- EQ in *.
    eauto.
Qed.

Lemma select_split_None chk bmal sz bdst odst bsrc osrc:
  memcpy_args_ok chk bmal bdst odst bsrc osrc ->
  select_split chk = (sz, None) ->
  sz = cpy_sz chk.
Proof.
  intros (? & SZ & ? & _ & ? & _ & _ & BAL).
  unfold select_split, split; repeat autodestruct; cbn in *.
  - (* normal [bal] split: 0 *)
    intros ? _ ? ? P; subst. inv P.
    replace z0 with (Z.pos (cpy_sz chk)). 2: lia.
    apply Pos2Z.id.
  - (* normal [bal] split: neg *)
    unfold best_align in *. lia.
  - (* end [bal] split: 0 *)
    intros ? _ ? ? P; subst. inv P.
    replace (two_power_nat (al_Z mlal z)) with (Z.pos (cpy_sz chk)). 2: lia.
    apply Pos2Z.id.
  - (* end [bal] split: neg *)
    destruct BAL as (BALIGN & ? & ? & ? & ?).
    assert (X: two_power_nat (al_Z mlal z) <= z). {
      apply Z.divide_pos_le. lia.
      apply al_Z_div.
    }
    apply is_align_pos in BALIGN. lia.
  - (* [al] split: 0 *)
    intros ? ? P. inv P.
    replace (cpy_al chk) with (Z.pos (cpy_sz chk)). 2: lia.
    apply Pos2Z.id.
  - (* [al] split: neg *)
    intros.
    apply Z.divide_pos_le in SZ; lia.
Qed.

Lemma memcpy_select_split_None chk sz args m m'
   (SELECT: select_split chk = (sz, None))
   (MEMCPY: memcpy_run chk args m = Some m')
   : Some m' = memcpy_run (atomic sz) args m.
Proof.
  revert MEMCPY. unfold memcpy_run; do 5 autodestruct. intros; subst.
  revert MEMCPY. rewrite memcpy_Some_equiv; cbn.
  intros (OK & MOVE).
  feapply select_split_atomic.
  rewrite SELECT in *. cbn in *.
  eapply select_split_None in SELECT; eauto.
  subst. rewrite memcpy_ok_move; eauto.
  apply memcpy_args_ok_atomic; auto.
Qed.

Lemma memcpy_args_ok_split chk bmal sz chk2 bdst odst bsrc osrc:
  select_split chk = (sz, Some chk2) ->
  memcpy_args_ok chk bmal bdst odst bsrc osrc ->
  memcpy_args_ok (atomic sz) bmal bdst odst bsrc osrc /\
  (cpy_sz chk = sz + cpy_sz chk2)%positive /\
  memcpy_args_ok chk2 bmal bdst (odst + Zpos sz) bsrc (osrc + Zpos sz).
Proof.
  intros SELECT OK.
  exploit select_split_atomic; eauto.
  intros (AL1 & DST1 & SRC1 & BMAL).
  lapply (memcpy_al_divides_best_align chk bmal bdst odst bsrc osrc); auto.
  intros ALDIV.
  destruct OK as (ALIGN & SZ & BMAL0 & DST & SRC & NOOV & MOV & BAL).
  rewrite SELECT in *. cbn in *.
  split. eapply memcpy_args_ok_atomic; eauto.
  revert SELECT.
  unfold select_split, split; repeat autodestruct; cbn in *.
  - (* [bal] loop *)
    intros _ ? _ ? ? P; subst. inv P; cbn.
    feapply ALDIV.
    destruct BAL as (BALIGN & ? & ? & ? & ? & ?).
    lapply (is_align_pos z0); eauto with zarith.
    intros POSz0; destruct z0 as [| p |]; try lia.
    unfold memcpy_args_ok, noover; cbn in *.
    assert (EQsz: cpy_sz chk = (p + p0)%positive) by lia.
    repeat (split; auto with zarith).
    + rewrite <- EQ. auto with zarith.
    + intros; destruct NOOV; lia || auto.
    + destruct (cpy_mode chk) as [ [ | ] | ]; cbn in *; lia.
  - (* end [bal] loop *)
    intros _ ? _ ? ? P; subst. inv P; cbn.
    feapply ALDIV.
    destruct BAL as (BALIGN & ? & ? & ? & ? & ?).
    lapply (is_align_pos z0); eauto with zarith.
    intros POSz0; destruct z0 as [| p |]; try lia.
    unfold memcpy_args_ok, noover; cbn in *.
    assert (EQsz: cpy_sz chk = (p + p0)%positive) by lia.
    assert (DIV0: (cpy_al chk | Z.pos p0)). {
      rewrite <- EQ; auto with zarith.
    }
    set (bal0:=two_power_nat (bpos_log2 mlal p0)).
    assert (DIV1: (bal0 | Zpos p)). {
       destruct (is_align_two_power_nat (Z.pos p))
         as (bal & _ & EQbal); eauto.
       unfold bal0; rewrite EQbal, two_power_nat_div_le.
       etransitivity.
       + apply bpos_log2_le.
       + lia.
    }
    repeat (split; auto with zarith).
    + intros; destruct NOOV; lia || auto.
    + destruct (cpy_mode chk) as [ [ | ] | ]; cbn in *; lia.
    + apply two_power_nat_is_align.
      apply bpos_log2_bnd.
    + destruct (is_align_two_power_nat (cpy_al chk))
       as (al0 & LEAL0 & EQal0); eauto.
      assert (X: two_power_nat al0 <= bal0). 2: lia.
      eapply two_power_nat_monotonic.
      etransitivity.
      2: eapply al_pos_bpos_log2.
      specialize (al_Z_two_power_nat mlal al0).
      rewrite Nat.min_r; auto.
      intros X; eapply al_Z_div_mono in DIV0.
      rewrite EQal0, X in DIV0; auto.
    + replace (0 + bal0) with bal0 by lia.
      apply bpos_log2_le.
    + assert (bal0 | osrc); auto with zarith.
      eapply Z.divide_trans; eauto.
    + assert (bal0 | odst); auto with zarith.
      eapply Z.divide_trans; eauto.
    + etransitivity; eassumption.
  - (* [bal] not yet reached *)
    intros ? _ ? ? P; subst. inv P; cbn.
    destruct BAL as (BALIGN & ? & ? & ? & ? & ?).
    exploit ALDIV; eauto. clear ALDIV.
    intros (? & DIV).
    specialize (two_power_nat_pos (al_Z mlal z)).
    intros POSp.
    destruct (two_power_nat (al_Z mlal z)) as [ | p | ] eqn: EQp; try lia; cbn in *.
    unfold memcpy_args_ok, noover; cbn in *.
    assert (EQsz: cpy_sz chk = (p + p0)%positive) by lia.
    assert (DIV0: (cpy_al chk | Zpos p)). {
      destruct (is_align_two_power_nat (cpy_al chk))
       as (al0 & LEAL0 & EQal0); eauto.
      specialize (al_Z_two_power_nat mlal al0).
      rewrite Nat.min_r; auto.
      intros X; eapply al_Z_div_mono in DIV.
      rewrite EQal0, X in DIV.
      rewrite <- EQp, EQal0. apply two_power_nat_less_div; auto.
    }
    repeat (split; auto with zarith).
    + rewrite <- EQ. auto with zarith.
    + intros; destruct NOOV; lia || auto.
    + destruct (cpy_mode chk) as [ [ | ] | ]; cbn in *; lia.
    + assert (Z.pos p <= z); auto with zarith.
      apply Z.divide_pos_le; auto with zarith.
      rewrite <- EQp. apply al_Z_div.
    + replace (osrc + Z.pos p + (z - Z.pos p)) with (osrc + z); auto with zarith.
    + replace (odst + Z.pos p + (z - Z.pos p)) with (odst + z); auto with zarith.
  - (* [al] split *)
    intros ? ? P. inv P; cbn.
    lapply (is_align_pos (cpy_al chk)); auto.
    intros POSal.
    destruct (cpy_al chk) as [ | al | ] eqn: EQp; try lia; cbn in *.
    unfold memcpy_args_ok, noover; cbn in *.
    assert (EQsz: cpy_sz chk = (al + p)%positive) by lia.
    repeat (split; auto with zarith).
    + rewrite <- EQ. auto with zarith.
    + intros; destruct NOOV; lia || auto.
    + destruct (cpy_mode chk) as [ [ | ] | ]; cbn in *; lia.
Qed.

Lemma memcpy_split_forward chk sz chk2 args m m'
   (SELECT: select_split chk = (sz, Some chk2))
   (MODE: cpy_mode chk <> Some false)
   (MEMCPY: memcpy_run chk args m = Some m')
   : Some m' = SOME m1 <- memcpy_run (atomic sz) args m IN
               memcpy_run chk2 (List.map (fun v => Val.offset_ptr v (Ptrofs.repr (Zpos sz))) args) m1.
Proof.
  revert MEMCPY. unfold memcpy_run; do 5 autodestruct. intros; subst.
  revert MEMCPY. rewrite memcpy_Some_equiv; cbn.
  intros (OK & SIZE).
  generalize SIZE; eapply memmove_max_size in SIZE; eauto. cbn in *.
  generalize OK. eapply memcpy_args_ok_split in OK; eauto.
  destruct OK as (OK1 & EQ & OK2).
  rewrite !EQ in *.
  exploit (@size_offset_representable sz i). { lia. }
  intros REP1.
  exploit (@size_offset_representable sz i0). { lia. }
  intros REP2 OK. rewrite REP1, REP2.
  intros MOVE. autodestruct.
  * intros MOVE0; unfold Mem.block_align; erewrite memcpy_align; eauto.
    revert MOVE0. unfold memcpy; rewrite! Decidable_complete; eauto.
    cbn. rewrite <- MOVE. intros; rewrite memmove_split_forward; eauto.
    + rewrite MOVE0; auto.
    + destruct OK as (? & ? & ? & ? & ? & NOOV & MAYOV); cbn in *.
      destruct (cpy_mode chk) as [[]|]; cbn in *; auto with zarith.
      feapply NOOV. unfold noover; lia.
  * unfold memcpy; rewrite! Decidable_complete; eauto.
    cbn. rewrite <- MOVE. intros; rewrite memmove_split_forward; eauto.
    + rewrite EQ0; auto.
    + destruct OK as (? & ? & ? & ? & ? & NOOV & MAYOV); cbn in *.
      destruct (cpy_mode chk) as [[]|]; cbn in *; auto with zarith.
      feapply NOOV. unfold noover; lia.
Qed.

Lemma memcpy_split_backward chk sz chk2 args m m'
   (SELECT: select_split chk = (sz, Some chk2))
   (MODE: cpy_mode chk = Some false)
   (MEMCPY: memcpy_run chk args m = Some m')
   : Some m' = SOME m1 <- memcpy_run chk2 (List.map (fun v => Val.offset_ptr v (Ptrofs.repr (Zpos sz))) args) m IN
               memcpy_run (atomic sz) args m1.
Proof.
  revert MEMCPY. unfold memcpy_run; do 5 autodestruct. intros; subst.
  revert MEMCPY. rewrite memcpy_Some_equiv; cbn.
  intros (OK & SIZE).
  generalize SIZE; eapply memmove_max_size in SIZE; eauto. cbn in *.
  generalize OK. eapply memcpy_args_ok_split in OK; eauto.
  destruct OK as (OK1 & EQ & OK2).
  rewrite !EQ in *.
  exploit (@size_offset_representable sz i). { lia. }
  intros REP1.
  exploit (@size_offset_representable sz i0). { lia. }
  intros REP2 OK. rewrite REP1, REP2.
  intros MOVE. autodestruct.
  * intros MOVE0; unfold Mem.block_align; erewrite memcpy_align; eauto.
    revert MOVE0. unfold memcpy; rewrite! Decidable_complete; eauto.
    cbn. rewrite <- MOVE. intros; rewrite memmove_split_backward; eauto.
    + rewrite MOVE0; auto.
    + destruct OK as (? & ? & ? & ? & ? & _ & MAYOV & _); cbn in *.
      rewrite MODE in *; cbn in *; auto with zarith.
  * unfold memcpy; rewrite! Decidable_complete; eauto.
    cbn. rewrite <- MOVE. intros; rewrite memmove_split_backward; eauto.
    + rewrite EQ0; auto.
    + destruct OK as (? & ? & ? & ? & ? & _ & MAYOV & _); cbn in *.
      rewrite MODE in *; cbn in *; auto with zarith.
Qed.

If cpy_bal chk = None, we loop on Z.div (cpy_sz chk) (cpy_al chk) splits of size cpy_al chk (exact divison by OK hypothesis)
Lemma select_split_nobal_sz chk bmal bdst odst bsrc osrc
  (OK: memcpy_args_ok chk bmal bdst odst bsrc osrc)
  (NOBAL: cpy_bal chk = None):
  Zpos (fst (select_split chk)) = cpy_al chk.
Proof.
  destruct OK as (ALIGN & _ ).
  unfold select_split, split. rewrite NOBAL.
  lapply (is_align_pos (cpy_al chk)); auto.
  intros POSal.
  destruct (cpy_al chk) as [ | al | ] eqn: EQp;
  try lia; cbn in *; auto.
Qed.

Lemma select_split_nobal_loop chk sz chk2
  (NOBAL: cpy_bal chk = None)
  (SELECT: select_split chk = (sz, Some chk2)):
  cpy_bal chk2 = cpy_bal chk.
Proof.
  revert SELECT; unfold select_split, split.
  rewrite NOBAL. autodestruct.
  intros EQ P; inv P. cbn. auto.
Qed.

On a cpy_bal chk = Some (0, bal), we loop Z.div (cpy_sz chk) bal splits of size bal  (but possibly with remaining split below bal).
Lemma select_split_bal_sz chk bmal bal bdst odst bsrc osrc
  (OK: memcpy_args_ok chk bmal bdst odst bsrc osrc)
  (BAL: cpy_bal chk = Some (0, bal)):
  Zpos (fst (select_split chk)) = bal.
Proof.
  destruct OK as (_ & _ & _ & _ & _ & _ & _ & BALOK).
  revert BALOK; rewrite BAL; cbn.
  destruct 1 as (ALIGN & _ ).
  unfold select_split, split. rewrite BAL.
  lapply (is_align_pos bal); auto.
  intros POSal.
  destruct bal as [ | al | ] eqn: EQp;
  try lia; cbn in *; auto.
Qed.

Lemma select_split_bal_loop chk sz chk2 bal
  (BAL: cpy_bal chk = Some (0, bal))
  (SIZE: 2 * bal <= Zpos (cpy_sz chk))
  (SELECT: select_split chk = (sz, Some chk2)):
  cpy_bal chk2 = cpy_bal chk.
Proof.
  revert SELECT; unfold select_split, split.
  rewrite BAL. repeat (autodestruct; try lia).
  intros _ ? _ P; inv P. cbn; auto.
Qed.

Lemma select_split_sz chk bmal bal bdst odst bsrc osrc
  (OK: memcpy_args_ok chk bmal bdst odst bsrc osrc)
  (BAL: cpy_bal chk = Some (0, bal) \/
        (cpy_bal chk = None /\ cpy_al chk = bal)):
  Zpos (fst (select_split chk)) = bal.
Proof.
  destruct BAL as [ ? | [ ? ? ]]; subst.
  - eapply select_split_bal_sz; eauto.
  - exploit select_split_nobal_sz; eauto.
Qed.

Lemma select_split_keeps_mode:
  forall chk1 chk2 sz
  (SELECT: select_split chk1 = (sz, Some chk2)),
  cpy_mode chk1 = cpy_mode chk2.
Proof.
  intros until sz.
  unfold select_split, split.
  repeat autodestruct; intros; inv SELECT; auto.
Qed.

Lemma select_split_al chk chk2 sz
  (SELECT: select_split chk = (sz, Some chk2)):
  cpy_al chk = cpy_al chk2.
Proof.
  revert SELECT. unfold select_split, split.
  repeat autodestruct; intros;
  inv SELECT; auto.
Qed.