Module Stacklayout


Machine- and ABI-dependent layout information for activation records.

Require Import Coqlib.
Require Import Memory Separation.
Require Import Bounds.
Require Compopts.

The general shape of activation records is as follows, from bottom (lowest offsets) to top: The frame_env compilation environment records the positions of the boundaries between areas in the frame part.

Definition fe_ofs_arg := 0.

Computation of the frame environment from the bounds of the current function.

Definition make_env (b: bounds) :=
  let olink := 4 * b.(bound_outgoing) in
  let ora := olink + 4 in
  let ol := align (ora + 4) 8 in
  let ocs := ol + 4 * b.(bound_local) in
  let ostkdata := align (size_callee_save_area b ocs) 8 in
  let sz := align (ostkdata + b.(bound_stack_data)) 8 in
  {| fe_size := sz;
     fe_ofs_link := olink;
     fe_ofs_retaddr := ora;
     fe_ofs_local := ol;
     fe_ofs_callee_save := ocs;
     fe_stack_data := ostkdata;
     fe_used_callee_save := b.(used_callee_save) |}.

Separation property

Local Open Scope sep_scope.

Lemma frame_env_separated:
  forall b sp m P,
  let fe := make_env b in
  m |= range sp 0 (fe_stack_data fe) ** range sp (fe_stack_data fe + bound_stack_data b) (fe_size fe) ** P ->
  m |= range sp (fe_ofs_local fe) (fe_ofs_local fe + 4 * bound_local b)
       ** range sp fe_ofs_arg (fe_ofs_arg + 4 * bound_outgoing b)
       ** range sp (fe_ofs_link fe) (fe_ofs_link fe + 4)
       ** range sp (fe_ofs_retaddr fe) (fe_ofs_retaddr fe + 4)
       ** range sp (fe_ofs_callee_save fe) (size_callee_save_area b (fe_ofs_callee_save fe))
       ** P.
Proof.
Local Opaque Z.add Z.mul sepconj range.
  intros; simpl.
  set (olink := 4 * b.(bound_outgoing));
  set (ora := olink + 4);
  set (ol := align (ora + 4) 8);
  set (ocs := ol + 4 * b.(bound_local));
  set (ostkdata := align (size_callee_save_area b ocs) 8).
  generalize b.(bound_local_pos) b.(bound_outgoing_pos) b.(bound_stack_data_pos); intros.
  assert (0 <= olink) by (unfold olink; lia).
  assert (olink <= ora) by (unfold ora; lia).
  assert (ora + 4 <= ol) by (apply align_le; lia).
  assert (ol + 4 * b.(bound_local) <= ocs) by (unfold ocs; lia).
  assert (ocs <= size_callee_save_area b ocs) by apply size_callee_save_area_incr.
  assert (size_callee_save_area b ocs <= ostkdata) by (apply align_le; lia).
(* Reorder as:
     outgoing
     back link
     retaddr
     local
     callee-save *)

  rewrite sep_swap12.
  rewrite sep_swap23.
  rewrite sep_swap34.
(* Apply range_split and range_split2 repeatedly *)
  unfold fe_ofs_arg.
  apply range_split. lia.
  apply range_split. lia.
  apply range_split_2. fold ol; lia. lia.
  apply range_split. lia.
  apply range_drop_right with ostkdata. lia.
  eapply sep_drop2. eexact H.
Qed.

Lemma frame_env_range:
  forall b,
  let fe := make_env b in
  0 <= fe_stack_data fe /\ fe_stack_data fe + bound_stack_data b <= fe_size fe.
Proof.
  intros; simpl.
  set (olink := 4 * b.(bound_outgoing));
  set (ora := olink + 4);
  set (ol := align (ora + 4) 8);
  set (ocs := ol + 4 * b.(bound_local));
  set (ostkdata := align (size_callee_save_area b ocs) 8).
  generalize b.(bound_local_pos) b.(bound_outgoing_pos) b.(bound_stack_data_pos); intros.
  assert (0 <= olink) by (unfold olink; lia).
  assert (olink <= ora) by (unfold ora; lia).
  assert (ora + 4 <= ol) by (apply align_le; lia).
  assert (ol + 4 * b.(bound_local) <= ocs) by (unfold ocs; lia).
  assert (ocs <= size_callee_save_area b ocs) by apply size_callee_save_area_incr.
  assert (size_callee_save_area b ocs <= ostkdata) by (apply align_le; lia).
  split. lia. apply align_le; lia.
Qed.

Lemma frame_env_aligned:
  forall b,
  let fe := make_env b in
     (8 | fe_ofs_arg)
  /\ (8 | fe_ofs_local fe)
  /\ (8 | fe_stack_data fe)
  /\ (4 | fe_ofs_link fe)
  /\ (4 | fe_ofs_retaddr fe).
Proof.
  intros; simpl.
  set (olink := 4 * b.(bound_outgoing));
  set (ora := olink + 4);
  set (ol := align (ora + 4) 8);
  set (ocs := ol + 4 * b.(bound_local));
  set (ostkdata := align (size_callee_save_area b ocs) 8).
  split. apply Z.divide_0_r.
  split. apply align_divides; lia.
  split. apply align_divides; lia.
  unfold ora, olink; auto using Z.divide_mul_l, Z.divide_add_r, Z.divide_refl.
Qed.

Whether Stacking should bundle contiguous same-type callee-save registers into a single Msavecallee/Mrestorecallee instruction. On ARM this is gated by the -fbundle-callee-saves flag (default on); the lowering is in arm/Asmblockgen.v. Other backends keep this false regardless until they implement the lowering. Defined as a Notation (rather than a Definition) so that the OCaml-extracted code re-reads the Clflags reference at every use site, not once at module-load time.
Notation bundle_callee_saves := (Compopts.bundle_callee_saves tt).

Minimum same-type run length to emit Msavecallee/Mrestorecallee instead of falling back to per-register Msetstack/Mgetstack, dispatched per typ: - Tint/Tany32-fbundle-callee-saves-int-threshold (default 3, since ARM strd/ldrd handles 2-reg int groups in one instruction, beating add lr + stm of 2); - Tany64/Tfloat/Tsingle-fbundle-callee-saves-fp-threshold (default 2: no VFP analogue of strd, so 2-reg fp groups already break even with add lr + vstm of 2). Defined as a Definition (rather than a Notation) so the OCaml extraction is a function whose body is re-evaluated at each call site — the Compopts children read Clflags refs at call time.
Definition bundle_callee_saves_threshold (ty: AST.typ) : Z :=
  match ty with
  | AST.Tint | AST.Tany32 => Compopts.bundle_callee_saves_int_threshold tt
  | _ => Compopts.bundle_callee_saves_fp_threshold tt
  end.