Additional operations and proofs about IEEE-754 binary
floating-point numbers, on top of the Flocq library.
Require Import Reals.
Require Import SpecFloat.
From Flocq Require Import Core Digits Operations Round Bracket Sterbenz
BinarySingleNaN Binary Round_odd.
Require Import ZArith.
Require Import Psatz.
Require Import Bool.
Require Import Eqdep_dec.
Require Import Coq.Logic.FunctionalExtensionality.
Local Open Scope Z_scope.
Lemma Znearest_lub :
forall choice (
n :
Z) (
x :
R), (
IZR n <=
x)%
R -> (
n <=
Znearest choice x)%
Z.
Proof.
intros until x. intro BND.
pose proof (Zfloor_lub n x BND).
pose proof (Znearest_ge_floor choice x).
lia.
Qed.
Lemma Znearest_glb :
forall choice (
n :
Z) (
x :
R), (
x <=
IZR n)%
R -> (
Znearest choice x <=
n)%
Z.
Proof.
intros until x. intro BND.
pose proof (Zceil_glb n x BND).
pose proof (Znearest_le_ceil choice x).
lia.
Qed.
Lemma Znearest_IZR :
forall choice n, (
Znearest choice (
IZR n)) =
n.
Proof.
intros.
unfold Znearest.
case Rcompare_spec ; intro ORDER.
- apply Zfloor_IZR.
- destruct choice.
+ apply Zceil_IZR.
+ apply Zfloor_IZR.
- apply Zceil_IZR.
Qed.
Lemma ZnearestE_IZR:
forall n, (
ZnearestE (
IZR n)) =
n.
Proof.
Lemma Zfloor_opp :
forall x :
R, (
Zfloor (-
x)) = - (
Zceil x).
Proof.
Lemma Zceil_opp :
forall x :
R, (
Zceil (-
x)) = - (
Zfloor x).
Proof.
Lemma ZnearestE_opp
:
forall x :
R,
ZnearestE (-
x) = -
ZnearestE x.
Proof.
Lemma Zceil_non_floor:
forall x :
R, (
x >
IZR(
Zfloor x))%
R ->
Zceil x =
Z.succ(
Zfloor x).
Proof.
intros x BETWEEN.
unfold Z.succ.
apply Zceil_imp.
split.
{
rewrite minus_IZR.
rewrite plus_IZR.
lra.
}
rewrite plus_IZR.
pose proof (
Zfloor_ub x).
lra.
Qed.
more complicated way of proving
Lemma Zceil_non_ceil:
forall x : R, (x < IZR(Zceil x))%R -> Zceil x = Z.succ(Zfloor x).
Proof.
intros x BETWEEN.
unfold Z.succ.
cut (Zfloor x = (Zceil x) - 1). { intros; lia. }
apply Zfloor_imp.
split.
{ rewrite minus_IZR.
pose proof (Zceil_lb x).
lra.
}
rewrite plus_IZR.
rewrite minus_IZR.
lra.
Qed.
Lemma ZnearestE_opp
: forall x : R, ZnearestE (- x) = - ZnearestE x.
Proof.
intro x.
unfold ZnearestE.
case (Rcompare_spec (x - IZR (Zfloor x)) (/ 2)); intro CMP.
- pose proof (Zfloor_lb x) as LB.
destruct (Rcompare_spec x (IZR (Zfloor x))) as ABSURD | EXACT | INEXACT.
lra.
{ set (n := Zfloor x) in *.
rewrite EXACT.
rewrite <- opp_IZR.
rewrite Zfloor_IZR.
rewrite opp_IZR.
rewrite Rcompare_Lt by lra.
reflexivity.
}
rewrite Rcompare_Gt.
{ apply Zceil_opp. }
rewrite Zfloor_opp.
rewrite opp_IZR.
rewrite Zceil_non_floor by assumption.
unfold Z.succ.
rewrite plus_IZR.
lra.
- rewrite Rcompare_Eq.
{ rewrite Zceil_opp.
rewrite Zfloor_opp.
rewrite Z.even_opp.
rewrite Zceil_non_floor by lra.
rewrite Z.even_succ.
rewrite Z.negb_odd.
destruct (Z.even (Zfloor x)); reflexivity.
}
rewrite Zfloor_opp.
rewrite opp_IZR.
ring_simplify.
rewrite Zceil_non_floor by lra.
unfold Z.succ.
rewrite plus_IZR.
lra.
- rewrite Rcompare_Lt.
{ apply Zfloor_opp. }
rewrite Zfloor_opp.
rewrite opp_IZR.
rewrite Zceil_non_floor by lra.
unfold Z.succ.
rewrite plus_IZR.
lra.
Qed.
Lemma Znearest_imp2:
forall choice x, (
Rabs (
IZR (
Znearest choice x) -
x) <= /2)%
R.
Proof.
intros.
unfold Znearest.
pose proof (
Zfloor_lb x)
as FL.
pose proof (
Zceil_ub x)
as CU.
pose proof (
Zceil_non_floor x)
as NF.
case Rcompare_spec;
intro CMP;
apply Rabs_le;
split;
try lra.
-
destruct choice;
lra.
-
destruct choice. 2:
lra.
rewrite NF. 2:
lra.
unfold Z.succ.
rewrite plus_IZR.
lra.
-
rewrite NF. 2:
lra.
unfold Z.succ.
rewrite plus_IZR.
lra.
Qed.
Theorem Znearest_le
:
forall choice (
x y :
R), (
x <=
y)%
R ->
Znearest choice x <=
Znearest choice y.
Proof.
intros.
destruct (
Z_le_gt_dec (
Znearest choice x) (
Znearest choice y))
as [
LE |
GT].
assumption.
exfalso.
assert (1 <=
IZR (
Znearest choice x) -
IZR(
Znearest choice y))%
R as GAP.
{
rewrite <-
minus_IZR.
apply IZR_le.
lia.
}
pose proof (
Znearest_imp2 choice x)
as Rx.
pose proof (
Znearest_imp2 choice y)
as Ry.
apply Rabs_le_inv in Rx.
apply Rabs_le_inv in Ry.
assert (
x =
y)
by lra.
subst y.
lia.
Qed.
Section Extra_ops.
prec is the number of bits of the mantissa including the implicit one.
emax is the exponent of the infinities.
Typically p=24 and emax = 128 in single precision.
Variable prec emax :
Z.
Context (
prec_gt_0_ :
Prec_gt_0 prec).
Context (
prec_lt_emax_ :
Prec_lt_emax prec emax).
Notation emin := (
emin prec emax).
Notation fexp := (
fexp prec emax).
Notation binary_float := (
binary_float prec emax).
Remarks on is_finite
Remark is_finite_not_is_nan:
forall (
f:
binary_float),
is_finite _ _
f =
true ->
is_nan _ _
f =
false.
Proof.
destruct f; reflexivity || discriminate.
Qed.
Remark is_finite_strict_finite:
forall (
f:
binary_float),
is_finite_strict _ _
f =
true ->
is_finite _ _
f =
true.
Proof.
destruct f; reflexivity || discriminate.
Qed.
Digression on FP numbers that cannot be -0.0.
Definition is_finite_pos0 (
f:
binary_float) :
bool :=
match f with
|
B754_zero _ _
s =>
negb s
|
B754_infinity _ _ _ =>
false
|
B754_nan _ _ _ _ _ =>
false
|
B754_finite _ _ _ _ _ _ =>
true
end.
Lemma Bsign_pos0:
forall x,
is_finite_pos0 x =
true ->
Bsign _ _
x =
Rlt_bool (
B2R _ _
x) 0%
R.
Proof.
intros.
destruct x as [ [] | | | []
ex mx Bx ];
try discriminate;
simpl.
-
rewrite Rlt_bool_false;
auto.
lra.
-
rewrite Rlt_bool_true;
auto.
apply F2R_lt_0.
compute;
auto.
-
rewrite Rlt_bool_false;
auto.
assert ((
F2R (
Float radix2 (
Z.pos ex)
mx) > 0)%
R)
by
(
apply F2R_gt_0;
compute;
auto ).
lra.
Qed.
Theorem B2R_inj_pos0:
forall x y,
is_finite_pos0 x =
true ->
is_finite_pos0 y =
true ->
B2R _ _
x =
B2R _ _
y ->
x =
y.
Proof.
intros.
apply B2R_Bsign_inj.
destruct x;
reflexivity||
discriminate.
destruct y;
reflexivity||
discriminate.
auto.
rewrite !
Bsign_pos0 by auto.
rewrite H1;
auto.
Qed.
Decidable equality
Definition Beq_dec:
forall (
f1 f2:
binary_float), {
f1 =
f2} + {
f1 <>
f2}.
Proof.
assert (
UIP_bool:
forall (
b1 b2:
bool) (
e e':
b1 =
b2),
e =
e').
{
intros.
apply UIP_dec.
decide equality. }
Ltac try_not_eq :=
try solve [
right;
congruence].
destruct f1 as [
s1|
s1|
s1 p1 H1|
s1 m1 e1 H1],
f2 as [
s2|
s2|
s2 p2 H2|
s2 m2 e2 H2];
try destruct s1;
try destruct s2;
try solve [
left;
auto];
try_not_eq.
destruct (
Pos.eq_dec p1 p2);
try_not_eq;
subst;
left;
f_equal;
f_equal;
apply UIP_bool.
destruct (
Pos.eq_dec p1 p2);
try_not_eq;
subst;
left;
f_equal;
f_equal;
apply UIP_bool.
destruct (
Pos.eq_dec m1 m2);
try_not_eq;
destruct (
Z.eq_dec e1 e2);
try solve [
right;
intro H;
inversion H;
congruence];
subst;
left;
f_equal;
apply UIP_bool.
destruct (
Pos.eq_dec m1 m2);
try_not_eq;
destruct (
Z.eq_dec e1 e2);
try solve [
right;
intro H;
inversion H;
congruence];
subst;
left;
f_equal;
apply UIP_bool.
Defined.
Conversion from an integer to a FP number
Integers that can be represented exactly as FP numbers.
Definition integer_representable (
n:
Z):
Prop :=
Z.abs n <= 2^
emax - 2^(
emax -
prec) /\
generic_format radix2 fexp (
IZR n).
Lemma int_upper_bound_eq: 2^
emax - 2^(
emax -
prec) = (2^
prec - 1) * 2^(
emax -
prec).
Proof.
red in prec_gt_0_,
prec_lt_emax_.
ring_simplify.
rewrite <- (
Zpower_plus radix2)
by lia.
now replace (
emax -
prec +
prec)%
Z with emax by ring.
Qed.
Lemma integer_representable_n2p:
forall n p,
-2^
prec <
n < 2^
prec -> 0 <=
p ->
p <=
emax -
prec ->
integer_representable (
n * 2^
p).
Proof.
intros;
split.
-
red in prec_gt_0_,
prec_lt_emax_.
replace (
Z.abs (
n * 2^
p))
with (
Z.abs n * 2^
p).
rewrite int_upper_bound_eq.
apply Zmult_le_compat.
lia.
apply (
Zpower_le radix2);
lia.
lia.
apply (
Zpower_ge_0 radix2).
rewrite Z.abs_mul.
f_equal.
rewrite Z.abs_eq.
auto.
apply (
Zpower_ge_0 radix2).
-
apply generic_format_FLT.
exists (
Float radix2 n p).
unfold F2R;
simpl.
rewrite <-
IZR_Zpower by auto.
apply mult_IZR.
simpl;
lia.
unfold emin,
Fexp;
red in prec_gt_0_,
prec_lt_emax_;
lia.
Qed.
Lemma integer_representable_2p:
forall p,
0 <=
p <=
emax - 1 ->
integer_representable (2^
p).
Proof.
intros;
split.
-
red in prec_gt_0_.
rewrite Z.abs_eq by (
apply (
Zpower_ge_0 radix2)).
apply Z.le_trans with (2^(
emax-1)).
apply (
Zpower_le radix2);
lia.
assert (2^
emax = 2^(
emax-1)*2).
{
change 2
with (2^1)
at 3.
rewrite <- (
Zpower_plus radix2)
by lia.
f_equal.
lia. }
assert (2^(
emax -
prec) <= 2^(
emax - 1)).
{
apply (
Zpower_le radix2).
lia. }
lia.
-
red in prec_gt_0_,
prec_lt_emax_.
apply generic_format_FLT.
exists (
Float radix2 1
p).
unfold F2R;
simpl.
rewrite Rmult_1_l.
rewrite <-
IZR_Zpower.
auto.
lia.
simpl Z.abs.
change 1
with (2^0).
apply (
Zpower_lt radix2).
lia.
auto.
unfold emin,
Fexp;
lia.
Qed.
Lemma integer_representable_opp:
forall n,
integer_representable n ->
integer_representable (-
n).
Proof.
intros n (
A &
B);
split.
rewrite Z.abs_opp.
auto.
rewrite opp_IZR.
apply generic_format_opp;
auto.
Qed.
Lemma integer_representable_n2p_wide:
forall n p,
-2^
prec <=
n <= 2^
prec -> 0 <=
p ->
p <
emax -
prec ->
integer_representable (
n * 2^
p).
Proof.
Lemma integer_representable_n:
forall n, -2^
prec <=
n <= 2^
prec ->
integer_representable n.
Proof.
red in prec_gt_0_,
prec_lt_emax_.
intros.
replace n with (
n * 2^0)
by (
change (2^0)
with 1;
ring).
apply integer_representable_n2p_wide.
auto.
lia.
lia.
Qed.
Lemma round_int_no_overflow:
forall n,
Z.abs n <= 2^
emax - 2^(
emax-
prec) ->
(
Rabs (
round radix2 fexp (
round_mode mode_NE) (
IZR n)) <
bpow radix2 emax)%
R.
Proof.
intros.
red in prec_gt_0_,
prec_lt_emax_.
rewrite <-
round_NE_abs.
apply Rle_lt_trans with (
IZR (2^
emax - 2^(
emax-
prec))).
apply round_le_generic.
apply fexp_correct;
auto.
apply valid_rnd_N.
apply generic_format_FLT.
exists (
Float radix2 (2^
prec-1) (
emax-
prec)).
rewrite int_upper_bound_eq.
unfold F2R;
simpl.
rewrite <-
IZR_Zpower by lia.
rewrite <-
mult_IZR.
auto.
assert (0 < 2^
prec)
by (
apply (
Zpower_gt_0 radix2);
lia).
unfold Fnum;
simpl;
zify;
lia.
unfold emin,
Fexp;
lia.
rewrite <-
abs_IZR.
apply IZR_le.
auto.
rewrite <-
IZR_Zpower by lia.
apply IZR_lt.
simpl.
assert (0 < 2^(
emax-
prec))
by (
apply (
Zpower_gt_0 radix2);
lia).
lia.
apply fexp_correct.
auto.
Qed.
Conversion from an integer. Round to nearest.
Definition BofZ (
n:
Z) :
binary_float :=
binary_normalize prec emax _ _
mode_NE n 0
false.
Theorem BofZ_correct:
forall n,
if Rlt_bool (
Rabs (
round radix2 fexp (
round_mode mode_NE) (
IZR n))) (
bpow radix2 emax)
then
B2R prec emax (
BofZ n) =
round radix2 fexp (
round_mode mode_NE) (
IZR n) /\
is_finite _ _ (
BofZ n) =
true /\
Bsign prec emax (
BofZ n) =
Z.ltb n 0
else
B2FF prec emax (
BofZ n) =
binary_overflow prec emax mode_NE (
Z.ltb n 0).
Proof.
intros.
generalize (
binary_normalize_correct prec emax _ _
mode_NE n 0
false).
fold emin;
fold fexp;
fold (
BofZ n).
replace (
F2R {|
Fnum :=
n;
Fexp := 0 |})
with (
IZR n).
destruct Rlt_bool.
-
intros (
A &
B &
C).
split; [|
split].
+
auto.
+
auto.
+
rewrite C.
rewrite Rcompare_IZR.
unfold Z.ltb.
auto.
-
intros A;
rewrite A.
f_equal.
generalize (
Z.ltb_spec n 0);
intros SPEC;
inversion SPEC.
apply Rlt_bool_true;
apply IZR_lt;
auto.
apply Rlt_bool_false;
apply IZR_le;
auto.
-
unfold F2R;
simpl.
ring.
Qed.
Theorem BofZ_finite:
forall n,
Z.abs n <= 2^
emax - 2^(
emax-
prec) ->
B2R _ _ (
BofZ n) =
round radix2 fexp (
round_mode mode_NE) (
IZR n)
/\
is_finite _ _ (
BofZ n) =
true
/\
Bsign _ _ (
BofZ n) =
Z.ltb n 0%
Z.
Proof.
Theorem BofZ_representable:
forall n,
integer_representable n ->
B2R _ _ (
BofZ n) =
IZR n
/\
is_finite _ _ (
BofZ n) =
true
/\
Bsign _ _ (
BofZ n) = (
n <? 0).
Proof.
intros.
destruct H as (
P &
Q).
destruct (
BofZ_finite n)
as (
A &
B &
C).
auto.
intuition.
rewrite A.
apply round_generic.
apply valid_rnd_round_mode.
auto.
Qed.
Theorem BofZ_exact:
forall n,
-2^
prec <=
n <= 2^
prec ->
B2R _ _ (
BofZ n) =
IZR n
/\
is_finite _ _ (
BofZ n) =
true
/\
Bsign _ _ (
BofZ n) =
Z.ltb n 0%
Z.
Proof.
Lemma BofZ_finite_pos0:
forall n,
Z.abs n <= 2^
emax - 2^(
emax-
prec) ->
is_finite_pos0 (
BofZ n) =
true.
Proof.
intros.
generalize (
binary_normalize_correct prec emax _ _
mode_NE n 0
false).
fold emin;
fold fexp;
fold (
BofZ n).
replace (
F2R {|
Fnum :=
n;
Fexp := 0 |})
with (
IZR n)
by
(
unfold F2R;
simpl;
ring).
rewrite Rlt_bool_true by (
apply round_int_no_overflow;
auto).
intros (
A &
B &
C).
destruct (
BofZ n);
auto;
try discriminate.
simpl in *.
rewrite C.
rewrite Rcompare_IZR.
generalize (
Zcompare_spec n 0);
intros SPEC;
destruct SPEC;
auto.
assert ((
round radix2 fexp ZnearestE (
IZR n) <= -1)%
R).
{
apply round_le_generic.
apply fexp_correct.
auto.
apply valid_rnd_N.
apply (
integer_representable_opp 1).
apply (
integer_representable_2p 0).
red in prec_gt_0_,
prec_lt_emax_;
lia.
apply IZR_le;
lia.
}
lra.
Qed.
Lemma BofZ_finite_equal:
forall x y,
Z.abs x <= 2^
emax - 2^(
emax-
prec) ->
Z.abs y <= 2^
emax - 2^(
emax-
prec) ->
B2R _ _ (
BofZ x) =
B2R _ _ (
BofZ y) ->
BofZ x =
BofZ y.
Proof.
Commutation properties with addition, subtraction, multiplication.
Theorem BofZ_plus:
forall nan p q,
integer_representable p ->
integer_representable q ->
Bplus _ _ _ _
nan mode_NE (
BofZ p) (
BofZ q) =
BofZ (
p +
q).
Proof.
intros.
destruct (
BofZ_representable p)
as (
A &
B &
C);
auto.
destruct (
BofZ_representable q)
as (
D &
E &
F);
auto.
generalize (
Bplus_correct _ _ _ _
nan mode_NE (
BofZ p) (
BofZ q)
B E).
fold emin;
fold fexp.
rewrite A,
D.
rewrite <-
plus_IZR.
generalize (
BofZ_correct (
p +
q)).
destruct Rlt_bool.
-
intros (
P &
Q &
R) (
U &
V &
W).
apply B2R_Bsign_inj;
auto.
rewrite P,
U;
auto.
rewrite R,
W,
C,
F.
rewrite Rcompare_IZR.
unfold Z.ltb at 3.
generalize (
Zcompare_spec (
p +
q) 0);
intros SPEC;
inversion SPEC;
auto.
assert (
EITHER: 0 <=
p \/ 0 <=
q)
by lia.
destruct EITHER; [
apply andb_false_intro1 |
apply andb_false_intro2];
apply Zlt_bool_false;
auto.
-
intros P (
U &
V).
apply B2FF_inj.
rewrite P,
U,
C.
f_equal.
rewrite C,
F in V.
generalize (
Zlt_bool_spec p 0) (
Zlt_bool_spec q 0).
rewrite <-
V.
intros SPEC1 SPEC2;
inversion SPEC1;
inversion SPEC2;
try congruence;
symmetry.
apply Zlt_bool_true;
lia.
apply Zlt_bool_false;
lia.
Qed.
Theorem BofZ_minus:
forall nan p q,
integer_representable p ->
integer_representable q ->
Bminus _ _ _ _
nan mode_NE (
BofZ p) (
BofZ q) =
BofZ (
p -
q).
Proof.
intros.
destruct (
BofZ_representable p)
as (
A &
B &
C);
auto.
destruct (
BofZ_representable q)
as (
D &
E &
F);
auto.
generalize (
Bminus_correct _ _ _ _
nan mode_NE (
BofZ p) (
BofZ q)
B E).
fold emin;
fold fexp.
rewrite A,
D.
rewrite <-
minus_IZR.
generalize (
BofZ_correct (
p -
q)).
destruct Rlt_bool.
-
intros (
P &
Q &
R) (
U &
V &
W).
apply B2R_Bsign_inj;
auto.
rewrite P,
U;
auto.
rewrite R,
W,
C,
F.
rewrite Rcompare_IZR.
unfold Z.ltb at 3.
generalize (
Zcompare_spec (
p -
q) 0);
intros SPEC;
inversion SPEC;
auto.
assert (
EITHER: 0 <=
p \/
q < 0)
by lia.
destruct EITHER; [
apply andb_false_intro1 |
apply andb_false_intro2].
rewrite Zlt_bool_false;
auto.
rewrite Zlt_bool_true;
auto.
-
intros P (
U &
V).
apply B2FF_inj.
rewrite P,
U,
C.
f_equal.
rewrite C,
F in V.
generalize (
Zlt_bool_spec p 0) (
Zlt_bool_spec q 0).
rewrite V.
intros SPEC1 SPEC2;
inversion SPEC1;
inversion SPEC2;
symmetry.
rewrite <-
H3 in H1;
discriminate.
apply Zlt_bool_true;
lia.
apply Zlt_bool_false;
lia.
rewrite <-
H3 in H1;
discriminate.
Qed.
Theorem BofZ_mult:
forall nan p q,
integer_representable p ->
integer_representable q ->
0 <
q ->
Bmult _ _ _ _
nan mode_NE (
BofZ p) (
BofZ q) =
BofZ (
p *
q).
Proof.
intros.
assert (
SIGN:
xorb (
p <? 0) (
q <? 0) = (
p *
q <? 0)).
{
rewrite (
Zlt_bool_false q)
by lia.
generalize (
Zlt_bool_spec p 0);
intros SPEC;
inversion SPEC;
simpl;
symmetry.
apply Zlt_bool_true.
rewrite Z.mul_comm.
apply Z.mul_pos_neg;
lia.
apply Zlt_bool_false.
apply Zsame_sign_imp;
lia.
}
destruct (
BofZ_representable p)
as (
A &
B &
C);
auto.
destruct (
BofZ_representable q)
as (
D &
E &
F);
auto.
generalize (
Bmult_correct _ _ _ _
nan mode_NE (
BofZ p) (
BofZ q)).
fold emin;
fold fexp.
rewrite A,
B,
C,
D,
E,
F.
rewrite <-
mult_IZR.
generalize (
BofZ_correct (
p *
q)).
destruct Rlt_bool.
-
intros (
P &
Q &
R) (
U &
V &
W).
apply B2R_Bsign_inj;
auto.
rewrite P,
U;
auto.
rewrite R,
W;
auto.
apply is_finite_not_is_nan;
auto.
-
intros P U.
apply B2FF_inj.
rewrite P,
U.
f_equal.
auto.
Qed.
Theorem BofZ_mult_2p:
forall nan x p,
Z.abs x <= 2^
emax - 2^(
emax-
prec) ->
2^
prec <=
Z.abs x ->
0 <=
p <=
emax - 1 ->
Bmult _ _ _ _
nan mode_NE (
BofZ x) (
BofZ (2^
p)) =
BofZ (
x * 2^
p).
Proof.
intros.
destruct (
Z.eq_dec x 0).
-
subst x.
apply BofZ_mult.
apply integer_representable_n.
generalize (
Zpower_ge_0 radix2 prec).
simpl;
lia.
apply integer_representable_2p.
auto.
apply (
Zpower_gt_0 radix2).
lia.
-
assert (
IZR x <> 0%
R)
by (
apply (
IZR_neq _ _
n)).
destruct (
BofZ_finite x H)
as (
A &
B &
C).
destruct (
BofZ_representable (2^
p))
as (
D &
E &
F).
apply integer_representable_2p.
auto.
assert (
cexp radix2 fexp (
IZR (
x * 2^
p)) =
cexp radix2 fexp (
IZR x) +
p).
{
unfold cexp,
fexp.
rewrite mult_IZR.
change (2^
p)
with (
radix2^
p).
rewrite IZR_Zpower by lia.
rewrite mag_mult_bpow by auto.
assert (
prec + 1 <=
mag radix2 (
IZR x)).
{
rewrite <- (
mag_abs radix2 (
IZR x)).
rewrite <- (
mag_bpow radix2 prec).
apply mag_le.
apply bpow_gt_0.
rewrite <-
IZR_Zpower by (
red in prec_gt_0_;
lia).
rewrite <-
abs_IZR.
apply IZR_le;
auto. }
unfold FLT_exp.
unfold emin;
red in prec_gt_0_;
zify;
lia.
}
assert (
forall m,
round radix2 fexp m (
IZR x) *
IZR (2^
p) =
round radix2 fexp m (
IZR (
x * 2^
p)))%
R.
{
intros.
unfold round,
scaled_mantissa.
rewrite H3.
rewrite mult_IZR.
rewrite Z.opp_add_distr.
rewrite bpow_plus.
set (
a :=
IZR x);
set (
b :=
bpow radix2 (-
cexp radix2 fexp a)).
replace (
a *
IZR (2^
p) * (
b *
bpow radix2 (-
p)))%
R with (
a *
b)%
R.
unfold F2R;
simpl.
rewrite Rmult_assoc.
f_equal.
rewrite bpow_plus.
f_equal.
apply (
IZR_Zpower radix2).
lia.
transitivity ((
a *
b) * (
IZR (2^
p) *
bpow radix2 (-
p)))%
R.
rewrite (
IZR_Zpower radix2).
rewrite <-
bpow_plus.
replace (
p + -
p)
with 0
by lia.
change (
bpow radix2 0)
with 1%
R.
ring.
lia.
ring.
}
assert (
forall m x,
round radix2 fexp (
round_mode m) (
round radix2 fexp (
round_mode m)
x) =
round radix2 fexp (
round_mode m)
x).
{
intros.
apply round_generic.
apply valid_rnd_round_mode.
apply generic_format_round.
apply fexp_correct;
auto.
apply valid_rnd_round_mode.
}
assert (
xorb (
x <? 0) (2^
p <? 0) = (
x * 2^
p <? 0)).
{
assert (0 < 2^
p)
by (
apply (
Zpower_gt_0 radix2);
lia).
rewrite (
Zlt_bool_false (2^
p))
by lia.
rewrite xorb_false_r.
symmetry.
generalize (
Zlt_bool_spec x 0);
intros SPEC;
inversion SPEC.
apply Zlt_bool_true.
apply Z.mul_neg_pos;
auto.
apply Zlt_bool_false.
apply Z.mul_nonneg_nonneg;
lia.
}
generalize (
Bmult_correct _ _ _ _
nan mode_NE (
BofZ x) (
BofZ (2^
p)))
(
BofZ_correct (
x * 2^
p)).
fold emin;
fold fexp.
rewrite A,
B,
C,
D,
E,
F,
H4,
H5.
destruct Rlt_bool.
+
intros (
P &
Q &
R) (
U &
V &
W).
apply B2R_Bsign_inj;
auto.
rewrite P,
U.
auto.
rewrite R,
W.
auto.
apply is_finite_not_is_nan;
auto.
+
intros P U.
apply B2FF_inj.
rewrite P,
U.
f_equal;
auto.
Qed.
Rounding to odd the argument of BofZ.
Lemma round_odd_flt:
forall prec' emin' x choice,
prec > 1 ->
prec' > 1 ->
prec' >=
prec + 2 ->
emin' <=
emin - 2 ->
round radix2 fexp (
Znearest choice) (
round radix2 (
FLT_exp emin' prec')
Zrnd_odd x) =
round radix2 fexp (
Znearest choice)
x.
Proof.
intros. apply round_N_odd. auto. apply fexp_correct; auto.
apply exists_NE_FLT. right; lia.
apply FLT_exp_valid. red; lia.
apply exists_NE_FLT. right; lia.
unfold fexp, FLT_exp; intros. zify; lia.
Qed.
Corollary round_odd_fix:
forall x p choice,
prec > 1 ->
0 <=
p ->
(
bpow radix2 (
prec +
p + 1) <=
Rabs x)%
R ->
round radix2 fexp (
Znearest choice) (
round radix2 (
FIX_exp p)
Zrnd_odd x) =
round radix2 fexp (
Znearest choice)
x.
Proof.
intros.
destruct (
Req_EM_T x 0%
R).
-
subst x.
rewrite round_0.
auto.
apply valid_rnd_odd.
-
set (
prec' :=
mag radix2 x -
p).
set (
emin' :=
emin - 2).
assert (
PREC:
mag radix2 (
bpow radix2 (
prec +
p + 1)) <=
mag radix2 x).
{
rewrite <- (
mag_abs radix2 x).
apply mag_le;
auto.
apply bpow_gt_0. }
rewrite mag_bpow in PREC.
assert (
CANON:
cexp radix2 (
FLT_exp emin' prec')
x =
cexp radix2 (
FIX_exp p)
x).
{
unfold cexp,
FLT_exp,
FIX_exp.
replace (
mag radix2 x -
prec')
with p by (
unfold prec';
lia).
apply Z.max_l.
unfold emin',
emin.
red in prec_gt_0_,
prec_lt_emax_;
lia.
}
assert (
RND:
round radix2 (
FIX_exp p)
Zrnd_odd x =
round radix2 (
FLT_exp emin' prec')
Zrnd_odd x).
{
unfold round,
scaled_mantissa.
rewrite CANON.
auto.
}
rewrite RND.
apply round_odd_flt.
auto.
unfold prec'.
red in prec_gt_0_;
lia.
unfold prec'.
lia.
unfold emin'.
lia.
Qed.
Definition int_round_odd (
x:
Z) (
p:
Z) :=
(
if Z.eqb (
x mod 2^
p) 0 ||
Z.odd (
x / 2^
p)
then x / 2^
p else x / 2^
p + 1) * 2^
p.
Lemma Zrnd_odd_int:
forall n p, 0 <=
p ->
Zrnd_odd (
IZR n *
bpow radix2 (-
p)) * 2^
p =
int_round_odd n p.
Proof.
clear.
intros.
assert (0 < 2^
p)
by (
apply (
Zpower_gt_0 radix2);
lia).
assert (
n = (
n / 2^
p) * 2^
p +
n mod 2^
p)
by (
rewrite Z.mul_comm;
apply Z.div_mod;
lia).
assert (0 <=
n mod 2^
p < 2^
p)
by (
apply Z_mod_lt;
lia).
unfold int_round_odd.
set (
q :=
n / 2^
p)
in *;
set (
r :=
n mod 2^
p)
in *.
f_equal.
pose proof (
bpow_gt_0 radix2 (-
p)).
assert (
bpow radix2 p *
bpow radix2 (-
p) = 1)%
R.
{
rewrite <-
bpow_plus.
replace (
p + -
p)
with 0
by lia.
auto. }
assert (
IZR n *
bpow radix2 (-
p) =
IZR q +
IZR r *
bpow radix2 (-
p))%
R.
{
rewrite H1.
rewrite plus_IZR,
mult_IZR.
change (
IZR (2^
p))
with (
IZR (
radix2^
p)).
rewrite IZR_Zpower by lia.
ring_simplify.
rewrite Rmult_assoc.
rewrite H4.
ring. }
assert (0 <=
IZR r <
bpow radix2 p)%
R.
{
split.
apply IZR_le;
lia.
rewrite <-
IZR_Zpower by lia.
apply IZR_lt;
tauto. }
assert (0 <=
IZR r *
bpow radix2 (-
p) < 1)%
R.
{
generalize (
bpow_gt_0 radix2 (-
p)).
intros.
split.
apply Rmult_le_pos;
lra.
rewrite <-
H4.
apply Rmult_lt_compat_r.
auto.
tauto. }
assert (
Zfloor (
IZR n *
bpow radix2 (-
p)) =
q).
{
apply Zfloor_imp.
rewrite H5.
rewrite plus_IZR.
lra. }
unfold Zrnd_odd.
destruct Req_EM_T.
-
assert (
IZR r *
bpow radix2 (-
p) = 0)%
R.
{
rewrite H8 in e.
rewrite e in H5.
lra. }
apply Rmult_integral in H9.
destruct H9; [ |
lra ].
apply (
eq_IZR r 0)
in H9.
apply <-
Z.eqb_eq in H9.
rewrite H9.
assumption.
-
assert (
IZR r *
bpow radix2 (-
p) <> 0)%
R.
{
rewrite H8 in n0.
lra. }
destruct (
Z.eqb r 0)
eqn:
RZ.
apply Z.eqb_eq in RZ.
rewrite RZ in H9.
rewrite Rmult_0_l in H9.
congruence.
rewrite Zceil_floor_neq by lra.
rewrite H8.
change Zeven with Z.even.
rewrite Zodd_even_bool.
destruct (
Z.even q);
auto.
Qed.
Lemma int_round_odd_le:
forall p x y, 0 <=
p ->
x <=
y ->
int_round_odd x p <=
int_round_odd y p.
Proof.
clear.
intros.
assert (
Zrnd_odd (
IZR x *
bpow radix2 (-
p)) <=
Zrnd_odd (
IZR y *
bpow radix2 (-
p))).
{
apply Zrnd_le.
apply valid_rnd_odd.
apply Rmult_le_compat_r.
apply bpow_ge_0.
apply IZR_le;
auto. }
rewrite <- !
Zrnd_odd_int by auto.
apply Zmult_le_compat_r.
auto.
apply (
Zpower_ge_0 radix2).
Qed.
Lemma int_round_odd_exact:
forall p x, 0 <=
p ->
(2^
p |
x) ->
int_round_odd x p =
x.
Proof.
Theorem BofZ_round_odd:
forall x p,
prec > 1 ->
Z.abs x <= 2^
emax - 2^(
emax-
prec) ->
0 <=
p <=
emax -
prec ->
2^(
prec +
p + 1) <=
Z.abs x ->
BofZ x =
BofZ (
int_round_odd x p).
Proof.
Lemma int_round_odd_shifts:
forall x p, 0 <=
p ->
int_round_odd x p =
Z.shiftl (
if Z.eqb (
x mod 2^
p) 0
then Z.shiftr x p else Z.lor (
Z.shiftr x p) 1)
p.
Proof.
clear.
intros.
unfold int_round_odd.
rewrite Z.shiftl_mul_pow2 by auto.
f_equal.
rewrite Z.shiftr_div_pow2 by auto.
destruct (
x mod 2^
p =? 0)
eqn:
E.
auto.
assert (
forall n, (
if Z.odd n then n else n + 1) =
Z.lor n 1).
{
destruct n;
simpl;
auto.
destruct p0;
auto.
destruct p0;
auto.
induction p0;
auto. }
simpl.
apply H0.
Qed.
Lemma int_round_odd_bits:
forall x y p, 0 <=
p ->
(
forall i, 0 <=
i <
p ->
Z.testbit y i =
false) ->
Z.testbit y p = (
if Z.eqb (
x mod 2^
p) 0
then Z.testbit x p else true) ->
(
forall i,
p <
i ->
Z.testbit y i =
Z.testbit x i) ->
int_round_odd x p =
y.
Proof.
Conversion from a FP number to an integer
Always rounds toward zero.
Definition ZofB (
f:
binary_float):
option Z :=
match f with
|
B754_finite _ _
s m (
Zpos e) _ =>
Some (
cond_Zopp s (
Zpos m) *
Z.pow_pos radix2 e)%
Z
|
B754_finite _ _
s m 0 _ =>
Some (
cond_Zopp s (
Zpos m))
|
B754_finite _ _
s m (
Zneg e) _ =>
Some (
cond_Zopp s (
Zpos m /
Z.pow_pos radix2 e))%
Z
|
B754_zero _ _ _ =>
Some 0%
Z
| _ =>
None
end.
Theorem ZofB_correct:
forall f,
ZofB f =
if is_finite _ _
f then Some (
Ztrunc (
B2R _ _
f))
else None.
Proof.
destruct f as [
s|
s|
s p H|
s m e H];
simpl;
auto.
-
f_equal.
symmetry.
apply (
Ztrunc_IZR 0).
-
destruct e;
f_equal.
+
unfold F2R;
simpl.
rewrite Rmult_1_r.
rewrite Ztrunc_IZR.
auto.
+
unfold F2R;
simpl.
rewrite <-
mult_IZR.
rewrite Ztrunc_IZR.
auto.
+
unfold F2R;
simpl.
rewrite IZR_cond_Zopp.
rewrite <-
cond_Ropp_mult_l.
assert (
EQ:
forall x,
Ztrunc (
cond_Ropp s x) =
cond_Zopp s (
Ztrunc x)).
{
intros.
destruct s;
simpl;
auto.
apply Ztrunc_opp.
}
rewrite EQ.
f_equal.
generalize (
Zpower_pos_gt_0 2
p (
eq_refl _));
intros.
rewrite Ztrunc_floor.
symmetry.
apply Zfloor_div.
lia.
apply Rmult_le_pos.
apply IZR_le.
compute;
congruence.
apply Rlt_le.
apply Rinv_0_lt_compat.
apply IZR_lt.
auto.
Qed.
Interval properties.
Remark Ztrunc_range_pos:
forall x, 0 <
Ztrunc x -> (
IZR (
Ztrunc x) <=
x <
IZR (
Ztrunc x + 1)%
Z)%
R.
Proof.
intros.
rewrite Ztrunc_floor.
split.
apply Zfloor_lb.
rewrite plus_IZR.
apply Zfloor_ub.
generalize (
Rle_bool_spec 0%
R x).
intros RLE;
inversion RLE;
subst;
clear RLE.
auto.
rewrite Ztrunc_ceil in H by lra.
unfold Zceil in H.
assert (-
x < 0)%
R.
{
apply Rlt_le_trans with (
IZR (
Zfloor (-
x)) + 1)%
R.
apply Zfloor_ub.
rewrite <-
plus_IZR.
apply IZR_le.
lia. }
lra.
Qed.
Remark Ztrunc_range_zero:
forall x,
Ztrunc x = 0 -> (-1 <
x < 1)%
R.
Proof.
intros;
generalize (
Rle_bool_spec 0%
R x).
intros RLE;
inversion RLE;
subst;
clear RLE.
-
rewrite Ztrunc_floor in H by auto.
split.
+
apply Rlt_le_trans with 0%
R;
auto.
rewrite <-
Ropp_0.
apply Ropp_lt_contravar.
apply Rlt_0_1.
+
replace 1%
R with (
IZR (
Zfloor x) + 1)%
R.
apply Zfloor_ub.
rewrite H.
simpl.
apply Rplus_0_l.
-
rewrite Ztrunc_ceil in H by (
apply Rlt_le;
auto).
split.
+
apply (
Ropp_lt_cancel (-(1))).
rewrite Ropp_involutive.
replace 1%
R with (
IZR (
Zfloor (-
x)) + 1)%
R.
apply Zfloor_ub.
unfold Zceil in H.
replace (
Zfloor (-
x))
with 0
by lia.
simpl.
apply Rplus_0_l.
+
apply Rlt_le_trans with 0%
R;
auto.
apply Rle_0_1.
Qed.
Theorem ZofB_range_pos:
forall f n,
ZofB f =
Some n -> 0 <
n -> (
IZR n <=
B2R _ _
f <
IZR (
n + 1)%
Z)%
R.
Proof.
Theorem ZofB_range_neg:
forall f n,
ZofB f =
Some n ->
n < 0 -> (
IZR (
n - 1)%
Z <
B2R _ _
f <=
IZR n)%
R.
Proof.
intros.
rewrite ZofB_correct in H.
destruct (
is_finite prec emax f)
eqn:
FIN;
inversion H.
set (
x :=
B2R prec emax f)
in *.
set (
y := (-
x)%
R).
assert (
A: (
IZR (
Ztrunc y) <=
y <
IZR (
Ztrunc y + 1)%
Z)%
R).
{
apply Ztrunc_range_pos.
unfold y.
rewrite Ztrunc_opp.
lia. }
destruct A as [
B C].
unfold y in B,
C.
rewrite Ztrunc_opp in B,
C.
replace (-
Ztrunc x + 1)
with (- (
Ztrunc x - 1))
in C by lia.
rewrite opp_IZR in B,
C.
lra.
Qed.
Theorem ZofB_range_zero:
forall f,
ZofB f =
Some 0 -> (-1 <
B2R _ _
f < 1)%
R.
Proof.
Theorem ZofB_range_nonneg:
forall f n,
ZofB f =
Some n -> 0 <=
n -> (-1 <
B2R _ _
f <
IZR (
n + 1)%
Z)%
R.
Proof.
For representable integers, ZofB is left inverse of BofZ.
Theorem ZofBofZ_exact:
forall n,
integer_representable n ->
ZofB (
BofZ n) =
Some n.
Proof.
Compatibility with subtraction
Remark Zfloor_minus:
forall x n,
Zfloor (
x -
IZR n) =
Zfloor x -
n.
Proof.
Theorem ZofB_minus:
forall minus_nan m f p q,
ZofB f =
Some p -> 0 <=
p < 2*
q ->
q <= 2^
prec -> (
IZR q <=
B2R _ _
f)%
R ->
ZofB (
Bminus _ _ _ _
minus_nan m f (
BofZ q)) =
Some (
p -
q).
Proof.
intros.
assert (
Q: -2^
prec <=
q <= 2^
prec).
{
split;
auto.
generalize (
Zpower_ge_0 radix2 prec);
simpl;
lia. }
assert (
RANGE: (-1 <
B2R _ _
f <
IZR (
p + 1)%
Z)%
R)
by (
apply ZofB_range_nonneg;
auto;
lia).
rewrite ZofB_correct in H.
destruct (
is_finite prec emax f)
eqn:
FIN;
try discriminate.
assert (
PQ2: (
IZR (
p + 1) <=
IZR q * 2)%
R).
{
rewrite <-
mult_IZR.
apply IZR_le.
lia. }
assert (
EXACT:
round radix2 fexp (
round_mode m) (
B2R _ _
f -
IZR q)%
R = (
B2R _ _
f -
IZR q)%
R).
{
apply round_generic.
apply valid_rnd_round_mode.
apply sterbenz_aux.
now apply FLT_exp_valid.
apply FLT_exp_monotone.
apply generic_format_B2R.
apply integer_representable_n.
auto.
lra. }
destruct (
BofZ_exact q Q)
as (
A &
B &
C).
generalize (
Bminus_correct _ _ _ _
minus_nan m f (
BofZ q)
FIN B).
rewrite Rlt_bool_true.
-
fold emin;
fold fexp.
intros (
D &
E &
F).
rewrite ZofB_correct.
rewrite E.
rewrite D.
rewrite A.
rewrite EXACT.
inversion H.
f_equal.
rewrite !
Ztrunc_floor.
apply Zfloor_minus.
lra.
lra.
-
rewrite A.
fold emin;
fold fexp.
rewrite EXACT.
apply Rle_lt_trans with (
bpow radix2 prec).
apply Rle_trans with (
IZR q).
apply Rabs_le.
lra.
rewrite <-
IZR_Zpower.
apply IZR_le;
auto.
red in prec_gt_0_;
lia.
apply bpow_lt.
auto.
Qed.
A variant of ZofB that bounds the range of representable integers.
Definition ZofB_range (
f:
binary_float) (
zmin zmax:
Z):
option Z :=
match ZofB f with
|
None =>
None
|
Some z =>
if Z.leb zmin z &&
Z.leb z zmax then Some z else None
end.
Theorem ZofB_range_correct:
forall f min max,
let n :=
Ztrunc (
B2R _ _
f)
in
ZofB_range f min max =
if is_finite _ _
f &&
Z.leb min n &&
Z.leb n max then Some n else None.
Proof.
Lemma ZofB_range_inversion:
forall f min max n,
ZofB_range f min max =
Some n ->
min <=
n /\
n <=
max /\
ZofB f =
Some n.
Proof.
intros.
rewrite ZofB_range_correct in H.
rewrite ZofB_correct.
destruct (
is_finite prec emax f);
try discriminate.
set (
n1 :=
Ztrunc (
B2R _ _
f))
in *.
destruct (
min <=?
n1)
eqn:
MIN;
try discriminate.
destruct (
n1 <=?
max)
eqn:
MAX;
try discriminate.
simpl in H.
inversion H.
subst n.
split.
apply Zle_bool_imp_le;
auto.
split.
apply Zle_bool_imp_le;
auto.
auto.
Qed.
Theorem ZofB_range_minus:
forall minus_nan m f p q,
ZofB_range f 0 (2 *
q - 1) =
Some p ->
q <= 2^
prec -> (
IZR q <=
B2R _ _
f)%
R ->
ZofB_range (
Bminus _ _ _ _
minus_nan m f (
BofZ q)) (-
q) (
q - 1) =
Some (
p -
q).
Proof.
intros.
destruct (
ZofB_range_inversion _ _ _ _
H)
as (
A &
B &
C).
set (
f' :=
Bminus prec emax _ _
minus_nan m f (
BofZ q)).
assert (
D:
ZofB f' =
Some (
p -
q)).
{
apply ZofB_minus.
auto.
lia.
auto.
auto. }
unfold ZofB_range.
rewrite D.
rewrite Zle_bool_true by lia.
rewrite Zle_bool_true by lia.
auto.
Qed.
ZofB_ne : convert float to integer, round to nearest
Definition Zdiv_ne (
a b :
Z) :=
let q :=
Z.div a b in
let q1 :=
Z.succ q in
match Z.compare (
a-
b*
q) (
b*
q1-
a)
with
|
Lt =>
q
|
Gt =>
q1
|
Eq => (
if Z.even q then q else q1)
end.
Definition ZofB_ne (
f:
binary_float):
option Z :=
match f with
|
B754_finite _ _
s m (
Zpos e) _ =>
Some (
cond_Zopp s (
Zpos m) *
Z.pow_pos radix2 e)%
Z
|
B754_finite _ _
s m 0 _ =>
Some (
cond_Zopp s (
Zpos m))
|
B754_finite _ _
s m (
Zneg e) _ =>
Some (
cond_Zopp s (
Zdiv_ne (
Zpos m) (
Z.pow_pos radix2 e)))%
Z
|
B754_zero _ _ _ =>
Some 0%
Z
| _ =>
None
end.
Ltac field_simplify_den :=
field_simplify ; [
idtac |
lra].
Ltac Rdiv_lt_0_den :=
apply Rdiv_lt_0_compat ; [
idtac |
lra].
Hint Rewrite <-
plus_IZR minus_IZR opp_IZR mult_IZR :
l_IZR.
Ltac l_IZR :=
autorewrite with l_IZR.
Theorem ZofB_ne_correct:
forall f,
ZofB_ne f =
if is_finite _ _
f then Some (
ZnearestE (
B2R _ _
f))
else None.
Proof.
destruct f as [
s|
s|
s p H|
s m e H];
simpl;
auto.
-
f_equal.
symmetry.
apply (
ZnearestE_IZR 0).
-
destruct e;
f_equal.
+
unfold F2R;
cbn.
rewrite Rmult_1_r.
rewrite ZnearestE_IZR.
auto.
+
unfold F2R;
cbn.
rewrite <-
mult_IZR.
rewrite ZnearestE_IZR.
auto.
+
unfold F2R;
cbn.
rewrite IZR_cond_Zopp.
rewrite <-
cond_Ropp_mult_l.
assert (
EQ:
forall x,
ZnearestE (
cond_Ropp s x) =
cond_Zopp s (
ZnearestE x)).
{
intros.
destruct s;
cbn;
auto.
apply ZnearestE_opp. }
rewrite EQ.
f_equal.
generalize (
Zpower_pos_gt_0 2
p (
eq_refl _));
intros.
set (
p2p := (
Z.pow_pos 2
p))
in *.
set (
zm :=
Z.pos m)
in *.
assert (
p2p > 0)
as POS by lia.
assert (0 <
IZR p2p)%
R as POS2.
{
apply IZR_lt.
assumption. }
unfold Zdiv_ne,
Z.succ in *.
case Z.compare_spec;
intro CMP.
*
pose proof (
Z_div_mod_eq_full zm p2p)
as DECOMPOSE.
destruct (
Z_mod_lt zm p2p POS)
as [
MOD1 MOD2].
set (
q :=
zm /
p2p)
in *.
set (
r :=
zm mod p2p)
in *.
rewrite inbetween_int_NE with (
m :=
q) (
l :=
loc_Inexact Eq).
{
cbn.
unfold cond_incr.
destruct Z.even;
reflexivity.
}
constructor.
split.
**
assert (0 <
IZR zm /
IZR p2p -
IZR q)%
R.
2:
lra.
field_simplify_den.
Rdiv_lt_0_den.
l_IZR.
apply IZR_lt.
lia.
**
assert (0 <
IZR (
q + 1) - (
IZR zm * /
IZR p2p))%
R.
2:
lra.
field_simplify_den.
Rdiv_lt_0_den.
l_IZR.
apply IZR_lt.
lia.
**
apply Rcompare_Eq.
assert ((
IZR q +
IZR (
q + 1))/2 - (
IZR zm * /
IZR p2p) = 0)%
R; [
idtac|
lra].
field_simplify_den.
l_IZR.
replace (
q *
p2p + (
q + 1) *
p2p - 2 *
zm)
with 0
by lia.
field.
apply IZR_neq.
lia.
*
symmetry.
apply Znearest_imp with (
n :=
zm /
p2p).
apply Rabs_lt.
split.
**
pose proof (
Z_mult_div_ge zm p2p POS).
assert (0 <=
IZR zm * /
IZR p2p -
IZR (
zm /
p2p))%
R.
2:
lra.
field_simplify_den.
apply Rmult_le_pos.
{
l_IZR.
apply IZR_le.
lia.
}
assert (0 < /
IZR p2p)%
R.
2:
lra.
apply Rinv_0_lt_compat.
assumption.
**
assert (0 < 2*(
IZR p2p *
IZR (
zm /
p2p) -
IZR zm) + (
IZR p2p))%
R as LT.
{
l_IZR.
apply IZR_lt.
lia. }
assert (0 < -(
IZR zm * /
IZR p2p -
IZR (
zm /
p2p) - / 2))%
R as GT.
2:
lra.
field_simplify_den.
Rdiv_lt_0_den.
lra.
*
symmetry.
apply Znearest_imp.
apply Rabs_lt.
split.
**
assert (0 < (
IZR zm -
IZR p2p *
IZR (
zm /
p2p)) - (
IZR p2p * (
IZR (
zm /
p2p) + 1) -
IZR zm))%
R.
{
ring_simplify.
l_IZR.
apply IZR_lt.
lia.
}
assert (0 < (/ 2) +
IZR zm * /
IZR p2p -
IZR (
zm /
p2p + 1))%
R.
2:
lra.
field_simplify_den.
Rdiv_lt_0_den.
rewrite plus_IZR.
lra.
**
assert (0 <
IZR (
zm /
p2p + 1) - (
IZR zm * /
IZR p2p))%
R.
2:
lra.
field_simplify_den.
Rdiv_lt_0_den.
l_IZR.
apply IZR_lt.
pose proof (
Z_div_mod_eq_full zm p2p)
as DECOMPOSE.
ring_simplify.
set (
q := (
zm /
p2p))
in *.
pose proof (
Z_mod_lt zm p2p POS)
as MOD.
lia.
Qed.
Theorem ZofB_ne_ball:
forall f n,
ZofB_ne f =
Some n -> (
IZR n-1/2 <=
B2R _ _
f <=
IZR n+1/2)%
R.
Proof.
Definition ZofB_ne_range (
f:
binary_float) (
zmin zmax:
Z):
option Z :=
match ZofB_ne f with
|
None =>
None
|
Some z =>
if Z.leb zmin z &&
Z.leb z zmax then Some z else None
end.
Theorem ZofB_ne_range_correct:
forall f min max,
let n :=
ZnearestE (
B2R _ _
f)
in
ZofB_ne_range f min max =
if is_finite _ _
f &&
Z.leb min n &&
Z.leb n max then Some n else None.
Proof.
Lemma ZofB_ne_range_inversion:
forall f min max n,
ZofB_ne_range f min max =
Some n ->
min <=
n /\
n <=
max /\
ZofB_ne f =
Some n.
Proof.
intros.
rewrite ZofB_ne_range_correct in H.
rewrite ZofB_ne_correct.
destruct (
is_finite prec emax f);
try discriminate.
set (
n1 :=
ZnearestE (
B2R _ _
f))
in *.
destruct (
min <=?
n1)
eqn:
MIN;
try discriminate.
destruct (
n1 <=?
max)
eqn:
MAX;
try discriminate.
simpl in H.
inversion H.
subst n.
split.
apply Zle_bool_imp_le;
auto.
split.
apply Zle_bool_imp_le;
auto.
auto.
Qed.
Algebraic identities
Commutativity of addition and multiplication
Theorem Bplus_commut:
forall plus_nan mode (
x y:
binary_float),
plus_nan x y =
plus_nan y x ->
Bplus _ _ _ _
plus_nan mode x y =
Bplus _ _ _ _
plus_nan mode y x.
Proof.
intros until y;
intros NAN.
unfold Bplus.
rewrite NAN.
f_equal.
destruct x as [
sx|
sx|
sx px Hx|
sx mx ex Hx];
destruct y as [
sy|
sy|
sy py Hy|
sy my ey Hy];
auto;
simpl.
-
rewrite (
eqb_sym sy sx).
destruct (
eqb sx sy)
eqn:
EQB;
auto.
f_equal;
apply eqb_prop;
auto.
-
rewrite (
eqb_sym sy sx).
destruct (
eqb sx sy)
eqn:
EQB;
auto.
f_equal;
apply eqb_prop;
auto.
-
rewrite Z.min_comm.
f_equal.
apply Zplus_comm.
Qed.
Theorem Bmult_commut:
forall mult_nan mode (
x y:
binary_float),
mult_nan x y =
mult_nan y x ->
Bmult _ _ _ _
mult_nan mode x y =
Bmult _ _ _ _
mult_nan mode y x.
Proof.
intros until y;
intros NAN.
unfold Bmult.
rewrite NAN.
f_equal.
destruct x as [
sx|
sx|
sx px Hx|
sx mx ex Hx];
destruct y as [
sy|
sy|
sy py Hy|
sy my ey Hy];
auto;
simpl;
try rewrite xorb_comm;
auto.
apply B2SF_inj.
rewrite 2!
B2SF_SF2B.
now rewrite xorb_comm,
Pos.mul_comm,
Zplus_comm.
Qed.
Lemma F2R_not0:
forall radix s m e,
F2R (
beta:=
radix) {|
Fnum :=
cond_Zopp s (
Z.pos m);
Fexp :=
e |} <> 0%
R.
Proof.
Lemma fully_known:
forall x y
(
xNAN :
is_nan prec emax x=
false)
(
yNAN :
is_nan prec emax y=
false)
(
Req :
B2R prec emax x =
B2R prec emax y)
(
Feq :
is_finite prec emax x =
is_finite prec emax y)
(
Seq :
Bsign prec emax x =
Bsign prec emax y),
x=
y.
Proof.
intros.
destruct x;
destruct y;
cbn in *;
try congruence.
1, 2:
exfalso;
apply (
F2R_not0 radix2 s0 m e);
congruence.
unfold bounded in *.
{
subst s0.
assert ((
Float radix2 (
cond_Zopp s (
Z.pos m))
e) =
(
Float radix2 (
cond_Zopp s (
Z.pos m0))
e1)).
{
eapply canonical_unique.
{
apply canonical_canonical_mantissa with (
prec :=
prec) (
emax :=
emax).
eapply andb_true_iff.
rewrite andb_comm.
exact e0.
}
{
apply canonical_canonical_mantissa with (
prec :=
prec) (
emax :=
emax).
eapply andb_true_iff.
rewrite andb_comm.
exact e2.
}
assumption.
}
destruct s;
cbn in H;
inversion H;
subst m0;
subst e1;
f_equal;
apply Eqdep_dec.UIP_dec;
decide equality.
}
Qed.
Lemma round_mode_IZR :
forall mode x,
round_mode mode (
IZR x) =
x.
Proof.
intros.
destruct mode;
cbn.
-
apply Znearest_IZR.
-
apply Ztrunc_IZR.
-
apply Zfloor_IZR.
-
apply Zceil_IZR.
-
apply Znearest_IZR.
Qed.
Lemma round_F2R:
forall mode s m e
(
BOUNDED :
bounded prec emax m e =
true),
round radix2 fexp (
round_mode mode)
(
F2R (
Float radix2 (
cond_Zopp s (
Z.pos m))
e)) =
(
F2R (
Float radix2 (
cond_Zopp s (
Z.pos m))
e)).
Proof.
intros.
unfold bounded in BOUNDED.
rewrite andb_true_iff in BOUNDED.
destruct BOUNDED as (
CANONICAL &
EMAX).
unfold canonical_mantissa in CANONICAL.
pose proof (
canonical_canonical_mantissa _ _
s _ _
CANONICAL)
as CANON.
unfold canonical in CANON.
simpl Fexp in CANON.
unfold round.
rewrite CANON.
unfold scaled_mantissa,
cexp.
rewrite mag_F2R;
cycle 1.
{
destruct s;
cbn;
intro ZERO;
lia. }
rewrite mag_F2R;
cycle 1.
{
destruct s;
cbn;
intro ZERO;
lia. }
set (
m' := (
cond_Zopp s (
Z.pos m))).
unfold F2R.
simpl Fnum.
simpl Fexp.
rewrite Rmult_assoc.
rewrite <-
bpow_plus.
rewrite bpow_powerRZ.
unfold cexp in CANON.
fold m' in CANON.
unfold F2R in CANON.
simpl Fnum in CANON.
simpl Fexp in CANON.
rewrite mag_mult_bpow in CANON;
cycle 1.
{
unfold m';
destruct s;
cbn;
intro ZERO;
apply eq_IZR in ZERO;
lia.
}
rewrite <-
CANON.
rewrite <-
CANON.
replace (
e + -
e)
with 0
by ring.
simpl.
rewrite <-
mult_IZR.
rewrite round_mode_IZR.
rewrite ->
mult_IZR.
ring.
Qed.
Lemma round_B2R:
forall mode x,
round radix2 (
FLT_exp (3 -
emax -
prec)
prec) (
round_mode mode)
(
B2R prec emax x) =
B2R prec emax x.
Proof.
intros.
destruct x.
1,2,3:
apply round_0;
apply valid_rnd_round_mode.
apply round_F2R.
assumption.
Qed.
Multiplication by 2 is diagonal addition.
Theorem Bmult2_Bplus:
forall plus_nan mult_nan mode (
f:
binary_float),
(
forall (
x y:
binary_float),
is_nan _ _
x =
true ->
is_finite _ _
y =
true ->
plus_nan x x =
mult_nan x y) ->
Bplus _ _ _ _
plus_nan mode f f =
Bmult _ _ _ _
mult_nan mode f (
BofZ 2%
Z).
Proof.
intros until f;
intros NAN.
destruct (
BofZ_representable 2)
as (
A &
B &
C).
apply (
integer_representable_2p 1).
red in prec_gt_0_,
prec_lt_emax_;
lia.
pose proof (
Bmult_correct _ _ _ _
mult_nan mode f (
BofZ 2%
Z)).
fold emin in H.
rewrite A,
B,
C in H.
rewrite xorb_false_r in H.
destruct (
is_finite _ _
f)
eqn:
FIN.
-
pose proof (
Bplus_correct _ _ _ _
plus_nan mode f f FIN FIN).
fold emin in H0.
assert (
EQ: (
B2R prec emax f *
IZR 2%
Z =
B2R prec emax f +
B2R prec emax f)%
R).
{
ring. }
rewrite <-
EQ in H0.
destruct Rlt_bool.
+
destruct H0 as (
P &
Q &
R).
destruct H as (
S &
T &
U).
apply B2R_Bsign_inj;
auto.
rewrite P,
S.
auto.
rewrite R,
U.
replace 0%
R with (0 * 2)%
R by ring.
rewrite Rcompare_mult_r.
rewrite andb_diag,
orb_diag.
destruct f as [
s|
s|
s p H|
s m e H];
try discriminate;
simpl.
rewrite Rcompare_Eq by auto.
destruct mode;
auto.
replace 0%
R with (@
F2R radix2 {|
Fnum := 0%
Z;
Fexp :=
e |}).
rewrite Rcompare_F2R.
destruct s;
auto.
unfold F2R.
simpl.
ring.
apply IZR_lt.
lia.
destruct (
Bmult prec emax _ _
mult_nan mode f (
BofZ 2));
reflexivity ||
discriminate.
+
destruct H0 as (
P &
Q).
apply B2FF_inj.
rewrite P,
H.
auto.
-
destruct f as [
sf|
sf|
sf pf Hf|
sf mf ef Hf];
try discriminate.
+
unfold Bplus.
simpl BinarySingleNaN.Bplus.
rewrite eqb_true.
destruct (
BofZ 2)
as [| | |
s2 m2 e2 H2]
eqn:
B2;
try discriminate;
simpl in *.
assert ((0 = 2)%
Z)
by (
apply eq_IZR;
auto).
discriminate.
subst s2.
unfold Bmult.
simpl.
rewrite xorb_false_r.
auto.
auto.
+
unfold Bplus,
Bmult.
rewrite <-
NAN by auto.
auto.
Qed.
Divisions that can be turned into multiplications by an inverse
Definition Bexact_inverse_mantissa :=
Z.iter (
prec - 1)
xO xH.
Remark Bexact_inverse_mantissa_value:
Zpos Bexact_inverse_mantissa = 2 ^ (
prec - 1).
Proof.
Remark Bexact_inverse_mantissa_digits2_pos:
Z.pos (
digits2_pos Bexact_inverse_mantissa) =
prec.
Proof.
Remark bounded_Bexact_inverse:
forall e,
emin <=
e <=
emax -
prec <->
bounded prec emax Bexact_inverse_mantissa e =
true.
Proof.
Program Definition Bexact_inverse (
f:
binary_float) :
option binary_float :=
match f with
|
B754_finite _ _
s m e B =>
if Pos.eq_dec m Bexact_inverse_mantissa then
let e' := -
e - (
prec - 1) * 2
in
if Z_le_dec emin e' then
if Z_le_dec e' emax then
Some(
B754_finite _ _
s m e' _)
else None else None else None
| _ =>
None
end.
Next Obligation.
Lemma Bexact_inverse_correct:
forall f f',
Bexact_inverse f =
Some f' ->
is_finite_strict _ _
f =
true
/\
is_finite_strict _ _
f' =
true
/\
B2R _ _
f' = (/
B2R _ _
f)%
R
/\
B2R _ _
f <> 0%
R
/\
Bsign _ _
f' =
Bsign _ _
f.
Proof with
(
try discriminate).
intros f f' EI.
unfold Bexact_inverse in EI.
destruct f as [
s|
s|
s p H|
s m e H]...
destruct (
Pos.eq_dec m Bexact_inverse_mantissa)...
set (
e' := -
e - (
prec - 1) * 2)
in *.
destruct (
Z_le_dec emin e')...
destruct (
Z_le_dec e' emax)...
inversion EI;
clear EI;
subst f' m.
split.
auto.
split.
auto.
split.
unfold B2R.
rewrite Bexact_inverse_mantissa_value.
unfold F2R;
simpl.
rewrite IZR_cond_Zopp.
rewrite <- !
cond_Ropp_mult_l.
red in prec_gt_0_.
replace (
IZR (2 ^ (
prec - 1)))
with (
bpow radix2 (
prec - 1))
by (
symmetry;
apply (
IZR_Zpower radix2);
lia).
rewrite <- !
bpow_plus.
replace (
prec - 1 +
e')
with (- (
prec - 1 +
e))
by (
unfold e';
lia).
rewrite bpow_opp.
unfold cond_Ropp;
destruct s;
auto.
field.
apply Rgt_not_eq.
apply bpow_gt_0.
split.
simpl.
apply F2R_neq_0.
destruct s;
simpl in H;
discriminate.
auto.
Qed.
Theorem Bdiv_mult_inverse:
forall div_nan mult_nan mode x y z,
(
forall (
x y z:
binary_float),
is_nan _ _
x =
true ->
is_finite _ _
y =
true ->
is_finite _ _
z =
true ->
div_nan x y =
mult_nan x z) ->
Bexact_inverse y =
Some z ->
Bdiv _ _ _ _
div_nan mode x y =
Bmult _ _ _ _
mult_nan mode x z.
Proof.
intros until z;
intros NAN;
intros.
destruct (
Bexact_inverse_correct _ _
H)
as (
A &
B &
C &
D &
E).
pose proof (
Bmult_correct _ _ _ _
mult_nan mode x z).
fold emin in H0.
fold fexp in H0.
pose proof (
Bdiv_correct _ _ _ _
div_nan mode x y D).
fold emin in H1.
fold fexp in H1.
unfold Rdiv in H1.
rewrite <-
C in H1.
destruct (
is_finite _ _
x)
eqn:
FINX.
-
destruct Rlt_bool.
+
destruct H0 as (
P &
Q &
R).
destruct H1 as (
S &
T &
U).
apply B2R_Bsign_inj;
auto.
rewrite Q.
simpl.
apply is_finite_strict_finite;
auto.
rewrite P,
S.
auto.
rewrite R,
U,
E.
auto.
apply is_finite_not_is_nan;
auto.
apply is_finite_not_is_nan.
rewrite Q.
simpl.
apply is_finite_strict_finite;
auto. +
apply B2FF_inj.
rewrite H0,
H1.
rewrite E.
auto.
-
destruct y;
try discriminate.
destruct z;
try discriminate.
destruct x;
try discriminate;
simpl.
+
simpl in E.
now rewrite E.
+
unfold Bdiv.
now rewrite (
NAN _ _ (
B754_finite prec emax s0 m0 e1 e2)).
Qed.
Conversion from scientific notation
Russian peasant exponentiation
Fixpoint pos_pow (
x y:
positive) :
positive :=
match y with
|
xH =>
x
|
xO y =>
Pos.square (
pos_pow x y)
|
xI y =>
Pos.mul x (
Pos.square (
pos_pow x y))
end.
Lemma pos_pow_spec:
forall x y,
Z.pos (
pos_pow x y) =
Z.pos x ^
Z.pos y.
Proof.
Given a base base, a mantissa m and an exponent e, the following function
computes the FP number closest to m * base ^ e, using round to odd, ties break to even.
The algorithm is naive, computing base ^ |e| exactly before doing a multiplication or
division with m. However, we treat specially very large or very small values of e,
when the result is known to be +infinity or 0.0 respectively.
Program Definition Bparse (
base:
positive) (
m:
positive) (
e:
Z):
binary_float :=
match e with
|
Z0 =>
BofZ (
Zpos m)
|
Zpos p =>
if e *
Z.log2 (
Zpos base) <?
emax
then BofZ (
Zpos m *
Zpos (
pos_pow base p))
else B754_infinity _ _
false
|
Zneg p =>
if e *
Z.log2 (
Zpos base) +
Z.log2_up (
Zpos m) <?
emin
then B754_zero _ _
false
else BSN2B' prec emax (
SF2B _ (
proj1 (
Bdiv_correct_aux prec emax _ _
mode_NE
false m Z0 false (
pos_pow base p)
Z0))) _
end.
Next Obligation.
destruct Bdiv_correct_aux as [
H1 H2].
rewrite is_nan_SF2B.
clear H1.
destruct SFdiv_core_binary as [[
mz ez]
lz].
destruct Rlt_bool.
destruct H2 as [_ [
H _]].
now destruct BinarySingleNaN.binary_round_aux.
simpl in H2.
rewrite H2.
apply is_nan_binary_overflow.
Qed.
Properties of Z.log2 and Z.log2_up.
Lemma Zpower_log:
forall (
base:
radix)
n,
0 <
n ->
2 ^ (
n *
Z.log2 base) <=
base ^
n <= 2 ^ (
n *
Z.log2_up base).
Proof.
Lemma bpow_log_pos:
forall (
base:
radix)
n,
0 <
n ->
(
bpow radix2 (
n *
Z.log2 base)%
Z <=
bpow base n)%
R.
Proof.
Lemma bpow_log_neg:
forall (
base:
radix)
n,
n < 0 ->
(
bpow base n <=
bpow radix2 (
n *
Z.log2 base)%
Z)%
R.
Proof.
intros.
set (
m := -
n).
replace n with (-
m)
by (
unfold m;
lia).
rewrite !
Z.mul_opp_l, !
bpow_opp.
apply Rinv_le.
apply bpow_gt_0.
apply bpow_log_pos.
unfold m;
lia.
Qed.
Overflow and underflow conditions.
Lemma round_integer_overflow:
forall (
base:
radix)
e m,
0 <
e ->
emax <=
e *
Z.log2 base ->
(
bpow radix2 emax <=
round radix2 fexp (
round_mode mode_NE) (
IZR (
Zpos m) *
bpow base e))%
R.
Proof.
intros.
rewrite <- (
round_generic radix2 fexp (
round_mode mode_NE) (
bpow radix2 emax));
auto.
apply round_le;
auto.
apply fexp_correct;
auto.
apply valid_rnd_round_mode.
rewrite <- (
Rmult_1_l (
bpow radix2 emax)).
apply Rmult_le_compat.
apply Rle_0_1.
apply bpow_ge_0.
apply IZR_le.
zify;
lia.
eapply Rle_trans.
eapply bpow_le.
eassumption.
apply bpow_log_pos;
auto.
apply generic_format_FLT.
exists (
Float radix2 1
emax).
unfold F2R;
simpl.
ring.
simpl.
apply (
Zpower_gt_1 radix2);
auto.
simpl.
unfold emin;
red in prec_gt_0_,
prec_lt_emax_;
lia.
Qed.
Lemma round_NE_underflows:
forall x,
(0 <=
x <=
bpow radix2 (
emin - 1))%
R ->
round radix2 fexp (
round_mode mode_NE)
x = 0%
R.
Proof.
intros.
set (
eps :=
bpow radix2 (
emin - 1))
in *.
assert (
A:
round radix2 fexp (
round_mode mode_NE)
eps = 0%
R).
{
unfold round.
simpl.
assert (
E:
cexp radix2 fexp eps =
emin).
{
unfold cexp,
eps.
rewrite mag_bpow.
unfold fexp,
FLT_exp.
zify;
red in prec_gt_0_;
lia. }
unfold scaled_mantissa;
rewrite E.
assert (
P: (
eps *
bpow radix2 (-
emin) = / 2)%
R).
{
unfold eps.
rewrite <-
bpow_plus.
replace (
emin - 1 + -
emin)
with (-1)
by lia.
auto. }
rewrite P.
unfold Znearest.
assert (
F:
Zfloor (/ 2)%
R = 0).
{
apply Zfloor_imp.
simpl.
lra. }
rewrite F.
rewrite Rminus_0_r.
rewrite Rcompare_Eq by auto.
simpl.
unfold F2R;
simpl.
apply Rmult_0_l.
}
apply Rle_antisym.
-
rewrite <-
A.
apply round_le.
apply fexp_correct;
auto.
apply valid_rnd_round_mode.
tauto.
-
rewrite <- (
round_0 radix2 fexp (
round_mode mode_NE)).
apply round_le.
apply fexp_correct;
auto.
apply valid_rnd_round_mode.
tauto.
Qed.
Lemma round_integer_underflow:
forall (
base:
radix)
e m,
e < 0 ->
e *
Z.log2 base +
Z.log2_up (
Zpos m) <
emin ->
round radix2 fexp (
round_mode mode_NE) (
IZR (
Zpos m) *
bpow base e) = 0%
R.
Proof.
Correctness of Bparse
Theorem Bparse_correct:
forall b m e (
BASE: 2 <=
Zpos b),
let base := {|
radix_val :=
Zpos b;
radix_prop :=
Zle_imp_le_bool _ _
BASE |}
in
let r :=
round radix2 fexp (
round_mode mode_NE) (
IZR (
Zpos m) *
bpow base e)
in
if Rlt_bool (
Rabs r) (
bpow radix2 emax)
then
B2R _ _ (
Bparse b m e) =
r
/\
is_finite _ _ (
Bparse b m e) =
true
/\
Bsign _ _ (
Bparse b m e) =
false
else
B2FF _ _ (
Bparse b m e) =
F754_infinity false.
Proof.
End Extra_ops.
Conversions between two FP formats
Section Conversions.
Variable prec1 emax1 prec2 emax2 :
Z.
Context (
prec1_gt_0_ :
Prec_gt_0 prec1) (
prec2_gt_0_ :
Prec_gt_0 prec2).
Let emin1 := (3 -
emax1 -
prec1)%
Z.
Let fexp1 :=
FLT_exp emin1 prec1.
Let emin2 := (3 -
emax2 -
prec2)%
Z.
Let fexp2 :=
FLT_exp emin2 prec2.
Hypothesis Hmax1 : (
prec1 <
emax1)%
Z.
Hypothesis Hmax2 : (
prec2 <
emax2)%
Z.
Let binary_float1 :=
binary_float prec1 emax1.
Let binary_float2 :=
binary_float prec2 emax2.
Definition Bconv (
conv_nan:
binary_float1 -> {
x |
is_nan prec2 emax2 x =
true}) (
md:
mode) (
f:
binary_float1) :
binary_float2 :=
match f with
|
B754_nan _ _ _ _ _ =>
build_nan prec2 emax2 (
conv_nan f)
|
B754_infinity _ _
s =>
B754_infinity _ _
s
|
B754_zero _ _
s =>
B754_zero _ _
s
|
B754_finite _ _
s m e _ =>
binary_normalize _ _ _
Hmax2 md (
cond_Zopp s (
Zpos m))
e s
end.
Theorem Bconv_correct:
forall conv_nan m f,
is_finite _ _
f =
true ->
if Rlt_bool (
Rabs (
round radix2 fexp2 (
round_mode m) (
B2R _ _
f))) (
bpow radix2 emax2)
then
B2R _ _ (
Bconv conv_nan m f) =
round radix2 fexp2 (
round_mode m) (
B2R _ _
f)
/\
is_finite _ _ (
Bconv conv_nan m f) =
true
/\
Bsign _ _ (
Bconv conv_nan m f) =
Bsign _ _
f
else
B2FF _ _ (
Bconv conv_nan m f) =
binary_overflow prec2 emax2 m (
Bsign _ _
f).
Proof.
intros.
destruct f as [
sf|
sf|
sf pf Hf|
sf mf ef Hf];
try discriminate.
-
simpl.
rewrite round_0.
rewrite Rabs_R0.
rewrite Rlt_bool_true.
auto.
apply bpow_gt_0.
apply valid_rnd_round_mode.
-
generalize (
binary_normalize_correct _ _ _
Hmax2 m (
cond_Zopp sf (
Zpos mf))
ef sf).
fold emin2;
fold fexp2.
simpl.
destruct Rlt_bool.
+
intros (
A &
B &
C).
split.
auto.
split.
auto.
rewrite C.
destruct sf;
simpl.
rewrite Rcompare_Lt.
auto.
apply F2R_lt_0.
simpl.
compute;
auto.
rewrite Rcompare_Gt.
auto.
apply F2R_gt_0.
simpl.
compute;
auto.
+
intros A.
rewrite A.
f_equal.
destruct sf.
apply Rlt_bool_true.
apply F2R_lt_0.
simpl.
compute;
auto.
apply Rlt_bool_false.
apply Rlt_le.
apply Rgt_lt.
apply F2R_gt_0.
simpl.
compute;
auto.
Qed.
Converting a finite FP number to higher or equal precision preserves its value.
Theorem Bconv_widen_exact:
(
prec2 >=
prec1)%
Z -> (
emax2 >=
emax1)%
Z ->
forall conv_nan m f,
is_finite _ _
f =
true ->
B2R _ _ (
Bconv conv_nan m f) =
B2R _ _
f
/\
is_finite _ _ (
Bconv conv_nan m f) =
true
/\
Bsign _ _ (
Bconv conv_nan m f) =
Bsign _ _
f.
Proof.
intros PREC EMAX;
intros.
generalize (
Bconv_correct conv_nan m f H).
assert (
LT: (
Rabs (
B2R _ _
f) <
bpow radix2 emax2)%
R).
{
destruct f;
try discriminate;
simpl.
rewrite Rabs_R0.
apply bpow_gt_0.
apply Rlt_le_trans with (
bpow radix2 emax1).
rewrite F2R_cond_Zopp.
rewrite abs_cond_Ropp.
rewrite <-
F2R_Zabs.
simpl Z.abs.
eapply bounded_lt_emax;
eauto.
apply bpow_le.
lia.
}
assert (
EQ:
round radix2 fexp2 (
round_mode m) (
B2R prec1 emax1 f) =
B2R prec1 emax1 f).
{
apply round_generic.
apply valid_rnd_round_mode.
eapply generic_inclusion_le.
5:
apply generic_format_B2R.
apply fexp_correct;
auto.
apply fexp_correct;
auto.
instantiate (1 :=
emax2).
intros.
unfold fexp,
fexp2,
FLT_exp.
unfold emin,
emin2.
lia.
apply Rlt_le;
auto.
}
rewrite EQ.
rewrite Rlt_bool_true by auto.
auto.
Qed.
Conversion from integers and change of format
Theorem Bconv_BofZ:
forall conv_nan n,
integer_representable prec1 emax1 n ->
Bconv conv_nan mode_NE (
BofZ prec1 emax1 _
Hmax1 n) =
BofZ prec2 emax2 _
Hmax2 n.
Proof.
intros.
destruct (
BofZ_representable _ _ _
Hmax1 n H)
as (
A &
B &
C).
set (
f :=
BofZ prec1 emax1 prec1_gt_0_ Hmax1 n)
in *.
generalize (
Bconv_correct conv_nan mode_NE f B).
unfold BofZ.
generalize (
binary_normalize_correct _ _ _
Hmax2 mode_NE n 0
false).
fold emin2;
fold fexp2.
rewrite A.
replace (
F2R {|
Fnum :=
n;
Fexp := 0 |})
with (
IZR n).
destruct Rlt_bool.
-
intros (
P &
Q &
R) (
D &
E &
F).
apply B2R_Bsign_inj;
auto.
now rewrite P,
D.
rewrite F,
C,
R.
rewrite Rcompare_IZR.
unfold Z.ltb.
auto.
-
intros P Q.
apply B2FF_inj.
rewrite P,
Q.
rewrite C.
f_equal.
generalize (
Zlt_bool_spec n 0);
intros LT;
inversion LT.
rewrite Rlt_bool_true;
auto.
apply IZR_lt;
auto.
rewrite Rlt_bool_false;
auto.
apply IZR_le;
auto.
-
unfold F2R;
simpl.
rewrite Rmult_1_r.
auto.
Qed.
Change of format (to higher precision) and conversion to integer.
Theorem ZofB_Bconv:
prec2 >=
prec1 ->
emax2 >=
emax1 ->
forall conv_nan m f n,
ZofB _ _
f =
Some n ->
ZofB _ _ (
Bconv conv_nan m f) =
Some n.
Proof.
intros.
rewrite ZofB_correct in H1.
destruct (
is_finite _ _
f)
eqn:
FIN;
inversion H1.
destruct (
Bconv_widen_exact H H0 conv_nan m f)
as (
A &
B &
C).
auto.
rewrite ZofB_correct.
rewrite B.
rewrite A.
auto.
Qed.
Theorem ZofB_range_Bconv:
forall min1 max1 min2 max2,
prec2 >=
prec1 ->
emax2 >=
emax1 ->
min2 <=
min1 ->
max1 <=
max2 ->
forall conv_nan m f n,
ZofB_range _ _
f min1 max1 =
Some n ->
ZofB_range _ _ (
Bconv conv_nan m f)
min2 max2 =
Some n.
Proof.
Change of format (to higher precision) and comparison.
Theorem Bcompare_Bconv_widen:
prec2 >=
prec1 ->
emax2 >=
emax1 ->
forall conv_nan m x y,
Bcompare _ _ (
Bconv conv_nan m x) (
Bconv conv_nan m y) =
Bcompare _ _
x y.
Proof.
intros.
destruct (
is_finite _ _
x &&
is_finite _ _
y)
eqn:
FIN.
-
apply andb_true_iff in FIN.
destruct FIN.
destruct (
Bconv_widen_exact H H0 conv_nan m x H1)
as (
A &
B &
C).
destruct (
Bconv_widen_exact H H0 conv_nan m y H2)
as (
D &
E &
F).
rewrite !
Bcompare_correct by auto.
rewrite A,
D.
auto.
-
generalize (
Bconv_widen_exact H H0 conv_nan m x)
(
Bconv_widen_exact H H0 conv_nan m y);
intros P Q.
destruct x as [
sx|
sx|
sx px Hx|
sx mx ex Hx],
y as [
sy|
sy|
sy py Hy|
sy my ey Hy];
try discriminate;
simpl in P,
Q;
simpl;
repeat (
match goal with |-
context [
conv_nan ?
b ?
pl] =>
destruct (
conv_nan b pl)
end);
auto.
destruct Q as (
D &
E &
F);
auto.
now destruct binary_normalize.
destruct P as (
A &
B &
C);
auto.
now destruct binary_normalize.
destruct P as (
A &
B &
C);
auto.
now destruct binary_normalize.
Qed.
End Conversions.
Section Compose_Conversions.
Variable prec1 emax1 prec2 emax2 :
Z.
Context (
prec1_gt_0_ :
Prec_gt_0 prec1) (
prec2_gt_0_ :
Prec_gt_0 prec2).
Let emin1 := (3 -
emax1 -
prec1)%
Z.
Let fexp1 :=
FLT_exp emin1 prec1.
Let emin2 := (3 -
emax2 -
prec2)%
Z.
Let fexp2 :=
FLT_exp emin2 prec2.
Hypothesis Hmax1 : (
prec1 <
emax1)%
Z.
Hypothesis Hmax2 : (
prec2 <
emax2)%
Z.
Let binary_float1 :=
binary_float prec1 emax1.
Let binary_float2 :=
binary_float prec2 emax2.
Converting to a higher precision then down to the original format
is the identity.
Theorem Bconv_narrow_widen:
prec2 >=
prec1 ->
emax2 >=
emax1 ->
forall narrow_nan widen_nan m f,
is_nan _ _
f =
false ->
Bconv prec2 emax2 prec1 emax1 _
Hmax1 narrow_nan m (
Bconv prec1 emax1 prec2 emax2 _
Hmax2 widen_nan m f) =
f.
Proof.
intros.
destruct (
is_finite _ _
f)
eqn:
FIN.
-
assert (
EQ:
round radix2 fexp1 (
round_mode m) (
B2R prec1 emax1 f) =
B2R prec1 emax1 f).
{
apply round_generic.
apply valid_rnd_round_mode.
apply generic_format_B2R. }
generalize (
Bconv_widen_exact _ _ _ _ _ _
Hmax2 H H0 widen_nan m f FIN).
set (
f' :=
Bconv prec1 emax1 prec2 emax2 _
Hmax2 widen_nan m f).
intros (
A &
B &
C).
generalize (
Bconv_correct _ _ _ _ _
Hmax1 narrow_nan m f' B).
fold emin1.
fold fexp1.
rewrite A,
C,
EQ.
rewrite Rlt_bool_true.
intros (
D &
E &
F).
apply B2R_Bsign_inj;
auto.
destruct f;
try discriminate;
simpl.
rewrite Rabs_R0.
apply bpow_gt_0.
rewrite F2R_cond_Zopp.
rewrite abs_cond_Ropp.
rewrite <-
F2R_Zabs.
simpl Z.abs.
eapply bounded_lt_emax;
eauto.
-
destruct f;
try discriminate.
simpl.
auto.
Qed.
End Compose_Conversions.