@@ -883,6 +883,165 @@ def test_InverseSquareRootAnnealing(self):
883
883
884
884
assert final_lr == self .MIN_LR
885
885
886
class TestWarmupHoldAnnealSchedulers:
    """Unit tests for the warmup-hold-anneal LR scheduler policies.

    Both schedulers under test share the same contract (warmup ramp,
    optional hold plateau, then anneal down to ``min_lr``), so the
    per-scheduler checks are factored into one private helper.
    """

    INITIAL_LR = 0.1   # base LR handed to the optimizer
    MIN_LR = 0.01      # floor the schedule must anneal down to
    MAX_STEPS = 100    # total scheduler steps simulated per case

    def _run_schedule_checks(self, scheduler_cls):
        """Drive ``scheduler_cls`` through two full schedules and assert phase behavior.

        Case 1: no warmup/hold — LR starts at INITIAL_LR, never exceeds it,
        and ends at MIN_LR.
        Case 2: 10% warmup + 20% hold — LR starts below INITIAL_LR, rises
        monotonically during warmup, stays flat during hold, and decreases
        monotonically during annealing, ending at MIN_LR.
        """
        model = TempModel()
        opt_cls = optim.get_optimizer('novograd')
        opt = opt_cls(model.parameters(), lr=self.INITIAL_LR)

        # --- Case 1: no warmup, no hold ---
        policy = scheduler_cls(
            opt, warmup_ratio=None, hold_ratio=None, max_steps=self.MAX_STEPS, min_lr=self.MIN_LR
        )
        assert policy.get_last_lr()[0] == self.INITIAL_LR

        lrs = []
        for _ in range(self.MAX_STEPS):
            current_lr = policy.get_last_lr()[0]
            lrs.append(current_lr)
            assert current_lr <= self.INITIAL_LR
            opt.step()
            policy.step()

        # One extra step pushes the schedule to its terminal value.
        policy.step()
        assert policy.get_last_lr()[0] == self.MIN_LR

        # --- Case 2: with warmup and hold ---
        warmup_ratio = 0.1  # 10% warmup
        hold_ratio = 0.2  # 20% hold
        warmup_steps = int(warmup_ratio * self.MAX_STEPS)
        hold_steps = int(hold_ratio * self.MAX_STEPS)

        policy = scheduler_cls(
            opt, warmup_ratio=warmup_ratio, hold_ratio=hold_ratio, max_steps=self.MAX_STEPS, min_lr=self.MIN_LR
        )
        # Warmup means the very first LR is below the base LR.
        assert policy.get_last_lr()[0] < self.INITIAL_LR

        lrs = []
        for i in range(self.MAX_STEPS):
            current_lr = policy.get_last_lr()[0]
            lrs.append(current_lr)

            if i < warmup_steps:
                # During warmup, LR should increase monotonically.
                if i > 0:
                    assert current_lr >= lrs[i - 1]
                assert current_lr <= self.INITIAL_LR
            elif i < warmup_steps + hold_steps:
                # During hold, LR should remain (numerically) constant.
                assert abs(current_lr - self.INITIAL_LR) < 1e-6
            else:
                # During annealing, LR should decrease monotonically.
                if i > warmup_steps + hold_steps:
                    assert current_lr <= lrs[i - 1]

            opt.step()
            policy.step()

        # Check final LR after the schedule is exhausted.
        policy.step()
        assert policy.get_last_lr()[0] == self.MIN_LR

    @pytest.mark.unit
    def test_WarmupHoldAnnealOneMinusSquareRoot(self):
        self._run_schedule_checks(optim.lr_scheduler.WarmupHoldAnnealOneMinusSquareRoot)

    @pytest.mark.unit
    def test_WarmupHoldAnnealLinear(self):
        self._run_schedule_checks(optim.lr_scheduler.WarmupHoldAnnealLinear)
886
1045
@pytest .mark .unit
887
1046
def test_CosineAnnealing_with_noop_steps (self ):
888
1047
model = TempModel ()
0 commit comments