Browse Source

minor performance improvement and bug fix

ipatix 8 years ago
parent
commit
c91f119764
1 changed file with 117 additions and 45 deletions
  1. 117
    45
      src/music/main_mixer.S

+ 117
- 45
src/music/main_mixer.S View File

@@ -442,7 +442,7 @@ hq_buffer_length:     @ TODO: Replace with variable on stack
442 442
 C_mixing_setup:
443 443
         /* frequency and mixing loading routine */
444 444
         LDR     R8, hq_buffer_length
445
-        ORRS    R11, R10, R11, LSL#16           @ R11 = 00RR00LL
445
+        ORRS    R11, R11, R10, LSL#16           @ R11 = 00LL00RR
446 446
         BEQ     C_mixing_epilogue               @ volume #0 --> branch and skip channel processing
447 447
         /* normal processing otherwise */
448 448
         TST     R0, #MODE_FIXED_FREQ
@@ -868,60 +868,132 @@ C_main_mixer_return:
868 868
     .align  2
869 869
 
870 870
 C_downsampler:
871
-    MOV     R4, #0
872
-    MOV     R5, #0
871
+    LDR     R4, =0xFFF80035
872
+    LDR     R5, =0x0034FFF8
873 873
     LDR     R8, hq_buffer_length
874 874
     LDR     R9, [SP, #ARG_BUFFER_POS]
875 875
     LDR     R10, hq_buffer
876
-    MOV     R11, #0xFF
877
-    MOV     R12, #0xFFFFFFFF
878
-    MOV     R12, R12, LSL#14
879
-    MOV     R7, #0x630
876
+    MOV     R11, #0xFF000000
880 877
 
881 878
 C_downsampler_loop:
882
-        LDRSH   R2, [R10]
883
-        LDRSH   R0, [R10, #2]
884
-        LDRSH   R3, [R10, #4]
885
-        LDRSH   R1, [R10, #6]
886
-        STMIA   R10!, {R4, R5}
887
-
888
-        CMP     R0, #0x4000
889
-        MOVGE   R0, #0x3F80
890
-        CMP     R0, #-0x4000
891
-        MOVLT   R0, R12
892
-
893
-        CMP     R1, #0x4000
894
-        MOVGE   R1, #0x3F80
895
-        CMP     R1, #-0x4000
896
-        MOVLT   R1, R12
897
-
898
-        CMP     R2, #0x4000
899
-        MOVGE   R2, #0x3F80
900
-        CMP     R2, #-0x4000
901
-        MOVLT   R2, R12
902
-
903
-        CMP     R3, #0x4000
904
-        MOVGE   R3, #0x3F80
905
-        CMP     R3, #-0x4000
906
-        MOVLT   R3, R12
907
-
908
-        AND     R0, R11, R0, ASR#7
909
-        AND     R1, R11, R1, ASR#7
910
-        AND     R2, R11, R2, ASR#7
911
-        AND     R3, R11, R3, ASR#7
912
-
913
-        ORR     R2, R2, R3, LSL#8
914
-        ORR     R0, R0, R1, LSL#8
915
-
916
-        STRH    R2, [R9, R7]
917
-        STRH    R0, [R9], #2
918
-
919
-        SUBS    R8, #2
879
+        LDMIA   R10, {R0, R1, R2, R3}
880
+
881
+        MOV     R12, R0             @ left sample #1
882
+        CMP     R12, #0x40000000
883
+        MOVGE   R12, #0x3F800000
884
+        CMP     R12, #-0x40000000
885
+        MOVLT   R12, #-0x40000000
886
+        AND     R6, R11, R12, LSL#1
887
+
888
+        MOV     R0, R0, LSL#16      @ right
889
+        CMP     R0, #0x40000000
890
+        MOVGE   R0, #0x3F800000
891
+        CMP     R0, #-0x40000000
892
+        MOVLT   R0, #-0x40000000
893
+        AND     R7, R11, R0, LSL#1
894
+
895
+        MOV     R12, R1             @ left sample #2
896
+        CMP     R12, #0x40000000
897
+        MOVGE   R12, #0x3F800000
898
+        CMP     R12, #-0x40000000
899
+        MOVLT   R12, #-0x40000000
900
+        AND     R12, R11, R12, LSL#1
901
+        ORR     R6, R12, R6, LSR#8
902
+
903
+        MOV     R1, R1, LSL#16      @ right
904
+        CMP     R1, #0x40000000
905
+        MOVGE   R1, #0x3F800000
906
+        CMP     R1, #-0x40000000
907
+        MOVLT   R1, #-0x40000000
908
+        AND     R1, R11, R1, LSL#1
909
+        ORR     R7, R1, R7, LSR#8
910
+
911
+        MOV     R12, R2             @ left sample #3
912
+        CMP     R12, #0x40000000
913
+        MOVGE   R12, #0x3F800000
914
+        CMP     R12, #-0x40000000
915
+        MOVLT   R12, #-0x40000000
916
+        AND     R12, R11, R12, LSL#1
917
+        ORR     R6, R12, R6, LSR#8
918
+
919
+        MOV     R2, R2, LSL#16      @ right
920
+        CMP     R2, #0x40000000
921
+        MOVGE   R2, #0x3F800000
922
+        CMP     R2, #-0x40000000
923
+        MOVLT   R2, #-0x40000000
924
+        AND     R2, R11, R2, LSL#1
925
+        ORR     R7, R2, R7, LSR#8
926
+
927
+        MOV     R12, R3             @ left sample #4
928
+        CMP     R12, #0x40000000
929
+        MOVGE   R12, #0x3F800000
930
+        CMP     R12, #-0x40000000
931
+        MOVLT   R12, #-0x40000000
932
+        AND     R12, R11, R12, LSL#1
933
+        ORR     R6, R12, R6, LSR#8
934
+
935
+        MOV     R3, R3, LSL#16      @ right
936
+        CMP     R3, #0x40000000
937
+        MOVGE   R3, #0x3F800000
938
+        CMP     R3, #-0x40000000
939
+        MOVLT   R3, #-0x40000000
940
+        AND     R3, R11, R3, LSL#1
941
+        ORR     R7, R3, R7, LSR#8
942
+
943
+        @LDR     R12, [R9, #0x630]    @ left
944
+        STR     R6, [R9, #0x630]
945
+        @LDR     R6, [R9]            @ right
946
+        STR     R7, [R9], #4
947
+
948
+        @MOVS    R7, R12, ASR#24
949
+        @ADDMI   R7, R7, #1
950
+        @MUL     R3, R4, R7
951
+        @MOVS    R7, R6, ASR#24
952
+        @ADDMI   R7, R7, #1
953
+        @MLA     R3, R5, R7, R3
954
+        @MOV     R12, R12, LSL#8
955
+        @MOV     R6, R6, LSL#8
956
+
957
+        @MOVS    R7, R12, ASR#24
958
+        @ADDMI   R7, R7, #1
959
+        @MUL     R2, R4, R7
960
+        @MOVS    R7, R6, ASR#24
961
+        @ADDMI   R7, R7, #1
962
+        @MLA     R2, R5, R7, R2
963
+        @MOV     R12, R12, LSL#8
964
+        @MOV     R6, R6, LSL#8
965
+
966
+        @MOVS    R7, R12, ASR#24
967
+        @ADDMI   R7, R7, #1
968
+        @MUL     R1, R4, R7
969
+        @MOVS    R7, R6, ASR#24
970
+        @ADDMI   R7, R7, #1
971
+        @MLA     R1, R5, R7, R1
972
+        @MOV     R12, R12, LSL#8
973
+        @MOV     R6, R6, LSL#8
974
+
975
+        @MOVS    R7, R12, ASR#24
976
+        @ADDMI   R7, R7, #1
977
+        @MUL     R0, R4, R7
978
+        @MOVS    R7, R6, ASR#24
979
+        @ADDMI   R7, R7, #1
980
+        @MLA     R0, R5, R7, R0
981
+        MOV     R0, #0
982
+        MOV     R1, #0
983
+        MOV     R2, #0
984
+        MOV     R3, #0
985
+
986
+        STMIA   R10!, {R0, R1, R2, R3}
987
+
988
+        SUBS    R8, #4
920 989
         BGT     C_downsampler_loop
921 990
 
922 991
     ADR     R0, (C_downsampler_return+1)
923 992
     BX      R0
924 993
 
994
+    .pool
995
+
996
+    .align  1
925 997
     .thumb
926 998
 
927 999
 C_downsampler_return: