| 1 | --- a/gcc/config/arm/arm-cores.def |
| 2 | +++ b/gcc/config/arm/arm-cores.def |
| 3 | @@ -74,6 +74,7 @@ ARM_CORE("strongarm", strongarm, 4, |
| 4 | ARM_CORE("strongarm110", strongarm110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) |
| 5 | ARM_CORE("strongarm1100", strongarm1100, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) |
| 6 | ARM_CORE("strongarm1110", strongarm1110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) |
| 7 | +ARM_CORE("fa526", fa526, 4, FL_LDSCHED, fastmul) |
| 8 | |
| 9 | /* V4T Architecture Processors */ |
| 10 | ARM_CORE("arm7tdmi", arm7tdmi, 4T, FL_CO_PROC , fastmul) |
| 11 | --- a/gcc/config/arm/arm.md |
| 12 | +++ b/gcc/config/arm/arm.md |
| 13 | @@ -417,7 +417,7 @@ |
| 14 | |
| 15 | (define_attr "generic_sched" "yes,no" |
| 16 | (const (if_then_else |
| 17 | - (ior (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa8,cortexa9") |
| 18 | + (ior (eq_attr "tune" "fa526,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexm4") |
| 19 | (eq_attr "tune_cortexr4" "yes")) |
| 20 | (const_string "no") |
| 21 | (const_string "yes")))) |
| 22 | @@ -435,6 +435,7 @@ |
| 23 | (include "arm1020e.md") |
| 24 | (include "arm1026ejs.md") |
| 25 | (include "arm1136jfs.md") |
| 26 | +(include "fa526.md") |
| 27 | (include "cortex-a8.md") |
| 28 | (include "cortex-a9.md") |
| 29 | (include "cortex-r4.md") |
| 30 | --- a/gcc/config/arm/arm-tune.md |
| 31 | +++ b/gcc/config/arm/arm-tune.md |
| 32 | @@ -1,5 +1,5 @@ |
| 33 | ;; -*- buffer-read-only: t -*- |
| 34 | ;; Generated automatically by gentune.sh from arm-cores.def |
| 35 | (define_attr "tune" |
| 36 | - "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexr4,cortexr4f,cortexm3,cortexm1,cortexm0" |
| 37 | + "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexr4,cortexr4f,cortexm4,cortexm3,cortexm1,cortexm0" |
| 38 | (const (symbol_ref "((enum attr_tune) arm_tune)"))) |
| 39 | --- a/gcc/config/arm/bpabi.h |
| 40 | +++ b/gcc/config/arm/bpabi.h |
| 41 | @@ -52,7 +52,8 @@ |
| 42 | /* The BPABI integer comparison routines return { -1, 0, 1 }. */ |
| 43 | #define TARGET_LIB_INT_CMP_BIASED !TARGET_BPABI |
| 44 | |
| 45 | -#define TARGET_FIX_V4BX_SPEC " %{mcpu=arm8|mcpu=arm810|mcpu=strongarm*|march=armv4:--fix-v4bx}" |
| 46 | +#define TARGET_FIX_V4BX_SPEC " %{mcpu=arm8|mcpu=arm810|mcpu=strongarm*\ |
| 47 | +|march=armv4|mcpu=fa526:--fix-v4bx}" |
| 48 | |
| 49 | #define BE8_LINK_SPEC " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5|mcpu=cortex-a8|mcpu=cortex-a9:%{!r:--be8}}}" |
| 50 | |
| 51 | --- /dev/null |
| 52 | +++ b/gcc/config/arm/fa526.md |
| 53 | @@ -0,0 +1,161 @@ |
| 54 | +;; Faraday FA526 Pipeline Description |
| 55 | +;; Copyright (C) 2010 Free Software Foundation, Inc. |
| 56 | +;; Written by I-Jui Sung, based on ARM926EJ-S Pipeline Description. |
| 57 | + |
| 58 | +;; This file is part of GCC. |
| 59 | +;; |
| 60 | +;; GCC is free software; you can redistribute it and/or modify it under |
| 61 | +;; the terms of the GNU General Public License as published by the Free |
| 62 | +;; Software Foundation; either version 3, or (at your option) any later |
| 63 | +;; version. |
| 64 | +;; |
| 65 | +;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| 66 | +;; WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 67 | +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 68 | +;; for more details. |
| 69 | +;; |
| 70 | +;; You should have received a copy of the GNU General Public License |
| 71 | +;; along with GCC; see the file COPYING3. If not see |
| 72 | +;; <http://www.gnu.org/licenses/>. |
| 73 | + |
| 74 | +;; These descriptions are based on the information contained in the |
| 75 | +;; FA526 Core Design Note, Copyright (c) 2010 Faraday Technology Corp. |
| 76 | +;; |
| 77 | +;; Modeled pipeline characteristics: |
| 78 | +;; LD -> any use: latency = 3 (2 cycle penalty). |
| 79 | +;; ALU -> any use: latency = 2 (1 cycle penalty). |
| 80 | + |
| 81 | +;; This automaton provides a pipeline description for the Faraday |
| 82 | +;; FA526 core. |
| 83 | +;; |
| 84 | +;; The model given here assumes that the condition for all conditional |
| 85 | +;; instructions is "true", i.e., that all of the instructions are |
| 86 | +;; actually executed. |
| 87 | + |
| 88 | +(define_automaton "fa526") |
| 89 | + |
| 90 | +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 91 | +;; Pipelines |
| 92 | +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 93 | + |
| 94 | +;; There is a single pipeline |
| 95 | +;; |
| 96 | +;; The ALU pipeline has fetch, decode, execute, memory, and |
| 97 | +;; write stages. We only need to model the execute, memory and write |
| 98 | +;; stages. |
| 99 | + |
| 100 | +;; S E M W |
| 101 | + |
| 102 | +(define_cpu_unit "fa526_core" "fa526") |
| 103 | + |
| 104 | +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 105 | +;; ALU Instructions |
| 106 | +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 107 | + |
| 108 | +;; ALU instructions require two cycles to execute, and use the ALU |
| 109 | +;; pipeline in each of the three stages. The results are available |
| 110 | +;; after the execute stage has finished. |
| 111 | +;; |
| 112 | +;; If the destination register is the PC, the pipelines are stalled |
| 113 | +;; for several cycles. That case is not modeled here. |
| 114 | + |
| 115 | +;; ALU operations |
| 116 | +(define_insn_reservation "526_alu_op" 1 |
| 117 | + (and (eq_attr "tune" "fa526") |
| 118 | + (eq_attr "type" "alu")) |
| 119 | + "fa526_core") |
| 120 | + |
| 121 | +(define_insn_reservation "526_alu_shift_op" 2 |
| 122 | + (and (eq_attr "tune" "fa526") |
| 123 | + (eq_attr "type" "alu_shift,alu_shift_reg")) |
| 124 | + "fa526_core") |
| 125 | + |
| 126 | +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 127 | +;; Multiplication Instructions |
| 128 | +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 129 | + |
| 130 | +(define_insn_reservation "526_mult1" 2 |
| 131 | + (and (eq_attr "tune" "fa526") |
| 132 | + (eq_attr "insn" "smlalxy,smulxy,smlaxy")) |
| 133 | + "fa526_core") |
| 134 | + |
| 135 | +(define_insn_reservation "526_mult2" 5 |
| 136 | + (and (eq_attr "tune" "fa526") |
| 137 | + (eq_attr "insn" "mul,mla,muls,mlas,umull,umlal,smull,smlal,umulls,\ |
| 138 | + umlals,smulls,smlals,smlawx")) |
| 139 | + "fa526_core*4") |
| 140 | + |
| 141 | +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 142 | +;; Load/Store Instructions |
| 143 | +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 144 | + |
| 145 | +;; The models for load/store instructions do not accurately describe |
| 146 | +;; the difference between operations with and without a base register |
| 147 | +;; writeback (such as "ldm!"). These models assume that all memory |
| 148 | +;; references hit in dcache. |
| 149 | + |
| 150 | +(define_insn_reservation "526_load1_op" 3 |
| 151 | + (and (eq_attr "tune" "fa526") |
| 152 | + (eq_attr "type" "load1,load_byte")) |
| 153 | + "fa526_core") |
| 154 | + |
| 155 | +(define_insn_reservation "526_load2_op" 4 |
| 156 | + (and (eq_attr "tune" "fa526") |
| 157 | + (eq_attr "type" "load2")) |
| 158 | + "fa526_core*2") |
| 159 | + |
| 160 | +(define_insn_reservation "526_load3_op" 5 |
| 161 | + (and (eq_attr "tune" "fa526") |
| 162 | + (eq_attr "type" "load3")) |
| 163 | + "fa526_core*3") |
| 164 | + |
| 165 | +(define_insn_reservation "526_load4_op" 6 |
| 166 | + (and (eq_attr "tune" "fa526") |
| 167 | + (eq_attr "type" "load4")) |
| 168 | + "fa526_core*4") |
| 169 | + |
| 170 | +(define_insn_reservation "526_store1_op" 0 |
| 171 | + (and (eq_attr "tune" "fa526") |
| 172 | + (eq_attr "type" "store1")) |
| 173 | + "fa526_core") |
| 174 | + |
| 175 | +(define_insn_reservation "526_store2_op" 1 |
| 176 | + (and (eq_attr "tune" "fa526") |
| 177 | + (eq_attr "type" "store2")) |
| 178 | + "fa526_core*2") |
| 179 | + |
| 180 | +(define_insn_reservation "526_store3_op" 2 |
| 181 | + (and (eq_attr "tune" "fa526") |
| 182 | + (eq_attr "type" "store3")) |
| 183 | + "fa526_core*3") |
| 184 | + |
| 185 | +(define_insn_reservation "526_store4_op" 3 |
| 186 | + (and (eq_attr "tune" "fa526") |
| 187 | + (eq_attr "type" "store4")) |
| 188 | + "fa526_core*4") |
| 189 | + |
| 190 | +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 191 | +;; Branch and Call Instructions |
| 192 | +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 193 | + |
| 194 | +;; Branch instructions are difficult to model accurately. The FA526 |
| 195 | +;; core can predict most branches. If the branch is predicted |
| 196 | +;; correctly, and predicted early enough, the branch can be completely |
| 197 | +;; eliminated from the instruction stream. Some branches can |
| 198 | +;; therefore appear to require zero cycles to execute. We assume that |
| 199 | +;; all branches are predicted correctly, and that the latency is |
| 200 | +;; therefore the minimum value. |
| 201 | + |
| 202 | +(define_insn_reservation "526_branch_op" 0 |
| 203 | + (and (eq_attr "tune" "fa526") |
| 204 | + (eq_attr "type" "branch")) |
| 205 | + "fa526_core") |
| 206 | + |
| 207 | +;; The latency for a call is actually the latency until its result is |
| 208 | +;; available, i.e. until R0 is ready with the int return value. In most cases |
| 209 | +;; the return value is set by a mov instruction, which has 1 cycle latency. |
| 210 | +(define_insn_reservation "526_call_op" 1 |
| 211 | + (and (eq_attr "tune" "fa526") |
| 212 | + (eq_attr "type" "call")) |
| 213 | + "fa526_core") |
| 214 | + |
| 215 | --- a/gcc/config/arm/t-arm |
| 216 | +++ b/gcc/config/arm/t-arm |
| 217 | @@ -24,6 +24,7 @@ MD_INCLUDES= $(srcdir)/config/arm/arm-t |
| 218 | $(srcdir)/config/arm/arm1020e.md \ |
| 219 | $(srcdir)/config/arm/arm1026ejs.md \ |
| 220 | $(srcdir)/config/arm/arm1136jfs.md \ |
| 221 | + $(srcdir)/config/arm/fa526.md \ |
| 222 | $(srcdir)/config/arm/arm926ejs.md \ |
| 223 | $(srcdir)/config/arm/cirrus.md \ |
| 224 | $(srcdir)/config/arm/fpa.md \ |
| 225 | --- a/gcc/config/arm/t-arm-elf |
| 226 | +++ b/gcc/config/arm/t-arm-elf |
| 227 | @@ -36,6 +36,10 @@ MULTILIB_DIRNAMES = arm thumb |
| 228 | MULTILIB_EXCEPTIONS = |
| 229 | MULTILIB_MATCHES = |
| 230 | |
| 231 | +#MULTILIB_OPTIONS += mcpu=fa526 |
| 232 | +#MULTILIB_DIRNAMES += fa526 |
| 233 | +#MULTILIB_EXCEPTIONS += *mthumb*/*mcpu=fa526 |
| 234 | + |
| 235 | #MULTILIB_OPTIONS += march=armv7 |
| 236 | #MULTILIB_DIRNAMES += thumb2 |
| 237 | #MULTILIB_EXCEPTIONS += march=armv7* marm/*march=armv7* |
| 238 | @@ -52,6 +56,7 @@ MULTILIB_MATCHES = |
| 239 | MULTILIB_OPTIONS += mfloat-abi=hard |
| 240 | MULTILIB_DIRNAMES += fpu |
| 241 | MULTILIB_EXCEPTIONS += *mthumb/*mfloat-abi=hard* |
| 242 | +MULTILIB_EXCEPTIONS += *mcpu=fa526/*mfloat-abi=hard* |
| 243 | |
| 244 | # MULTILIB_OPTIONS += mcpu=ep9312 |
| 245 | # MULTILIB_DIRNAMES += ep9312 |
| 246 | --- a/gcc/doc/invoke.texi |
| 247 | +++ b/gcc/doc/invoke.texi |
| 248 | @@ -9900,7 +9900,8 @@ assembly code. Permissible names are: @ |
| 249 | @samp{cortex-r4}, @samp{cortex-r4f}, @samp{cortex-m4}, @samp{cortex-m3}, |
| 250 | @samp{cortex-m1}, |
| 251 | @samp{cortex-m0}, |
| 252 | -@samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}. |
| 253 | +@samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}, |
| 254 | +@samp{fa526}. |
| 255 | |
| 256 | @item -mtune=@var{name} |
| 257 | @opindex mtune |
| 258 | |