Root/arch/ia64/kernel/fsys.S

1/*
2 * This file contains the light-weight system call handlers (fsyscall-handlers).
3 *
4 * Copyright (C) 2003 Hewlett-Packard Co
5 * David Mosberger-Tang <davidm@hpl.hp.com>
6 *
7 * 25-Sep-03 davidm Implement fsys_rt_sigprocmask().
8 * 18-Feb-03 louisk Implement fsys_gettimeofday().
9 * 28-Feb-03 davidm Fixed several bugs in fsys_gettimeofday(). Tuned it some more,
10 * probably broke it along the way... ;-)
11 * 13-Jul-04 clameter Implement fsys_clock_gettime and revise fsys_gettimeofday to make
12 * it capable of using memory based clocks without falling back to C code.
13 * 08-Feb-07 Fenghua Yu Implement fsys_getcpu.
14 *
15 */
16
17#include <asm/asmmacro.h>
18#include <asm/errno.h>
19#include <asm/asm-offsets.h>
20#include <asm/percpu.h>
21#include <asm/thread_info.h>
22#include <asm/sal.h>
23#include <asm/signal.h>
24#include <asm/system.h>
25#include <asm/unistd.h>
26
27#include "entry.h"
28#include "paravirt_inst.h"
29
30/*
31 * See Documentation/ia64/fsys.txt for details on fsyscalls.
32 *
33 * On entry to an fsyscall handler:
34 * r10 = 0 (i.e., defaults to "successful syscall return")
35 * r11 = saved ar.pfs (a user-level value)
36 * r15 = system call number
37 * r16 = "current" task pointer (in normal kernel-mode, this is in r13)
38 * r32-r39 = system call arguments
39 * b6 = return address (a user-level value)
40 * ar.pfs = previous frame-state (a user-level value)
41 * PSR.be = cleared to zero (i.e., little-endian byte order is in effect)
42 * all other registers may contain values passed in from user-mode
43 *
44 * On return from an fsyscall handler:
45 * r11 = saved ar.pfs (as passed into the fsyscall handler)
46 * r15 = system call number (as passed into the fsyscall handler)
47 * r32-r39 = system call arguments (as passed into the fsyscall handler)
48 * b6 = return address (as passed into the fsyscall handler)
49 * ar.pfs = previous frame-state (as passed into the fsyscall handler)
50 */
51
/*
 * fsys_ni_syscall: handler installed in the first slot of
 * paravirt_fsyscall_table. It performs no work and reports failure:
 * r8 = ENOSYS, r10 = -1 (r10 is 0 on entry, meaning "successful return").
 */
52ENTRY(fsys_ni_syscall)
53    .prologue
54    .altrp b6
55    .body
56    mov r8=ENOSYS // error code handed back to the caller
57    mov r10=-1 // override the default "success" indication in r10
58    FSYS_RETURN
59END(fsys_ni_syscall)
60
/*
 * fsys_getpid: light-weight getpid().
 *
 * Follows current->group_leader -> pids[PIDTYPE_PID].pid and returns
 * pid->numbers[pid->level].nr in r8. If any TIF_ALLWORK_MASK bit is set in
 * the thread flags, the call is redirected to fsys_fallback_syscall.
 * r17 (which held kernel pointers) is zeroed before returning.
 */
61ENTRY(fsys_getpid)
62    .prologue
63    .altrp b6
64    .body
65    add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
66    ;;
67    ld8 r17=[r17] // r17 = current->group_leader
68    add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // r9 = address of the thread flags word
69    ;;
70    ld4 r9=[r9] // r9 = thread flags
71    add r17=IA64_TASK_TGIDLINK_OFFSET,r17
72    ;;
73    and r9=TIF_ALLWORK_MASK,r9 // keep only the pending-work bits
74    ld8 r17=[r17] // r17 = current->group_leader->pids[PIDTYPE_PID].pid
75    ;;
76    add r8=IA64_PID_LEVEL_OFFSET,r17
77    ;;
78    ld4 r8=[r8] // r8 = pid->level
79    add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0]
80    ;;
81    shl r8=r8,IA64_UPID_SHIFT // scale the level into a byte offset within numbers[]
82    ;;
83    add r17=r17,r8 // r17 = &pid->numbers[pid->level]
84    ;;
85    ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr
86    ;;
87    mov r17=0 // do not hand a kernel pointer back in r17
88    ;;
89    cmp.ne p8,p0=0,r9 // p8 <- some work is pending
90(p8) br.spnt.many fsys_fallback_syscall
91    FSYS_RETURN
92END(fsys_getpid)
93
/*
 * fsys_getppid: light-weight getppid().
 *
 * Returns current->group_leader->real_parent->tgid in r8. Under CONFIG_SMP
 * real_parent is read again after the tgid load and the sequence is retried
 * from label 1 if the pointer changed in between. r17-r19 are zeroed
 * before returning.
 *
 * NOTE(review): the branch to fsys_fallback_syscall on pending work (p8) is
 * only present inside the CONFIG_SMP section; without CONFIG_SMP p8 is
 * computed but never acted upon -- confirm that this is intended.
 */
94ENTRY(fsys_getppid)
95    .prologue
96    .altrp b6
97    .body
98    add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
99    ;;
100    ld8 r17=[r17] // r17 = current->group_leader
101    add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // r9 = address of the thread flags word
102    ;;
103
104    ld4 r9=[r9] // r9 = thread flags
105    add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent
106    ;;
107    and r9=TIF_ALLWORK_MASK,r9 // keep only the pending-work bits
108
1091: ld8 r18=[r17] // r18 = current->group_leader->real_parent
110    ;;
111    cmp.ne p8,p0=0,r9 // p8 <- some work is pending
112    add r8=IA64_TASK_TGID_OFFSET,r18 // r8 = &current->group_leader->real_parent->tgid
113    ;;
114
115    /*
116     * The .acq is needed to ensure that the read of tgid has returned its data before
117     * we re-check "real_parent".
118     */
119    ld4.acq r8=[r8] // r8 = current->group_leader->real_parent->tgid
120#ifdef CONFIG_SMP
121    /*
122     * Re-read current->group_leader->real_parent.
123     */
124    ld8 r19=[r17] // r19 = current->group_leader->real_parent
125(p8) br.spnt.many fsys_fallback_syscall
126    ;;
127    cmp.ne p6,p0=r18,r19 // did real_parent change?
128    mov r19=0 // i must not leak kernel bits...
129(p6) br.cond.spnt.few 1b // yes -> redo the read of tgid and the check
130    ;;
131    mov r17=0 // i must not leak kernel bits...
132    mov r18=0 // i must not leak kernel bits...
133#else
134    mov r17=0 // i must not leak kernel bits...
135    mov r18=0 // i must not leak kernel bits...
136    mov r19=0 // i must not leak kernel bits...
137#endif
138    FSYS_RETURN
139END(fsys_getppid)
140
/*
 * fsys_set_tid_address: light-weight set_tid_address().
 *
 * In:  r32 = new value for the task's clear_child_tid field
 * Out: r8  = pid->numbers[pid->level].nr of current's PIDTYPE_PID pid
 *
 * The field at IA64_TASK_CLEAR_CHILD_TID_OFFSET is written with r32, or with
 * -1 when r32 is a NaT. The store sits in the same instruction group as the
 * branch to fsys_fallback_syscall that is taken when TIF_ALLWORK_MASK work is
 * pending. r17 and r18 are zeroed before returning.
 */
141ENTRY(fsys_set_tid_address)
142    .prologue
143    .altrp b6
144    .body
145    add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // r9 = address of the thread flags word
146    add r17=IA64_TASK_TGIDLINK_OFFSET,r16
147    ;;
148    ld4 r9=[r9] // r9 = thread flags
149    tnat.z p6,p7=r32 // check argument register for being NaT
150    ld8 r17=[r17] // r17 = current->pids[PIDTYPE_PID].pid
151    ;;
152    and r9=TIF_ALLWORK_MASK,r9 // keep only the pending-work bits
153    add r8=IA64_PID_LEVEL_OFFSET,r17
154    add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16 // r18 = address of the clear_child_tid field
155    ;;
156    ld4 r8=[r8] // r8 = pid->level
157    add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0]
158    ;;
159    shl r8=r8,IA64_UPID_SHIFT // scale the level into a byte offset within numbers[]
160    ;;
161    add r17=r17,r8 // r17 = &pid->numbers[pid->level]
162    ;;
163    ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr
164    ;;
165    cmp.ne p8,p0=0,r9 // p8 <- some work is pending
166    mov r17=-1 // value stored when the argument is a NaT
167    ;;
168(p6) st8 [r18]=r32 // argument is not a NaT: store it
169(p7) st8 [r18]=r17 // argument is a NaT: store -1 instead
170(p8) br.spnt.many fsys_fallback_syscall
171    ;;
172    mov r17=0 // i must not leak kernel bits...
173    mov r18=0 // i must not leak kernel bits...
174    FSYS_RETURN
175END(fsys_set_tid_address)
176
/*
 * Build-time guards: the time code below uses the address of
 * fsyscall_gtod_data directly as the address of gtod_lock.sequence and the
 * address of itc_jitter_data directly as the address of the itc_jitter
 * value, so both members must sit at offset 0.
 */
177#if IA64_GTOD_LOCK_OFFSET !=0
178#error fsys_gettimeofday incompatible with changes to struct fsyscall_gtod_data_t
179#endif
180#if IA64_ITC_JITTER_OFFSET !=0
181#error fsys_gettimeofday incompatible with changes to struct itc_jitter_data_t
182#endif
/*
 * Clock ids accepted by fsys_clock_gettime, followed by the processing flags
 * passed in r30 to the shared .gettime code. The flags are copied into the
 * predicate registers with "mov pr = r30,0xc000": 0x4000 is bit 14 (p14,
 * divide nanoseconds by 1000) and 0x8000 is bit 15 (p15, use monotonic time).
 */
183#define CLOCK_REALTIME 0
184#define CLOCK_MONOTONIC 1
185#define CLOCK_DIVIDE_BY_1000 0x4000
186#define CLOCK_ADD_MONOTONIC 0x8000
187
/*
 * fsys_gettimeofday: light-weight gettimeofday().
 *
 * In:  r32 = pointer to the result, r33 = second argument (only checked for NaT)
 * Out: r8 = 0 and r10 = 0 on success;
 *      r8 = EINVAL or EFAULT and r10 = -1 on failure.
 *
 * This function also hosts code shared with other handlers in this file:
 *   .gettime      common body, entered from fsys_clock_gettime with
 *                 r31 = result pointer and r30 = processing flags
 *   .fail_einval  common "return EINVAL" exit
 *   .fail_efault  common "return EFAULT" exit
 *
 * The time data is read inside a retry loop (.time_redo) keyed on
 * gtod_lock.sequence; the loop repeats if the sequence was odd or changed.
 * If TIF_ALLWORK_MASK work is pending the call goes to fsys_fallback_syscall.
 */
188ENTRY(fsys_gettimeofday)
189    .prologue
190    .altrp b6
191    .body
192    mov r31 = r32
193    tnat.nz p6,p0 = r33 // guard against NaT argument
194(p6) br.cond.spnt.few .fail_einval
195    mov r30 = CLOCK_DIVIDE_BY_1000
196    ;;
197.gettime:
198    // Register map
199    // Incoming r31 = pointer to address where to place result
200    // r30 = flags determining how time is processed
201    // r2,r3 = temp r4-r7 preserved
202    // r8 = result nanoseconds
203    // r9 = result seconds
204    // r10 = temporary storage for clock difference
205    // r11 = preserved: saved ar.pfs
206    // r12 = preserved: memory stack
207    // r13 = preserved: thread pointer
208    // r14 = address of mask / mask value
209    // r15 = preserved: system call number
210    // r16 = preserved: current task pointer
211    // r17 = (not used)
212    // r18 = (not used)
213    // r19 = address of itc_lastcycle
214    // r20 = struct fsyscall_gtod_data (= address of gtod_lock.sequence)
215    // r21 = address of mmio_ptr
216    // r22 = address of wall_time or monotonic_time
217    // r23 = address of shift / value
218    // r24 = address mult factor / cycle_last value
219    // r25 = itc_lastcycle value
220    // r26 = address clocksource cycle_last
221    // r27 = (not used)
222    // r28 = sequence number at the beginning of critical section
223    // r29 = address of itc_jitter
224    // r30 = time processing flags / memory address
225    // r31 = pointer to result
226    // Predicates
227    // p6,p7 short term use
228    // p8 = timesource ar.itc
229    // p9 = timesource mmio64
230    // p10 = timesource mmio32 - not used
231    // p11 = timesource not to be handled by asm code
232    // p12 = memory time source ( = p9 | p10) - not used
233    // p13 = do cmpxchg with itc_lastcycle
234    // p14 = Divide by 1000
235    // p15 = Add monotonic
236    //
237    // Note that instructions are optimized for McKinley. McKinley can
238    // process two bundles simultaneously and therefore we continuously
239    // try to feed the CPU two bundles and then a stop.
240
241    add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
242    tnat.nz p6,p0 = r31 // guard against NaT argument
243(p6) br.cond.spnt.few .fail_einval
244    movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address
245    ;;
246    ld4 r2 = [r2] // process work pending flags
247    movl r29 = itc_jitter_data // itc_jitter
248    add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20 // wall_time
249    add r21 = IA64_CLKSRC_MMIO_OFFSET,r20
250    mov pr = r30,0xc000 // Set predicates according to function
251    ;;
252    and r2 = TIF_ALLWORK_MASK,r2
253    add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
254(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time
255    ;;
256    add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last
257    cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled
258(p6) br.cond.spnt.many fsys_fallback_syscall
259    ;;
260    // Begin critical section
261.time_redo:
262    ld4.acq r28 = [r20] // gtod_lock.sequence, Must take first
263    ;;
264    and r28 = ~1,r28 // And make sequence even to force retry if odd
265    ;;
266    ld8 r30 = [r21] // clocksource->mmio_ptr
267    add r24 = IA64_CLKSRC_MULT_OFFSET,r20
268    ld4 r2 = [r29] // itc_jitter value
269    add r23 = IA64_CLKSRC_SHIFT_OFFSET,r20
270    add r14 = IA64_CLKSRC_MASK_OFFSET,r20
271    ;;
272    ld4 r3 = [r24] // clocksource mult value
273    ld8 r14 = [r14] // clocksource mask value
274    cmp.eq p8,p9 = 0,r30 // use cpu timer if no mmio_ptr
275    ;;
276    setf.sig f7 = r3 // Setup for mult scaling of counter
277(p8) cmp.ne p13,p0 = r2,r0 // need itc_jitter compensation, set p13
278    ld4 r23 = [r23] // clocksource shift value
279    ld8 r24 = [r26] // get clksrc_cycle_last value
280(p9) cmp.eq p13,p0 = 0,r30 // if mmio_ptr, clear p13 jitter control
281    ;;
282    .pred.rel.mutex p8,p9
283    MOV_FROM_ITC(p8, p6, r2, r10) // CPU_TIMER. 36 clocks latency!!!
284(p9) ld8 r2 = [r30] // MMIO_TIMER. Could also have latency issues..
285(p13) ld8 r25 = [r19] // get itc_lastcycle value
286    ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET // tv_sec
287    ;;
288    ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET // tv_nsec
289(p13) sub r3 = r25,r2 // Diff needed before comparison (thanks davidm)
290    ;;
291(p13) cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared
292    sub r10 = r2,r24 // current_cycle - last_cycle
293    ;;
294(p6) sub r10 = r25,r24 // time we got was less than last_cycle
295(p7) mov ar.ccv = r25 // more than last_cycle. Prep for cmpxchg
296    ;;
297(p7) cmpxchg8.rel r3 = [r19],r2,ar.ccv
298    ;;
299(p7) cmp.ne p7,p0 = r25,r3 // if cmpxchg not successful
300    ;;
301(p7) sub r10 = r3,r24 // then use new last_cycle instead
302    ;;
303    and r10 = r10,r14 // Apply mask
304    ;;
305    setf.sig f8 = r10
306    nop.i 123
307    ;;
308    // fault check takes 5 cycles and we have spare time
309EX(.fail_efault, probe.w.fault r31, 3)
310    xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter)
311    ;;
312    getf.sig r2 = f8
313    mf
314    ;;
315    ld4 r10 = [r20] // gtod_lock.sequence
316    shr.u r2 = r2,r23 // shift by factor
317    ;;
318    add r8 = r8,r2 // Add xtime.nsecs
319    cmp4.ne p7,p0 = r28,r10
320(p7) br.cond.dpnt.few .time_redo // sequence number changed, redo
321    // End critical section.
322    // Now r8=tv->tv_nsec and r9=tv->tv_sec
323    mov r10 = r0 // r10 was used as scratch above; restore 0 (= success)
324    movl r2 = 1000000000 // nanoseconds per second, used to normalize r8
325    add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31 // r23 = address of the second result field
    // 2361183241434822607 = ceil(2^68 / 125); high-multiply by it, then shift
    // right by 4 (done below) to divide by 125.
326(p14) movl r3 = 2361183241434822607 // Prep for / 1000 hack
327    ;;
328.time_normalize:
329    mov r21 = r8
330    cmp.ge p6,p0 = r8,r2
331(p14) shr.u r20 = r8, 3 // We can repeat this if necessary just wasting time
332    ;;
333(p14) setf.sig f8 = r20
334(p6) sub r8 = r8,r2
335(p6) add r9 = 1,r9 // two nops before the branch.
336(p14) setf.sig f7 = r3 // Chances for repeats are 1 in 10000 for gettod
337(p6) br.cond.dpnt.few .time_normalize
338    ;;
339    // Divided by 8 through shift. Now divide by 125
340    // The compiler was able to do that with a multiply
341    // and a shift and we do the same
342EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles
343(p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it
344    ;;
345(p14) getf.sig r2 = f8
346    ;;
347    mov r8 = r0 // return value 0
348(p14) shr.u r21 = r2, 4
349    ;;
350EX(.fail_efault, st8 [r31] = r9)
351EX(.fail_efault, st8 [r23] = r21)
352    FSYS_RETURN
    // Shared error exits; also branched to from other handlers in this file.
353.fail_einval:
354    mov r8 = EINVAL
355    mov r10 = -1
356    FSYS_RETURN
357.fail_efault:
358    mov r8 = EFAULT
359    mov r10 = -1
360    FSYS_RETURN
361END(fsys_gettimeofday)
362
/*
 * fsys_clock_gettime: light-weight clock_gettime().
 *
 * In: r32 = clock id, r33 = pointer to the result.
 *
 * Only CLOCK_REALTIME (0) and CLOCK_MONOTONIC (1) are handled here; any
 * larger id (unsigned 32-bit compare) goes to fsys_fallback_syscall.
 * The id is shifted left by 15 to form the flags in r30, so CLOCK_MONOTONIC
 * yields 0x8000 (CLOCK_ADD_MONOTONIC) and CLOCK_REALTIME yields 0. The
 * rest of the work is done by .gettime inside fsys_gettimeofday.
 */
363ENTRY(fsys_clock_gettime)
364    .prologue
365    .altrp b6
366    .body
367    cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32
368    // Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC
369(p6) br.spnt.few fsys_fallback_syscall
370    mov r31 = r33 // .gettime expects the result pointer in r31
371    shl r30 = r32,15 // .gettime expects the processing flags in r30
372    br.many .gettime
373END(fsys_clock_gettime)
374
375/*
376 * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize).
 *
 * In: r32 = how, r33 = set, r34 = oset, r35 = sigsetsize.
 *
 * Outcomes, in the order they are decided:
 *  - how > SIG_SETMASK, sigsetsize != _NSIG_WORDS*8, or a NaT in how or
 *    sigsetsize: .fail_einval.
 *  - a NaT in set or oset: .fail_efault.
 *  - TIF_ALLWORK_MASK work pending: fsys_fallback_syscall.
 *  - set == NULL: only the current blocked mask is stored to *oset
 *    (when oset != NULL).
 *  - otherwise interrupts are masked, the sighand siglock is try-locked on
 *    SMP, and the new mask is computed with SIGKILL/SIGSTOP filtered out.
 *    Lock contention, group_stop_count > 0, or a pending signal that the new
 *    mask does not block all lead to fsys_fallback_syscall, before
 *    current->blocked has been modified. Otherwise current->blocked is
 *    updated, _TIF_SIGPENDING is cleared in the thread flags, the lock is
 *    released, interrupts are re-enabled and the old mask goes to *oset.
 *
 * .fail_einval and .fail_efault are defined in fsys_gettimeofday.
377 */
378#if _NSIG_WORDS != 1
379# error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1.
380#endif
381ENTRY(fsys_rt_sigprocmask)
382    .prologue
383    .altrp b6
384    .body
385
386    add r2=IA64_TASK_BLOCKED_OFFSET,r16 // r2 = &current->blocked
387    add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // r9 = address of the thread flags word
388    cmp4.ltu p6,p0=SIG_SETMASK,r32 // p6 <- how is out of range
389
390    cmp.ne p15,p0=r0,r34 // oset != NULL?
391    tnat.nz p8,p0=r34 // p8 <- oset is a NaT
392    add r31=IA64_TASK_SIGHAND_OFFSET,r16
393    ;;
394    ld8 r3=[r2] // read/prefetch current->blocked
395    ld4 r9=[r9]
396    tnat.nz.or p6,p0=r35
397
398    cmp.ne.or p6,p0=_NSIG_WORDS*8,r35
399    tnat.nz.or p6,p0=r32
400(p6) br.spnt.few .fail_einval // fail with EINVAL
401    ;;
402#ifdef CONFIG_SMP
403    ld8 r31=[r31] // r31 <- current->sighand
404#endif
405    and r9=TIF_ALLWORK_MASK,r9
406    tnat.nz.or p8,p0=r33
407    ;;
408    cmp.ne p7,p0=0,r9
409    cmp.eq p6,p0=r0,r33 // set == NULL?
410    add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31 // r31 <- current->sighand->siglock
411(p8) br.spnt.few .fail_efault // fail with EFAULT
412(p7) br.spnt.many fsys_fallback_syscall // got pending kernel work...
413(p6) br.dpnt.many .store_mask // -> short-circuit to just reading the signal mask
414
415    /* Argh, we actually have to do some work and _update_ the signal mask: */
416
417EX(.fail_efault, probe.r.fault r33, 3) // verify user has read-access to *set
418EX(.fail_efault, ld8 r14=[r33]) // r14 <- *set
419    mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
420    ;;
421
422    RSM_PSR_I(p0, r18, r19) // mask interrupt delivery
423    andcm r14=r14,r17 // filter out SIGKILL & SIGSTOP
424    mov r8=EINVAL // default to EINVAL
425
426#ifdef CONFIG_SMP
427    // __ticket_spin_trylock(r31)
428    ld4 r17=[r31] // r17 = current lock word
429    ;;
430    mov.m ar.ccv=r17 // expected old value for the cmpxchg below
431    extr.u r9=r17,17,15 // r9 = 15-bit field at bits 17..31 of the lock word
432    adds r19=1,r17 // r19 = lock word with the low field incremented
433    extr.u r18=r17,0,15 // r18 = 15-bit field at bits 0..14 of the lock word
434    ;;
435    cmp.eq p6,p7=r9,r18 // p6 <- the two fields match (lock looks free), p7 <- otherwise
436    ;;
437(p6) cmpxchg4.acq r9=[r31],r19,ar.ccv
438(p6) dep.z r20=r19,1,15 // next serving ticket for unlock
439(p7) br.cond.spnt.many .lock_contention
440    ;;
441    cmp4.eq p0,p7=r9,r17 // p7 <- lock word changed under us, cmpxchg failed
442    adds r31=2,r31 // r31 now addresses the upper 16 bits, written by the unlock
443(p7) br.cond.spnt.many .lock_contention
444    ld8 r3=[r2] // re-read current->blocked now that we hold the lock
445    ;;
446#else
447    ld8 r3=[r2] // re-read current->blocked now that we hold the lock
448#endif
449    add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
450    add r19=IA64_TASK_SIGNAL_OFFSET,r16
451    cmp4.eq p6,p0=SIG_BLOCK,r32
452    ;;
453    ld8 r19=[r19] // r19 <- current->signal
454    cmp4.eq p7,p0=SIG_UNBLOCK,r32
455    cmp4.eq p8,p0=SIG_SETMASK,r32
456    ;;
457    ld8 r18=[r18] // r18 <- current->pending.signal
458    .pred.rel.mutex p6,p7,p8
459(p6) or r14=r3,r14 // SIG_BLOCK
460(p7) andcm r14=r3,r14 // SIG_UNBLOCK
461
462(p8) mov r14=r14 // SIG_SETMASK
463(p6) mov r8=0 // clear error code
464    // recalc_sigpending()
465    add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19
466
467    add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19
468    ;;
469    ld4 r17=[r17] // r17 <- current->signal->group_stop_count
470(p7) mov r8=0 // clear error code
471
472    ld8 r19=[r19] // r19 <- current->signal->shared_pending
473    ;;
474    cmp4.gt p6,p7=r17,r0 // p6/p7 <- (current->signal->group_stop_count > 0)?
475(p8) mov r8=0 // clear error code
476
477    or r18=r18,r19 // r18 <- current->pending | current->signal->shared_pending
478    ;;
479    // r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked:
480    andcm r18=r18,r14
481    add r9=TI_FLAGS+IA64_TASK_SIZE,r16
482    ;;
483
484(p7) cmp.ne.or.andcm p6,p7=r18,r0 // p6/p7 <- signal pending
485    mov r19=0 // i must not leak kernel bits...
486(p6) br.cond.dpnt.many .sig_pending
487    ;;
488
4891: ld4 r17=[r9] // r17 <- current->thread_info->flags
490    ;;
491    mov ar.ccv=r17
492    and r18=~_TIF_SIGPENDING,r17 // r18 <- r17 & ~(1 << TIF_SIGPENDING)
493    ;;
494
495    st8 [r2]=r14 // update current->blocked with new mask
496    cmpxchg4.acq r8=[r9],r18,ar.ccv // current->thread_info->flags <- r18
497    ;;
498    cmp.ne p6,p0=r17,r8 // update failed?
499(p6) br.cond.spnt.few 1b // yes -> retry
500
501#ifdef CONFIG_SMP
502    // __ticket_spin_unlock(r31)
503    st2.rel [r31]=r20
504    mov r20=0 // i must not leak kernel bits...
505#endif
506    SSM_PSR_I(p0, p9, r31)
507    ;;
508
509    srlz.d // ensure psr.i is set again
510    mov r18=0 // i must not leak kernel bits...
511
512.store_mask:
513EX(.fail_efault, (p15) probe.w.fault r34, 3) // verify user has write-access to *oset
514EX(.fail_efault, (p15) st8 [r34]=r3)
515    mov r2=0 // i must not leak kernel bits...
516    mov r3=0 // i must not leak kernel bits...
517    mov r8=0 // return 0
518    mov r9=0 // i must not leak kernel bits...
519    mov r14=0 // i must not leak kernel bits...
520    mov r17=0 // i must not leak kernel bits...
521    mov r31=0 // i must not leak kernel bits...
522    FSYS_RETURN
523
524.sig_pending:
525#ifdef CONFIG_SMP
526    // __ticket_spin_unlock(r31)
527    st2.rel [r31]=r20 // release the lock
528#endif
529    SSM_PSR_I(p0, p9, r17)
530    ;;
531    srlz.d
532    br.sptk.many fsys_fallback_syscall // with signal pending, do the heavy-weight syscall
533
534#ifdef CONFIG_SMP
535.lock_contention:
536    /* Rather than spinning here, fall back on doing a heavy-weight syscall. */
537    SSM_PSR_I(p0, p9, r17)
538    ;;
539    srlz.d
540    br.sptk.many fsys_fallback_syscall
541#endif
542END(fsys_rt_sigprocmask)
543
544/*
545 * fsys_getcpu doesn't use the third parameter in this implementation. It reads
546 * current_thread_info()->cpu and corresponding node in cpu_to_node_map.
 *
 * In:  r32 = pointer that receives the cpu number (4-byte store)
 *      r33 = pointer that receives the node number (2-byte store)
 * Out: r8 = 0 on success. A NaT in r32 or r33 takes .fail_einval; a failed
 *      probe or store takes .fail_efault (both labels are defined in
 *      fsys_gettimeofday). Pending TIF_ALLWORK_MASK work diverts the call to
 *      fsys_fallback_syscall.
 *
 * With CONFIG_NUMA the node is loaded from cpu_to_node_map, indexed as an
 * array of 2-byte entries; without it the node stored is 0.
 *
 * NOTE(review): the node is written with st2, i.e. only 16 bits of *r33 are
 * updated -- confirm this matches the width user space expects.
 * NOTE(review): both pointers are probed unconditionally, so presumably a
 * NULL cpu or node pointer ends in .fail_efault -- confirm whether callers
 * may legitimately pass NULL.
547 */
548ENTRY(fsys_getcpu)
549    .prologue
550    .altrp b6
551    .body
552    ;;
553    add r2=TI_FLAGS+IA64_TASK_SIZE,r16
554    tnat.nz p6,p0 = r32 // guard against NaT argument
555    add r3=TI_CPU+IA64_TASK_SIZE,r16
556    ;;
557    ld4 r3=[r3] // M r3 = thread_info->cpu
558    ld4 r2=[r2] // M r2 = thread_info->flags
559(p6) br.cond.spnt.few .fail_einval // B
560    ;;
561    tnat.nz p7,p0 = r33 // I guard against NaT argument
562(p7) br.cond.spnt.few .fail_einval // B
563#ifdef CONFIG_NUMA
564    movl r17=cpu_to_node_map
565    ;;
566EX(.fail_efault, probe.w.fault r32, 3) // M This takes 5 cycles
567EX(.fail_efault, probe.w.fault r33, 3) // M This takes 5 cycles
568    shladd r18=r3,1,r17 // r18 = &cpu_to_node_map[cpu] (2-byte entries)
569    ;;
570    ld2 r20=[r18] // r20 = cpu_to_node_map[cpu]
571    and r2 = TIF_ALLWORK_MASK,r2
572    ;;
573    cmp.ne p8,p0=0,r2 // p8 <- some work is pending
574(p8) br.spnt.many fsys_fallback_syscall
575    ;;
576    ;;
577EX(.fail_efault, st4 [r32] = r3)
578EX(.fail_efault, st2 [r33] = r20)
579    mov r8=0
580    ;;
581#else
582EX(.fail_efault, probe.w.fault r32, 3) // M This takes 5 cycles
583EX(.fail_efault, probe.w.fault r33, 3) // M This takes 5 cycles
584    and r2 = TIF_ALLWORK_MASK,r2
585    ;;
586    cmp.ne p8,p0=0,r2 // p8 <- some work is pending
587(p8) br.spnt.many fsys_fallback_syscall
588    ;;
589EX(.fail_efault, st4 [r32] = r3)
590EX(.fail_efault, st2 [r33] = r0)
591    mov r8=0
592    ;;
593#endif
594    FSYS_RETURN
595END(fsys_getcpu)
596
/*
 * fsys_fallback_syscall: common exit for light-weight handlers that cannot
 * complete the call themselves.
 *
 * Looks up the heavy-weight entry point for syscall r15 in sys_call_table
 * (index r15 - 1024, 8-byte entries), masks interrupts, and prepares the
 * registers the following code expects on entry:
 *   r18 = address of the syscall entry point, r21 = ar.fpsr, r26 = ar.pfs,
 *   r27 = ar.rsc, r29 = psr.
 * There is no return instruction: execution runs straight into
 * paravirt_fsys_bubble_down, which must therefore stay immediately after
 * this function.
 */
597ENTRY(fsys_fallback_syscall)
598    .prologue
599    .altrp b6
600    .body
601    /*
602     * We only get here from light-weight syscall handlers. Thus, we already
603     * know that r15 contains a valid syscall number. No need to re-check.
604     */
605    adds r17=-1024,r15 // r17 = zero-based index into sys_call_table
606    movl r14=sys_call_table
607    ;;
608    RSM_PSR_I(p0, r26, r27)
609    shladd r18=r17,3,r14 // r18 = &sys_call_table[r17]
610    ;;
611    ld8 r18=[r18] // load normal (heavy-weight) syscall entry-point
612    MOV_FROM_PSR(p0, r29, r26) // read psr (12 cyc load latency)
613    mov r27=ar.rsc
614    mov r21=ar.fpsr
615    mov r26=ar.pfs
616END(fsys_fallback_syscall)
617    /* FALL THROUGH */
/*
 * paravirt_fsys_bubble_down: turn an fsyscall into a normal (heavy-weight)
 * system call. Reached by falling through from fsys_fallback_syscall; its
 * address is also stored in the word preceding paravirt_fsyscall_table.
 *
 * In outline: clears current->thread.on_ustack, switches the register
 * backing store to the kernel RBS, builds the cr.ipsr value in r29, calls
 * ia64_syscall_setup, re-enables interrupts, and then either calls the
 * syscall entry point (return address set to ia64_ret_from_syscall) or goes
 * to ia64_trace_syscall when a _TIF_SYSCALL_TRACEAUDIT flag is set.
 */
618GLOBAL_ENTRY(paravirt_fsys_bubble_down)
619    .prologue
620    .altrp b6
621    .body
622    /*
623     * We get here for syscalls that don't have a lightweight
624     * handler. For those, we need to bubble down into the kernel
625     * and that requires setting up a minimal pt_regs structure,
626     * and initializing the CPU state more or less as if an
627     * interruption had occurred. To make syscall-restarts work,
628     * we setup pt_regs such that cr_iip points to the second
629     * instruction in syscall_via_break. Decrementing the IP
630     * hence will restart the syscall via break and not
631     * decrementing IP will return us to the caller, as usual.
632     * Note that we preserve the value of psr.pp rather than
633     * initializing it from dcr.pp. This makes it possible to
634     * distinguish fsyscall execution from other privileged
635     * execution.
636     *
637     * On entry:
638     * - normal fsyscall handler register usage, except
639     * that we also have:
640     * - r18: address of syscall entry point
641     * - r21: ar.fpsr
642     * - r26: ar.pfs
643     * - r27: ar.rsc
644     * - r29: psr
645     *
646     * We used to clear some PSR bits here but that requires slow
647     * serialization. Fortunately, that isn't really necessary.
648     * The rationale is as follows: we used to clear bits
649     * ~PSR_PRESERVED_BITS in PSR.L. Since
650     * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we
651     * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}.
652     * However,
653     *
654     * PSR.BE : already is turned off in __kernel_syscall_via_epc()
655     * PSR.AC : don't care (kernel normally turns PSR.AC on)
656     * PSR.I : already turned off by the time paravirt_fsys_bubble_down gets
657     * invoked
658     * PSR.DFL: always 0 (kernel never turns it on)
659     * PSR.DFH: don't care --- kernel never touches f32-f127 on its own
660     * initiative
661     * PSR.DI : always 0 (kernel never turns it on)
662     * PSR.SI : always 0 (kernel never turns it on)
663     * PSR.DB : don't care --- kernel never enables kernel-level
664     * breakpoints
665     * PSR.TB : must be 0 already; if it wasn't zero on entry to
666     * __kernel_syscall_via_epc, the branch to paravirt_fsys_bubble_down
667     * will trigger a taken branch; the taken-trap-handler then
668     * converts the syscall into a break-based system-call.
669     */
670    /*
671     * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.
672     * The rest we have to synthesize.
673     */
674# define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) \
675                     | (0x1 << IA64_PSR_RI_BIT) \
676                     | IA64_PSR_BN | IA64_PSR_I)
677
678    invala // M0|1
679    movl r14=ia64_ret_from_syscall // X
680
681    nop.m 0
682    movl r28=__kernel_syscall_via_break // X create cr.iip
683    ;;
684
685    mov r2=r16 // A get task addr to addl-addressable register
686    adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A
687    mov r31=pr // I0 save pr (2 cyc)
688    ;;
689    st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag
690    addl r22=IA64_RBS_OFFSET,r2 // A compute base of RBS
691    add r3=TI_FLAGS+IA64_TASK_SIZE,r2 // A
692    ;;
693    ld4 r3=[r3] // M0|1 r3 = current_thread_info()->flags
694    lfetch.fault.excl.nt1 [r22] // M0|1 prefetch register backing-store
695    nop.i 0
696    ;;
697    mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0
698#ifdef CONFIG_VIRT_CPU_ACCOUNTING
699    MOV_FROM_ITC(p0, p6, r30, r23) // M get cycle for accounting
700#else
701    nop.m 0
702#endif
703    nop.i 0
704    ;;
705    mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore
706    mov.m r24=ar.rnat // M2 (5 cyc) read ar.rnat (dual-issues!)
707    nop.i 0
708    ;;
709    mov ar.bspstore=r22 // M2 (6 cyc) switch to kernel RBS
710    movl r8=PSR_ONE_BITS // X
711    ;;
712    mov r25=ar.unat // M2 (5 cyc) save ar.unat
713    mov r19=b6 // I0 save b6 (2 cyc)
714    mov r20=r1 // A save caller's gp in r20
715    ;;
716    or r29=r8,r29 // A construct cr.ipsr value to save
717    mov b6=r18 // I0 copy syscall entry-point to b6 (7 cyc)
718    addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack
719
720    mov r18=ar.bsp // M2 save (kernel) ar.bsp (12 cyc)
721    cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1
722    br.call.sptk.many b7=ia64_syscall_setup // B
723    ;;
724#ifdef CONFIG_VIRT_CPU_ACCOUNTING
725    // mov.m r30=ar.itc is called in advance
726    add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
727    add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
728    ;;
729    ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel
730    ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at leave kernel
731    ;;
732    ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime
733    ld8 r21=[r17] // cumulated utime
734    sub r22=r19,r18 // stime before leave kernel
735    ;;
736    st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // update stamp
737    sub r18=r30,r19 // elapsed time in user mode
738    ;;
739    add r20=r20,r22 // sum stime
740    add r21=r21,r18 // sum utime
741    ;;
742    st8 [r16]=r20 // update stime
743    st8 [r17]=r21 // update utime
744    ;;
745#endif
746    mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0
747    mov rp=r14 // I0 set the real return addr
748    and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A
749    ;;
750    SSM_PSR_I(p0, p6, r22) // M2 we're on kernel stacks now, reenable irqs
751    cmp.eq p8,p0=r3,r0 // A
    // NOTE(review): p10 is not written in this function; presumably it is
    // set by ia64_syscall_setup -- confirm.
752(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT
753
754    nop.m 0
755(p8) br.call.sptk.many b6=b6 // B (ignore return address)
756    br.cond.spnt ia64_trace_syscall // B
757END(paravirt_fsys_bubble_down)
758
/*
 * paravirt_fsyscall_table: read-only table with one 8-byte slot per system
 * call. As the trailing number comments show, slot 0 corresponds to syscall
 * 1024 (the same bias fsys_fallback_syscall subtracts when it indexes
 * sys_call_table). A slot holds the address of a light-weight handler
 * from this file, or 0 when there is none.
 *
 * The word immediately before the table label holds the address of
 * paravirt_fsys_bubble_down.
 * NOTE(review): the code that consumes this table is not in this file;
 * presumably a 0 slot makes it use that preceding word -- confirm against
 * the dispatcher.
 */
759    .rodata
760    .align 8
761    .globl paravirt_fsyscall_table
762
763    data8 paravirt_fsys_bubble_down
764paravirt_fsyscall_table:
765    data8 fsys_ni_syscall
766    data8 0 // exit // 1025
767    data8 0 // read
768    data8 0 // write
769    data8 0 // open
770    data8 0 // close
771    data8 0 // creat // 1030
772    data8 0 // link
773    data8 0 // unlink
774    data8 0 // execve
775    data8 0 // chdir
776    data8 0 // fchdir // 1035
777    data8 0 // utimes
778    data8 0 // mknod
779    data8 0 // chmod
780    data8 0 // chown
781    data8 0 // lseek // 1040
782    data8 fsys_getpid // getpid
783    data8 fsys_getppid // getppid
784    data8 0 // mount
785    data8 0 // umount
786    data8 0 // setuid // 1045
787    data8 0 // getuid
788    data8 0 // geteuid
789    data8 0 // ptrace
790    data8 0 // access
791    data8 0 // sync // 1050
792    data8 0 // fsync
793    data8 0 // fdatasync
794    data8 0 // kill
795    data8 0 // rename
796    data8 0 // mkdir // 1055
797    data8 0 // rmdir
798    data8 0 // dup
799    data8 0 // pipe
800    data8 0 // times
801    data8 0 // brk // 1060
802    data8 0 // setgid
803    data8 0 // getgid
804    data8 0 // getegid
805    data8 0 // acct
806    data8 0 // ioctl // 1065
807    data8 0 // fcntl
808    data8 0 // umask
809    data8 0 // chroot
810    data8 0 // ustat
811    data8 0 // dup2 // 1070
812    data8 0 // setreuid
813    data8 0 // setregid
814    data8 0 // getresuid
815    data8 0 // setresuid
816    data8 0 // getresgid // 1075
817    data8 0 // setresgid
818    data8 0 // getgroups
819    data8 0 // setgroups
820    data8 0 // getpgid
821    data8 0 // setpgid // 1080
822    data8 0 // setsid
823    data8 0 // getsid
824    data8 0 // sethostname
825    data8 0 // setrlimit
826    data8 0 // getrlimit // 1085
827    data8 0 // getrusage
828    data8 fsys_gettimeofday // gettimeofday
829    data8 0 // settimeofday
830    data8 0 // select
831    data8 0 // poll // 1090
832    data8 0 // symlink
833    data8 0 // readlink
834    data8 0 // uselib
835    data8 0 // swapon
836    data8 0 // swapoff // 1095
837    data8 0 // reboot
838    data8 0 // truncate
839    data8 0 // ftruncate
840    data8 0 // fchmod
841    data8 0 // fchown // 1100
842    data8 0 // getpriority
843    data8 0 // setpriority
844    data8 0 // statfs
845    data8 0 // fstatfs
846    data8 0 // gettid // 1105
847    data8 0 // semget
848    data8 0 // semop
849    data8 0 // semctl
850    data8 0 // msgget
851    data8 0 // msgsnd // 1110
852    data8 0 // msgrcv
853    data8 0 // msgctl
854    data8 0 // shmget
855    data8 0 // shmat
856    data8 0 // shmdt // 1115
857    data8 0 // shmctl
858    data8 0 // syslog
859    data8 0 // setitimer
860    data8 0 // getitimer
861    data8 0 // 1120
862    data8 0
863    data8 0
864    data8 0 // vhangup
865    data8 0 // lchown
866    data8 0 // remap_file_pages // 1125
867    data8 0 // wait4
868    data8 0 // sysinfo
869    data8 0 // clone
870    data8 0 // setdomainname
871    data8 0 // newuname // 1130
872    data8 0 // adjtimex
873    data8 0
874    data8 0 // init_module
875    data8 0 // delete_module
876    data8 0 // 1135
877    data8 0
878    data8 0 // quotactl
879    data8 0 // bdflush
880    data8 0 // sysfs
881    data8 0 // personality // 1140
882    data8 0 // afs_syscall
883    data8 0 // setfsuid
884    data8 0 // setfsgid
885    data8 0 // getdents
886    data8 0 // flock // 1145
887    data8 0 // readv
888    data8 0 // writev
889    data8 0 // pread64
890    data8 0 // pwrite64
891    data8 0 // sysctl // 1150
892    data8 0 // mmap
893    data8 0 // munmap
894    data8 0 // mlock
895    data8 0 // mlockall
896    data8 0 // mprotect // 1155
897    data8 0 // mremap
898    data8 0 // msync
899    data8 0 // munlock
900    data8 0 // munlockall
901    data8 0 // sched_getparam // 1160
902    data8 0 // sched_setparam
903    data8 0 // sched_getscheduler
904    data8 0 // sched_setscheduler
905    data8 0 // sched_yield
906    data8 0 // sched_get_priority_max // 1165
907    data8 0 // sched_get_priority_min
908    data8 0 // sched_rr_get_interval
909    data8 0 // nanosleep
910    data8 0 // nfsservctl
911    data8 0 // prctl // 1170
912    data8 0 // getpagesize
913    data8 0 // mmap2
914    data8 0 // pciconfig_read
915    data8 0 // pciconfig_write
916    data8 0 // perfmonctl // 1175
917    data8 0 // sigaltstack
918    data8 0 // rt_sigaction
919    data8 0 // rt_sigpending
920    data8 fsys_rt_sigprocmask // rt_sigprocmask
921    data8 0 // rt_sigqueueinfo // 1180
922    data8 0 // rt_sigreturn
923    data8 0 // rt_sigsuspend
924    data8 0 // rt_sigtimedwait
925    data8 0 // getcwd
926    data8 0 // capget // 1185
927    data8 0 // capset
928    data8 0 // sendfile
929    data8 0
930    data8 0
931    data8 0 // socket // 1190
932    data8 0 // bind
933    data8 0 // connect
934    data8 0 // listen
935    data8 0 // accept
936    data8 0 // getsockname // 1195
937    data8 0 // getpeername
938    data8 0 // socketpair
939    data8 0 // send
940    data8 0 // sendto
941    data8 0 // recv // 1200
942    data8 0 // recvfrom
943    data8 0 // shutdown
944    data8 0 // setsockopt
945    data8 0 // getsockopt
946    data8 0 // sendmsg // 1205
947    data8 0 // recvmsg
948    data8 0 // pivot_root
949    data8 0 // mincore
950    data8 0 // madvise
951    data8 0 // newstat // 1210
952    data8 0 // newlstat
953    data8 0 // newfstat
954    data8 0 // clone2
955    data8 0 // getdents64
956    data8 0 // getunwind // 1215
957    data8 0 // readahead
958    data8 0 // setxattr
959    data8 0 // lsetxattr
960    data8 0 // fsetxattr
961    data8 0 // getxattr // 1220
962    data8 0 // lgetxattr
963    data8 0 // fgetxattr
964    data8 0 // listxattr
965    data8 0 // llistxattr
966    data8 0 // flistxattr // 1225
967    data8 0 // removexattr
968    data8 0 // lremovexattr
969    data8 0 // fremovexattr
970    data8 0 // tkill
971    data8 0 // futex // 1230
972    data8 0 // sched_setaffinity
973    data8 0 // sched_getaffinity
974    data8 fsys_set_tid_address // set_tid_address
975    data8 0 // fadvise64_64
976    data8 0 // tgkill // 1235
977    data8 0 // exit_group
978    data8 0 // lookup_dcookie
979    data8 0 // io_setup
980    data8 0 // io_destroy
981    data8 0 // io_getevents // 1240
982    data8 0 // io_submit
983    data8 0 // io_cancel
984    data8 0 // epoll_create
985    data8 0 // epoll_ctl
986    data8 0 // epoll_wait // 1245
987    data8 0 // restart_syscall
988    data8 0 // semtimedop
989    data8 0 // timer_create
990    data8 0 // timer_settime
991    data8 0 // timer_gettime // 1250
992    data8 0 // timer_getoverrun
993    data8 0 // timer_delete
994    data8 0 // clock_settime
995    data8 fsys_clock_gettime // clock_gettime
996    data8 0 // clock_getres // 1255
997    data8 0 // clock_nanosleep
998    data8 0 // fstatfs64
999    data8 0 // statfs64
1000    data8 0 // mbind
1001    data8 0 // get_mempolicy // 1260
1002    data8 0 // set_mempolicy
1003    data8 0 // mq_open
1004    data8 0 // mq_unlink
1005    data8 0 // mq_timedsend
1006    data8 0 // mq_timedreceive // 1265
1007    data8 0 // mq_notify
1008    data8 0 // mq_getsetattr
1009    data8 0 // kexec_load
1010    data8 0 // vserver
1011    data8 0 // waitid // 1270
1012    data8 0 // add_key
1013    data8 0 // request_key
1014    data8 0 // keyctl
1015    data8 0 // ioprio_set
1016    data8 0 // ioprio_get // 1275
1017    data8 0 // move_pages
1018    data8 0 // inotify_init
1019    data8 0 // inotify_add_watch
1020    data8 0 // inotify_rm_watch
1021    data8 0 // migrate_pages // 1280
1022    data8 0 // openat
1023    data8 0 // mkdirat
1024    data8 0 // mknodat
1025    data8 0 // fchownat
1026    data8 0 // futimesat // 1285
1027    data8 0 // newfstatat
1028    data8 0 // unlinkat
1029    data8 0 // renameat
1030    data8 0 // linkat
1031    data8 0 // symlinkat // 1290
1032    data8 0 // readlinkat
1033    data8 0 // fchmodat
1034    data8 0 // faccessat
1035    data8 0
1036    data8 0 // 1295
1037    data8 0 // unshare
1038    data8 0 // splice
1039    data8 0 // set_robust_list
1040    data8 0 // get_robust_list
1041    data8 0 // sync_file_range // 1300
1042    data8 0 // tee
1043    data8 0 // vmsplice
1044    data8 0
1045    data8 fsys_getcpu // getcpu // 1304
1046
1047    // fill in zeros for the remaining entries
    // (pads the table out to 8*NR_syscalls bytes measured from the table label)
1048    .zero:
1049    .space paravirt_fsyscall_table + 8*NR_syscalls - .zero, 0
1050

Archive Download this file



interactive