[Info-vax] x86-64 data alignment / faulting
Arne Vajhøj
arne at vajhoej.dk
Mon Mar 7 21:41:09 EST 2022
On 2/28/2022 1:55 PM, Simon Clubley wrote:
> On 2022-02-28, Simon Clubley <clubley at remove_me.eisner.decus.org-Earth.UFP> wrote:
>> On 2022-02-27, Arne Vajhøj <arne at vajhoej.dk> wrote:
>>>
>>> I modified the program to test with different data sizes, verified
>>> that the code was indeed working on unaligned addresses and
>>> tried both sequential and random access to array.
>>>
>>> I simply can't get a big difference between aligned and unaligned access.
>>>
>>
>> Have you looked at the generated code to verify that unaligned access
>> is really occurring ?
>>
>> Another possibility, depending on how smart the compiler is, is that
>> it could always do aligned access lookups and then only extract the
>> data it needs out of each lookup.
>>
>
> Also, the optimisation settings and level might make a difference.
> Try varying them to see if the behaviour changes, especially when
> you switch between optimising for time versus space (and yes, include
> -O0 in the list of things you try. :-)).
No alignment effect with default, -O0 or -O3.
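The unaligned addresses come from calling TEST with the array starting
a few bytes into a byte buffer. This is only a sketch of the idea, not
the exact driver (N and REP are example values, and gfortran may want
-fallow-argument-mismatch if TEST is compiled in the same source file,
because BUF(IX) is passed where a REAL*8 array is expected):

      PROGRAM DRIVER
C     BUF and X share storage via EQUIVALENCE, so BUF(IX) is the
C     address of X(1) plus IX-1 bytes.  TEST then stores REAL*8
C     values starting at that (possibly unaligned) address.
      INTEGER*4 N,REP
      PARAMETER (N=1000000,REP=100)
      INTEGER*1 BUF(8*(N+1))
      REAL*8 X(N+1)
      EQUIVALENCE (BUF,X)
      INTEGER*4 IX
      DO 10 IX=1,8
         CALL TEST(IX,BUF(IX),N,REP)
   10 CONTINUE
      END
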
> It's for reasons like this that I suggested at the start of this
> to look at the generated code, just in case the compiler is doing
> something you are not expecting.
I am not good enough at x86-64 assembler to tell whether it does
anything unexpected. The test subroutine:

      SUBROUTINE TEST(IX,X,N,REP)
      INTEGER*4 IX,N,REP
      REAL*8 X(*)
      INTEGER*4 I,T1,T2,DUMMY
      CALL SYSTEM_CLOCK(T1,DUMMY,DUMMY)
      DO 200 J=1,REP
         DO 100 I=1,N
            X(I)=I
  100    CONTINUE
  200 CONTINUE
      CALL SYSTEM_CLOCK(T2,DUMMY,DUMMY)
      WRITE(6,300) IX-1,T2-T1
      RETURN
  300 FORMAT(1X,'OFFSET ',I2,' : ',I6,' ms')
      END
becomes:
.seh_proc test_
test_:
pushq %r13
.seh_pushreg %r13
pushq %r12
.seh_pushreg %r12
pushq %rbp
.seh_pushreg %rbp
pushq %rdi
.seh_pushreg %rdi
pushq %rsi
.seh_pushreg %rsi
pushq %rbx
.seh_pushreg %rbx
subq $600, %rsp
.seh_stackalloc 600
.seh_endprologue
movq %r9, %rbp
movq %rcx, %rbx
movq %rdx, %rsi
movq %r8, %r12
leaq 40(%rsp), %rdx
leaq 36(%rsp), %rcx
leaq 44(%rsp), %r8
call _gfortran_system_clock_4
movl 0(%rbp), %r11d
movl 36(%rsp), %edi
testl %r11d, %r11d
jle .L2
movl (%r12), %r8d
testl %r8d, %r8d
jle .L2
movl %r8d, %eax
movl %r8d, %ebp
addl $1, %r11d
movl $1, %ecx
shrl $2, %eax
andl $-4, %ebp
movdqa .LC1(%rip), %xmm3
leal -1(%r8), %r12d
subl $1, %eax
leal 1(%rbp), %r13d
salq $5, %rax
leaq 32(%rsi,%rax), %rdx
.p2align 4,,10
.p2align 3
.L6:
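# body executed once per repetition of the outer (REP) loop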
cmpl $2, %r12d
jbe .L7
movdqa .LC0(%rip), %xmm1
movq %rsi, %rax
.p2align 4,,10
.p2align 3
.L4:
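# vectorized inner loop: four REAL*8 values (32 bytes) per iteration,
# stored with movups, the unaligned packed-store form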
movdqa %xmm1, %xmm0
addq $32, %rax
paddd %xmm3, %xmm1
cvtdq2pd %xmm0, %xmm2
pshufd $238, %xmm0, %xmm0
movups %xmm2, -32(%rax)
cvtdq2pd %xmm0, %xmm0
movups %xmm0, -16(%rax)
cmpq %rdx, %rax
jne .L4
movl %r13d, %eax
cmpl %ebp, %r8d
je .L5
.L3:
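# scalar tail: the last N mod 4 elements (or all of them when N < 4)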
pxor %xmm0, %xmm0
movslq %eax, %r9
leal 1(%rax), %r10d
cvtsi2sdl %eax, %xmm0
leaq (%rsi,%r9,8), %r9
movsd %xmm0, -8(%r9)
cmpl %r10d, %r8d
jl .L5
pxor %xmm0, %xmm0
addl $2, %eax
cvtsi2sdl %r10d, %xmm0
movsd %xmm0, (%r9)
cmpl %eax, %r8d
jl .L5
pxor %xmm0, %xmm0
cvtsi2sdl %eax, %xmm0
movsd %xmm0, 8(%r9)
.L5:
addl $1, %ecx
cmpl %r11d, %ecx
jne .L6
.L2:
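# second SYSTEM_CLOCK call, then the formatted WRITE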
leaq 52(%rsp), %rdx
leaq 56(%rsp), %r8
leaq 48(%rsp), %rcx
leaq 64(%rsp), %r12
call _gfortran_system_clock_4
leaq .LC2(%rip), %rax
movq %r12, %rcx
movl 48(%rsp), %esi
movq %rax, 72(%rsp)
leaq .LC3(%rip), %rax
leaq 60(%rsp), %r13
movq %rax, 144(%rsp)
movq .LC4(%rip), %rax
subl %edi, %esi
movl $12, 80(%rsp)
movq %rax, 64(%rsp)
movq $32, 152(%rsp)
call _gfortran_st_write
movl (%rbx), %eax
movq %r13, %rdx
movq %r12, %rcx
movl $4, %r8d
subl $1, %eax
movl %eax, 60(%rsp)
call _gfortran_transfer_integer_write
movq %r13, %rdx
movq %r12, %rcx
movl %esi, 60(%rsp)
movl $4, %r8d
call _gfortran_transfer_integer_write
movq %r12, %rcx
call _gfortran_st_write_done
nop
addq $600, %rsp
popq %rbx
popq %rsi
popq %rdi
popq %rbp
popq %r12
popq %r13
ret
.L7:
movl $1, %eax
jmp .L3
.seh_endproc
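
FWIW, the only stores into X that I can spot are the two movups
instructions in the vectorized loop and the movsd stores in the scalar
tail, and as far as I know neither form requires the address to be
aligned. So the generated code does not seem to assume alignment at
all - but I may be misreading it.
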
Arne