...
1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements. See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership. The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License. You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
# In-place perl filter that rewrites single-operand rotates, "ro[lr] <reg>",
# as "ro[lr] <reg>, 1" for yasm compatibility.
# - \w{2,4} covers every x86-64 register spelling, from two characters (al, r8)
#   up to four (r10d, r15w, r12b); a memory operand ends in "]" and is never
#   matched.
# - "$$" is Make's escape for a literal "$": perl sees the end-of-line anchor
#   "$" and the capture reference "$1".
PERL_FIXUP_ROTATE=perl -i -pe 's/(ro[rl]\s+\w{2,4})$$/$$1, 1/'
19
# Tools used to regenerate the Go assembly: clang emits assembly from the C
# sources and c2goasm converts that assembly into the checked-in *.s files.
C2GOASM := c2goasm
CC := clang-11

# Flags common to every x86-64 build: freestanding x86_64 target, Intel syntax
# output, aggressive inlining, and no unwind tables, exceptions, RTTI, builtins
# or jump tables.
C_FLAGS := -target x86_64-unknown-none -masm=intel -mno-red-zone -mstackrealign -mllvm -inline-threshold=1000
C_FLAGS += -fno-asynchronous-unwind-tables -fno-exceptions -fno-rtti -O3 -fno-builtin -ffast-math -fno-jump-tables -I_lib

# Instruction-set selection, appended to C_FLAGS per target.
ASM_FLAGS_AVX2 := -mavx2 -mfma
ASM_FLAGS_SSE4 := -msse4
ASM_FLAGS_BMI2 := -mbmi2
ASM_FLAGS_POPCNT := -mpopcnt

# Flags for the NEON builds. No -target is given here, so the compiler's
# default target is used -- NOTE(review): presumably these rules are meant to
# run with an arm64 default target; confirm.
C_FLAGS_NEON := -O3 -fvectorize -mllvm -force-vector-width=16 -fno-asynchronous-unwind-tables -mno-red-zone -mstackrealign -fno-exceptions
C_FLAGS_NEON += -fno-rtti -fno-builtin -ffast-math -fno-jump-tables -I_lib
31
# Source inventories, computed once at parse time. Both skip the _lib/ tree
# and all *_test.go files.
# - The explicit -print binds only to the right-hand side of -o, so the pruned
#   ./_lib directory itself is not listed.
# - ALL_SOURCES groups the two name tests with -o; two -name tests placed side
#   by side are ANDed and can never both match one file.
GO_SOURCES := $(shell find . -path ./_lib -prune -o -name '*.go' -not -name '*_test.go' -print)
ALL_SOURCES := $(shell find . -path ./_lib -prune -o \( -name '*.go' -o -name '*.s' \) -not -name '*_test.go' -print)
34
# Targets that name commands rather than files. Declaring them phony keeps a
# stray file called "assembly" or "clean" from making them look up to date.
.PHONY: assembly clean
36
# Go assembly files generated for amd64.
INTEL_SOURCES := \
	bit_packing_avx2_amd64.s \
	unpack_bool_avx2_amd64.s \
	unpack_bool_sse4_amd64.s

# Go assembly files for arm64.
ARM_SOURCES := \
	bit_packing_neon_arm64.s \
	unpack_bool_neon_arm64.s
43
#
# ARROW-15336: DO NOT add the Arm64 sources (ARM_SOURCES) to the assembly target until c2goasm adds Arm64 support.
# min_max_neon_arm64.s was generated by asm2plan9s
# and then manually reformatted as Arm64 Plan 9 assembly.
#
49
# Regenerate every amd64 Go assembly file listed in INTEL_SOURCES.
# ARM_SOURCES is not a prerequisite of this target.
assembly: $(INTEL_SOURCES)
51
# Compile each C kernel in _lib/ to assembly, then post-process the output in
# place. Every step is its own recipe line so that a failing compiler aborts
# the rule instead of letting the perl fixups run and report success.

# AVX2 bit packing. The final step rewrites memcpy/memset references to
# clib·_memcpy(SB) / clib·_memset(SB).
_lib/bit_packing_avx2.s: _lib/bit_packing_avx2.c
	$(CC) -S $(C_FLAGS) $(ASM_FLAGS_AVX2) $< -o $@
	$(PERL_FIXUP_ROTATE) $@
	perl -i -pe 's/mem(cpy|set)/clib·_mem\1(SB)/' $@

# unpack_bool.c is compiled twice, once per x86 instruction set.
_lib/unpack_bool_avx2.s: _lib/unpack_bool.c
	$(CC) -S $(C_FLAGS) $(ASM_FLAGS_AVX2) $< -o $@
	$(PERL_FIXUP_ROTATE) $@

_lib/unpack_bool_sse4.s: _lib/unpack_bool.c
	$(CC) -S $(C_FLAGS) $(ASM_FLAGS_SSE4) $< -o $@
	$(PERL_FIXUP_ROTATE) $@

# NEON builds use C_FLAGS_NEON and no ASM_FLAGS_* variable.
_lib/unpack_bool_neon.s: _lib/unpack_bool.c
	$(CC) -S $(C_FLAGS_NEON) $< -o $@
	$(PERL_FIXUP_ROTATE) $@

_lib/bit_packing_neon.s: _lib/bit_packing_neon.c
	$(CC) -S $(C_FLAGS_NEON) $< -o $@
66
# Convert each clang-generated _lib/<name>.s into the Go assembly file
# <name>_amd64.s with c2goasm. One static pattern rule covers every entry of
# INTEL_SOURCES; the stem is the file name without the "_amd64.s" suffix.
$(INTEL_SOURCES): %_amd64.s: _lib/%.s
	$(C2GOASM) -a -f $< $@
75
# Remove the generated Go assembly and the intermediate clang output it was
# built from. The intermediates are named _lib/<name>.s, without the "_amd64"
# suffix, so they are derived with patsubst rather than by prefixing
# INTEL_SOURCES with _lib/.
clean:
	rm -f $(INTEL_SOURCES)
	rm -f $(patsubst %_amd64.s,_lib/%.s,$(INTEL_SOURCES))
View as plain text