blob: 14782d70a1fafd60267a242bb089b91c05659036 [file] [log] [blame]
/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#include "code_generator_x86.h"
18#include "mirror/array-inl.h"
19
20namespace art {
21namespace x86 {
22
23// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
24#define __ down_cast<X86Assembler*>(GetAssembler())-> // NOLINT
25
26void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
27 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
28 switch (instruction->GetPackedType()) {
29 case Primitive::kPrimLong:
30 // Long needs extra temporary to load the register pair.
31 locations->AddTemp(Location::RequiresFpuRegister());
32 FALLTHROUGH_INTENDED;
33 case Primitive::kPrimBoolean:
34 case Primitive::kPrimByte:
35 case Primitive::kPrimChar:
36 case Primitive::kPrimShort:
37 case Primitive::kPrimInt:
38 locations->SetInAt(0, Location::RequiresRegister());
39 locations->SetOut(Location::RequiresFpuRegister());
40 break;
41 case Primitive::kPrimFloat:
42 case Primitive::kPrimDouble:
43 locations->SetInAt(0, Location::RequiresFpuRegister());
44 locations->SetOut(Location::SameAsFirstInput());
45 break;
46 default:
47 LOG(FATAL) << "Unsupported SIMD type";
48 UNREACHABLE();
49 }
50}
51
52void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
53 LocationSummary* locations = instruction->GetLocations();
54 XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
55 switch (instruction->GetPackedType()) {
56 case Primitive::kPrimBoolean:
57 case Primitive::kPrimByte:
58 DCHECK_EQ(16u, instruction->GetVectorLength());
59 __ movd(reg, locations->InAt(0).AsRegister<Register>());
60 __ punpcklbw(reg, reg);
61 __ punpcklwd(reg, reg);
62 __ pshufd(reg, reg, Immediate(0));
63 break;
64 case Primitive::kPrimChar:
65 case Primitive::kPrimShort:
66 DCHECK_EQ(8u, instruction->GetVectorLength());
67 __ movd(reg, locations->InAt(0).AsRegister<Register>());
68 __ punpcklwd(reg, reg);
69 __ pshufd(reg, reg, Immediate(0));
70 break;
71 case Primitive::kPrimInt:
72 DCHECK_EQ(4u, instruction->GetVectorLength());
73 __ movd(reg, locations->InAt(0).AsRegister<Register>());
74 __ pshufd(reg, reg, Immediate(0));
75 break;
76 case Primitive::kPrimLong: {
77 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
78 DCHECK_EQ(2u, instruction->GetVectorLength());
79 __ movd(reg, locations->InAt(0).AsRegisterPairLow<Register>());
80 __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
81 __ punpckldq(reg, tmp);
82 __ punpcklqdq(reg, reg);
83 break;
84 }
85 case Primitive::kPrimFloat:
86 DCHECK(locations->InAt(0).Equals(locations->Out()));
87 DCHECK_EQ(4u, instruction->GetVectorLength());
88 __ shufps(reg, reg, Immediate(0));
89 break;
90 case Primitive::kPrimDouble:
91 DCHECK(locations->InAt(0).Equals(locations->Out()));
92 DCHECK_EQ(2u, instruction->GetVectorLength());
93 __ shufpd(reg, reg, Immediate(0));
94 break;
95 default:
96 LOG(FATAL) << "Unsupported SIMD type";
97 UNREACHABLE();
98 }
99}
100
101void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) {
102 LOG(FATAL) << "No SIMD for " << instruction->GetId();
103}
104
105void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) {
106 LOG(FATAL) << "No SIMD for " << instruction->GetId();
107}
108
109void LocationsBuilderX86::VisitVecSumReduce(HVecSumReduce* instruction) {
110 LOG(FATAL) << "No SIMD for " << instruction->GetId();
111}
112
113void InstructionCodeGeneratorX86::VisitVecSumReduce(HVecSumReduce* instruction) {
114 LOG(FATAL) << "No SIMD for " << instruction->GetId();
115}
116
117// Helper to set up locations for vector unary operations.
118static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
119 LocationSummary* locations = new (arena) LocationSummary(instruction);
120 switch (instruction->GetPackedType()) {
121 case Primitive::kPrimBoolean:
122 case Primitive::kPrimByte:
123 case Primitive::kPrimChar:
124 case Primitive::kPrimShort:
125 case Primitive::kPrimInt:
126 case Primitive::kPrimLong:
127 case Primitive::kPrimFloat:
128 case Primitive::kPrimDouble:
129 locations->SetInAt(0, Location::RequiresFpuRegister());
130 locations->SetOut(Location::RequiresFpuRegister());
131 break;
132 default:
133 LOG(FATAL) << "Unsupported SIMD type";
134 UNREACHABLE();
135 }
136}
137
138void LocationsBuilderX86::VisitVecCnv(HVecCnv* instruction) {
139 CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
140}
141
142void InstructionCodeGeneratorX86::VisitVecCnv(HVecCnv* instruction) {
143 LocationSummary* locations = instruction->GetLocations();
144 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
145 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
146 Primitive::Type from = instruction->GetInputType();
147 Primitive::Type to = instruction->GetResultType();
148 if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) {
149 DCHECK_EQ(4u, instruction->GetVectorLength());
150 __ cvtdq2ps(dst, src);
151 } else {
152 LOG(FATAL) << "Unsupported SIMD type";
153 }
154}
155
156void LocationsBuilderX86::VisitVecNeg(HVecNeg* instruction) {
157 CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
158}
159
160void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) {
161 LocationSummary* locations = instruction->GetLocations();
162 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
163 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
164 switch (instruction->GetPackedType()) {
165 case Primitive::kPrimByte:
166 DCHECK_EQ(16u, instruction->GetVectorLength());
167 __ pxor(dst, dst);
168 __ psubb(dst, src);
169 break;
170 case Primitive::kPrimChar:
171 case Primitive::kPrimShort:
172 DCHECK_EQ(8u, instruction->GetVectorLength());
173 __ pxor(dst, dst);
174 __ psubw(dst, src);
175 break;
176 case Primitive::kPrimInt:
177 DCHECK_EQ(4u, instruction->GetVectorLength());
178 __ pxor(dst, dst);
179 __ psubd(dst, src);
180 break;
181 case Primitive::kPrimLong:
182 DCHECK_EQ(2u, instruction->GetVectorLength());
183 __ pxor(dst, dst);
184 __ psubq(dst, src);
185 break;
186 case Primitive::kPrimFloat:
187 DCHECK_EQ(4u, instruction->GetVectorLength());
188 __ xorps(dst, dst);
189 __ subps(dst, src);
190 break;
191 case Primitive::kPrimDouble:
192 DCHECK_EQ(2u, instruction->GetVectorLength());
193 __ xorpd(dst, dst);
194 __ subpd(dst, src);
195 break;
196 default:
197 LOG(FATAL) << "Unsupported SIMD type";
198 UNREACHABLE();
199 }
200}
201
Aart Bik6daebeb2017-04-03 14:35:41 -0700202void LocationsBuilderX86::VisitVecAbs(HVecAbs* instruction) {
203 CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
Aart Bik472821b2017-04-27 17:23:51 -0700204 // Integral-abs requires a temporary for the comparison.
Aart Bik6daebeb2017-04-03 14:35:41 -0700205 if (instruction->GetPackedType() == Primitive::kPrimInt) {
206 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
207 }
208}
209
210void InstructionCodeGeneratorX86::VisitVecAbs(HVecAbs* instruction) {
211 LocationSummary* locations = instruction->GetLocations();
212 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
213 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
214 switch (instruction->GetPackedType()) {
215 case Primitive::kPrimInt: {
216 DCHECK_EQ(4u, instruction->GetVectorLength());
217 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
218 __ movaps(dst, src);
219 __ pxor(tmp, tmp);
220 __ pcmpgtd(tmp, dst);
221 __ pxor(dst, tmp);
222 __ psubd(dst, tmp);
223 break;
224 }
225 case Primitive::kPrimFloat:
226 DCHECK_EQ(4u, instruction->GetVectorLength());
227 __ pcmpeqb(dst, dst); // all ones
228 __ psrld(dst, Immediate(1));
229 __ andps(dst, src);
230 break;
231 case Primitive::kPrimDouble:
232 DCHECK_EQ(2u, instruction->GetVectorLength());
233 __ pcmpeqb(dst, dst); // all ones
234 __ psrlq(dst, Immediate(1));
235 __ andpd(dst, src);
236 break;
237 default:
238 LOG(FATAL) << "Unsupported SIMD type";
239 UNREACHABLE();
240 }
241}
242
Aart Bikf8f5a162017-02-06 15:35:29 -0800243void LocationsBuilderX86::VisitVecNot(HVecNot* instruction) {
244 CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
245 // Boolean-not requires a temporary to construct the 16 x one.
246 if (instruction->GetPackedType() == Primitive::kPrimBoolean) {
247 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
248 }
249}
250
251void InstructionCodeGeneratorX86::VisitVecNot(HVecNot* instruction) {
252 LocationSummary* locations = instruction->GetLocations();
253 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
254 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
255 switch (instruction->GetPackedType()) {
256 case Primitive::kPrimBoolean: { // special case boolean-not
257 DCHECK_EQ(16u, instruction->GetVectorLength());
258 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
259 __ pxor(dst, dst);
260 __ pcmpeqb(tmp, tmp); // all ones
261 __ psubb(dst, tmp); // 16 x one
262 __ pxor(dst, src);
263 break;
264 }
265 case Primitive::kPrimByte:
266 case Primitive::kPrimChar:
267 case Primitive::kPrimShort:
268 case Primitive::kPrimInt:
269 case Primitive::kPrimLong:
270 DCHECK_LE(2u, instruction->GetVectorLength());
271 DCHECK_LE(instruction->GetVectorLength(), 16u);
272 __ pcmpeqb(dst, dst); // all ones
273 __ pxor(dst, src);
274 break;
275 case Primitive::kPrimFloat:
276 DCHECK_EQ(4u, instruction->GetVectorLength());
277 __ pcmpeqb(dst, dst); // all ones
278 __ xorps(dst, src);
279 break;
280 case Primitive::kPrimDouble:
281 DCHECK_EQ(2u, instruction->GetVectorLength());
282 __ pcmpeqb(dst, dst); // all ones
283 __ xorpd(dst, src);
284 break;
285 default:
286 LOG(FATAL) << "Unsupported SIMD type";
287 UNREACHABLE();
288 }
289}
290
291// Helper to set up locations for vector binary operations.
292static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
293 LocationSummary* locations = new (arena) LocationSummary(instruction);
294 switch (instruction->GetPackedType()) {
295 case Primitive::kPrimBoolean:
296 case Primitive::kPrimByte:
297 case Primitive::kPrimChar:
298 case Primitive::kPrimShort:
299 case Primitive::kPrimInt:
300 case Primitive::kPrimLong:
301 case Primitive::kPrimFloat:
302 case Primitive::kPrimDouble:
303 locations->SetInAt(0, Location::RequiresFpuRegister());
304 locations->SetInAt(1, Location::RequiresFpuRegister());
305 locations->SetOut(Location::SameAsFirstInput());
306 break;
307 default:
308 LOG(FATAL) << "Unsupported SIMD type";
309 UNREACHABLE();
310 }
311}
312
313void LocationsBuilderX86::VisitVecAdd(HVecAdd* instruction) {
314 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
315}
316
317void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) {
318 LocationSummary* locations = instruction->GetLocations();
319 DCHECK(locations->InAt(0).Equals(locations->Out()));
320 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
321 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
322 switch (instruction->GetPackedType()) {
323 case Primitive::kPrimByte:
324 DCHECK_EQ(16u, instruction->GetVectorLength());
325 __ paddb(dst, src);
326 break;
327 case Primitive::kPrimChar:
328 case Primitive::kPrimShort:
329 DCHECK_EQ(8u, instruction->GetVectorLength());
330 __ paddw(dst, src);
331 break;
332 case Primitive::kPrimInt:
333 DCHECK_EQ(4u, instruction->GetVectorLength());
334 __ paddd(dst, src);
335 break;
336 case Primitive::kPrimLong:
337 DCHECK_EQ(2u, instruction->GetVectorLength());
338 __ paddq(dst, src);
339 break;
340 case Primitive::kPrimFloat:
341 DCHECK_EQ(4u, instruction->GetVectorLength());
342 __ addps(dst, src);
343 break;
344 case Primitive::kPrimDouble:
345 DCHECK_EQ(2u, instruction->GetVectorLength());
346 __ addpd(dst, src);
347 break;
348 default:
349 LOG(FATAL) << "Unsupported SIMD type";
350 UNREACHABLE();
351 }
352}
353
Aart Bikf3e61ee2017-04-12 17:09:20 -0700354void LocationsBuilderX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
355 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
356}
357
358void InstructionCodeGeneratorX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
359 LocationSummary* locations = instruction->GetLocations();
360 DCHECK(locations->InAt(0).Equals(locations->Out()));
361 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
362 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
363
364 DCHECK(instruction->IsRounded());
365 DCHECK(instruction->IsUnsigned());
366
367 switch (instruction->GetPackedType()) {
368 case Primitive::kPrimByte:
369 DCHECK_EQ(16u, instruction->GetVectorLength());
370 __ pavgb(dst, src);
371 return;
372 case Primitive::kPrimChar:
373 case Primitive::kPrimShort:
374 DCHECK_EQ(8u, instruction->GetVectorLength());
375 __ pavgw(dst, src);
376 return;
377 default:
378 LOG(FATAL) << "Unsupported SIMD type";
379 UNREACHABLE();
380 }
381}
382
Aart Bikf8f5a162017-02-06 15:35:29 -0800383void LocationsBuilderX86::VisitVecSub(HVecSub* instruction) {
384 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
385}
386
387void InstructionCodeGeneratorX86::VisitVecSub(HVecSub* instruction) {
388 LocationSummary* locations = instruction->GetLocations();
389 DCHECK(locations->InAt(0).Equals(locations->Out()));
390 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
391 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
392 switch (instruction->GetPackedType()) {
393 case Primitive::kPrimByte:
394 DCHECK_EQ(16u, instruction->GetVectorLength());
395 __ psubb(dst, src);
396 break;
397 case Primitive::kPrimChar:
398 case Primitive::kPrimShort:
399 DCHECK_EQ(8u, instruction->GetVectorLength());
400 __ psubw(dst, src);
401 break;
402 case Primitive::kPrimInt:
403 DCHECK_EQ(4u, instruction->GetVectorLength());
404 __ psubd(dst, src);
405 break;
406 case Primitive::kPrimLong:
407 DCHECK_EQ(2u, instruction->GetVectorLength());
408 __ psubq(dst, src);
409 break;
410 case Primitive::kPrimFloat:
411 DCHECK_EQ(4u, instruction->GetVectorLength());
412 __ subps(dst, src);
413 break;
414 case Primitive::kPrimDouble:
415 DCHECK_EQ(2u, instruction->GetVectorLength());
416 __ subpd(dst, src);
417 break;
418 default:
419 LOG(FATAL) << "Unsupported SIMD type";
420 UNREACHABLE();
421 }
422}
423
424void LocationsBuilderX86::VisitVecMul(HVecMul* instruction) {
425 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
426}
427
428void InstructionCodeGeneratorX86::VisitVecMul(HVecMul* instruction) {
429 LocationSummary* locations = instruction->GetLocations();
430 DCHECK(locations->InAt(0).Equals(locations->Out()));
431 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
432 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
433 switch (instruction->GetPackedType()) {
434 case Primitive::kPrimChar:
435 case Primitive::kPrimShort:
436 DCHECK_EQ(8u, instruction->GetVectorLength());
437 __ pmullw(dst, src);
438 break;
439 case Primitive::kPrimInt:
440 DCHECK_EQ(4u, instruction->GetVectorLength());
441 __ pmulld(dst, src);
442 break;
443 case Primitive::kPrimFloat:
444 DCHECK_EQ(4u, instruction->GetVectorLength());
445 __ mulps(dst, src);
446 break;
447 case Primitive::kPrimDouble:
448 DCHECK_EQ(2u, instruction->GetVectorLength());
449 __ mulpd(dst, src);
450 break;
451 default:
452 LOG(FATAL) << "Unsupported SIMD type";
453 UNREACHABLE();
454 }
455}
456
457void LocationsBuilderX86::VisitVecDiv(HVecDiv* instruction) {
458 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
459}
460
461void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) {
462 LocationSummary* locations = instruction->GetLocations();
463 DCHECK(locations->InAt(0).Equals(locations->Out()));
464 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
465 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
466 switch (instruction->GetPackedType()) {
467 case Primitive::kPrimFloat:
468 DCHECK_EQ(4u, instruction->GetVectorLength());
469 __ divps(dst, src);
470 break;
471 case Primitive::kPrimDouble:
472 DCHECK_EQ(2u, instruction->GetVectorLength());
473 __ divpd(dst, src);
474 break;
475 default:
476 LOG(FATAL) << "Unsupported SIMD type";
477 UNREACHABLE();
478 }
479}
480
Aart Bikf3e61ee2017-04-12 17:09:20 -0700481void LocationsBuilderX86::VisitVecMin(HVecMin* instruction) {
482 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
483}
484
485void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) {
Aart Bikc8e93c72017-05-10 10:49:22 -0700486 LocationSummary* locations = instruction->GetLocations();
487 DCHECK(locations->InAt(0).Equals(locations->Out()));
488 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
489 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
490 switch (instruction->GetPackedType()) {
491 case Primitive::kPrimByte:
492 DCHECK_EQ(16u, instruction->GetVectorLength());
493 if (instruction->IsUnsigned()) {
494 __ pminub(dst, src);
495 } else {
496 __ pminsb(dst, src);
497 }
498 break;
499 case Primitive::kPrimChar:
500 case Primitive::kPrimShort:
501 DCHECK_EQ(8u, instruction->GetVectorLength());
502 if (instruction->IsUnsigned()) {
503 __ pminuw(dst, src);
504 } else {
505 __ pminsw(dst, src);
506 }
507 break;
508 case Primitive::kPrimInt:
509 DCHECK_EQ(4u, instruction->GetVectorLength());
510 if (instruction->IsUnsigned()) {
511 __ pminud(dst, src);
512 } else {
513 __ pminsd(dst, src);
514 }
515 break;
516 // Next cases are sloppy wrt 0.0 vs -0.0.
517 case Primitive::kPrimFloat:
518 DCHECK_EQ(4u, instruction->GetVectorLength());
519 DCHECK(!instruction->IsUnsigned());
520 __ minps(dst, src);
521 break;
522 case Primitive::kPrimDouble:
523 DCHECK_EQ(2u, instruction->GetVectorLength());
524 DCHECK(!instruction->IsUnsigned());
525 __ minpd(dst, src);
526 break;
527 default:
528 LOG(FATAL) << "Unsupported SIMD type";
529 UNREACHABLE();
530 }
Aart Bikf3e61ee2017-04-12 17:09:20 -0700531}
532
533void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) {
534 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
535}
536
537void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) {
Aart Bikc8e93c72017-05-10 10:49:22 -0700538 LocationSummary* locations = instruction->GetLocations();
539 DCHECK(locations->InAt(0).Equals(locations->Out()));
540 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
541 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
542 switch (instruction->GetPackedType()) {
543 case Primitive::kPrimByte:
544 DCHECK_EQ(16u, instruction->GetVectorLength());
545 if (instruction->IsUnsigned()) {
546 __ pmaxub(dst, src);
547 } else {
548 __ pmaxsb(dst, src);
549 }
550 break;
551 case Primitive::kPrimChar:
552 case Primitive::kPrimShort:
553 DCHECK_EQ(8u, instruction->GetVectorLength());
554 if (instruction->IsUnsigned()) {
555 __ pmaxuw(dst, src);
556 } else {
557 __ pmaxsw(dst, src);
558 }
559 break;
560 case Primitive::kPrimInt:
561 DCHECK_EQ(4u, instruction->GetVectorLength());
562 if (instruction->IsUnsigned()) {
563 __ pmaxud(dst, src);
564 } else {
565 __ pmaxsd(dst, src);
566 }
567 break;
568 // Next cases are sloppy wrt 0.0 vs -0.0.
569 case Primitive::kPrimFloat:
570 DCHECK_EQ(4u, instruction->GetVectorLength());
571 DCHECK(!instruction->IsUnsigned());
572 __ maxps(dst, src);
573 break;
574 case Primitive::kPrimDouble:
575 DCHECK_EQ(2u, instruction->GetVectorLength());
576 DCHECK(!instruction->IsUnsigned());
577 __ maxpd(dst, src);
578 break;
579 default:
580 LOG(FATAL) << "Unsupported SIMD type";
581 UNREACHABLE();
582 }
Aart Bikf3e61ee2017-04-12 17:09:20 -0700583}
584
Aart Bikf8f5a162017-02-06 15:35:29 -0800585void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) {
586 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
587}
588
589void InstructionCodeGeneratorX86::VisitVecAnd(HVecAnd* instruction) {
590 LocationSummary* locations = instruction->GetLocations();
591 DCHECK(locations->InAt(0).Equals(locations->Out()));
592 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
593 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
594 switch (instruction->GetPackedType()) {
595 case Primitive::kPrimBoolean:
596 case Primitive::kPrimByte:
597 case Primitive::kPrimChar:
598 case Primitive::kPrimShort:
599 case Primitive::kPrimInt:
600 case Primitive::kPrimLong:
601 DCHECK_LE(2u, instruction->GetVectorLength());
602 DCHECK_LE(instruction->GetVectorLength(), 16u);
603 __ pand(dst, src);
604 break;
605 case Primitive::kPrimFloat:
606 DCHECK_EQ(4u, instruction->GetVectorLength());
607 __ andps(dst, src);
608 break;
609 case Primitive::kPrimDouble:
610 DCHECK_EQ(2u, instruction->GetVectorLength());
611 __ andpd(dst, src);
612 break;
613 default:
614 LOG(FATAL) << "Unsupported SIMD type";
615 UNREACHABLE();
616 }
617}
618
619void LocationsBuilderX86::VisitVecAndNot(HVecAndNot* instruction) {
620 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
621}
622
623void InstructionCodeGeneratorX86::VisitVecAndNot(HVecAndNot* instruction) {
624 LocationSummary* locations = instruction->GetLocations();
625 DCHECK(locations->InAt(0).Equals(locations->Out()));
626 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
627 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
628 switch (instruction->GetPackedType()) {
629 case Primitive::kPrimBoolean:
630 case Primitive::kPrimByte:
631 case Primitive::kPrimChar:
632 case Primitive::kPrimShort:
633 case Primitive::kPrimInt:
634 case Primitive::kPrimLong:
635 DCHECK_LE(2u, instruction->GetVectorLength());
636 DCHECK_LE(instruction->GetVectorLength(), 16u);
637 __ pandn(dst, src);
638 break;
639 case Primitive::kPrimFloat:
640 DCHECK_EQ(4u, instruction->GetVectorLength());
641 __ andnps(dst, src);
642 break;
643 case Primitive::kPrimDouble:
644 DCHECK_EQ(2u, instruction->GetVectorLength());
645 __ andnpd(dst, src);
646 break;
647 default:
648 LOG(FATAL) << "Unsupported SIMD type";
649 UNREACHABLE();
650 }
651}
652
653void LocationsBuilderX86::VisitVecOr(HVecOr* instruction) {
654 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
655}
656
657void InstructionCodeGeneratorX86::VisitVecOr(HVecOr* instruction) {
658 LocationSummary* locations = instruction->GetLocations();
659 DCHECK(locations->InAt(0).Equals(locations->Out()));
660 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
661 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
662 switch (instruction->GetPackedType()) {
663 case Primitive::kPrimBoolean:
664 case Primitive::kPrimByte:
665 case Primitive::kPrimChar:
666 case Primitive::kPrimShort:
667 case Primitive::kPrimInt:
668 case Primitive::kPrimLong:
669 DCHECK_LE(2u, instruction->GetVectorLength());
670 DCHECK_LE(instruction->GetVectorLength(), 16u);
671 __ por(dst, src);
672 break;
673 case Primitive::kPrimFloat:
674 DCHECK_EQ(4u, instruction->GetVectorLength());
675 __ orps(dst, src);
676 break;
677 case Primitive::kPrimDouble:
678 DCHECK_EQ(2u, instruction->GetVectorLength());
679 __ orpd(dst, src);
680 break;
681 default:
682 LOG(FATAL) << "Unsupported SIMD type";
683 UNREACHABLE();
684 }
685}
686
687void LocationsBuilderX86::VisitVecXor(HVecXor* instruction) {
688 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
689}
690
691void InstructionCodeGeneratorX86::VisitVecXor(HVecXor* instruction) {
692 LocationSummary* locations = instruction->GetLocations();
693 DCHECK(locations->InAt(0).Equals(locations->Out()));
694 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
695 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
696 switch (instruction->GetPackedType()) {
697 case Primitive::kPrimBoolean:
698 case Primitive::kPrimByte:
699 case Primitive::kPrimChar:
700 case Primitive::kPrimShort:
701 case Primitive::kPrimInt:
702 case Primitive::kPrimLong:
703 DCHECK_LE(2u, instruction->GetVectorLength());
704 DCHECK_LE(instruction->GetVectorLength(), 16u);
705 __ pxor(dst, src);
706 break;
707 case Primitive::kPrimFloat:
708 DCHECK_EQ(4u, instruction->GetVectorLength());
709 __ xorps(dst, src);
710 break;
711 case Primitive::kPrimDouble:
712 DCHECK_EQ(2u, instruction->GetVectorLength());
713 __ xorpd(dst, src);
714 break;
715 default:
716 LOG(FATAL) << "Unsupported SIMD type";
717 UNREACHABLE();
718 }
719}
720
721// Helper to set up locations for vector shift operations.
722static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
723 LocationSummary* locations = new (arena) LocationSummary(instruction);
724 switch (instruction->GetPackedType()) {
725 case Primitive::kPrimChar:
726 case Primitive::kPrimShort:
727 case Primitive::kPrimInt:
728 case Primitive::kPrimLong:
729 locations->SetInAt(0, Location::RequiresFpuRegister());
730 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
731 locations->SetOut(Location::SameAsFirstInput());
732 break;
733 default:
734 LOG(FATAL) << "Unsupported SIMD type";
735 UNREACHABLE();
736 }
737}
738
739void LocationsBuilderX86::VisitVecShl(HVecShl* instruction) {
740 CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
741}
742
743void InstructionCodeGeneratorX86::VisitVecShl(HVecShl* instruction) {
744 LocationSummary* locations = instruction->GetLocations();
745 DCHECK(locations->InAt(0).Equals(locations->Out()));
746 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
747 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
748 switch (instruction->GetPackedType()) {
749 case Primitive::kPrimChar:
750 case Primitive::kPrimShort:
751 DCHECK_EQ(8u, instruction->GetVectorLength());
752 __ psllw(dst, Immediate(static_cast<uint8_t>(value)));
753 break;
754 case Primitive::kPrimInt:
755 DCHECK_EQ(4u, instruction->GetVectorLength());
756 __ pslld(dst, Immediate(static_cast<uint8_t>(value)));
757 break;
758 case Primitive::kPrimLong:
759 DCHECK_EQ(2u, instruction->GetVectorLength());
760 __ psllq(dst, Immediate(static_cast<uint8_t>(value)));
761 break;
762 default:
763 LOG(FATAL) << "Unsupported SIMD type";
764 UNREACHABLE();
765 }
766}
767
768void LocationsBuilderX86::VisitVecShr(HVecShr* instruction) {
769 CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
770}
771
772void InstructionCodeGeneratorX86::VisitVecShr(HVecShr* instruction) {
773 LocationSummary* locations = instruction->GetLocations();
774 DCHECK(locations->InAt(0).Equals(locations->Out()));
775 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
776 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
777 switch (instruction->GetPackedType()) {
778 case Primitive::kPrimChar:
779 case Primitive::kPrimShort:
780 DCHECK_EQ(8u, instruction->GetVectorLength());
781 __ psraw(dst, Immediate(static_cast<uint8_t>(value)));
782 break;
783 case Primitive::kPrimInt:
784 DCHECK_EQ(4u, instruction->GetVectorLength());
785 __ psrad(dst, Immediate(static_cast<uint8_t>(value)));
786 break;
787 default:
788 LOG(FATAL) << "Unsupported SIMD type";
789 UNREACHABLE();
790 }
791}
792
793void LocationsBuilderX86::VisitVecUShr(HVecUShr* instruction) {
794 CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
795}
796
797void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) {
798 LocationSummary* locations = instruction->GetLocations();
799 DCHECK(locations->InAt(0).Equals(locations->Out()));
800 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
801 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
802 switch (instruction->GetPackedType()) {
803 case Primitive::kPrimChar:
804 case Primitive::kPrimShort:
805 DCHECK_EQ(8u, instruction->GetVectorLength());
806 __ psrlw(dst, Immediate(static_cast<uint8_t>(value)));
807 break;
808 case Primitive::kPrimInt:
809 DCHECK_EQ(4u, instruction->GetVectorLength());
810 __ psrld(dst, Immediate(static_cast<uint8_t>(value)));
811 break;
812 case Primitive::kPrimLong:
813 DCHECK_EQ(2u, instruction->GetVectorLength());
814 __ psrlq(dst, Immediate(static_cast<uint8_t>(value)));
815 break;
816 default:
817 LOG(FATAL) << "Unsupported SIMD type";
818 UNREACHABLE();
819 }
820}
821
Artem Serovf34dd202017-04-10 17:41:46 +0100822void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
823 LOG(FATAL) << "No SIMD for " << instr->GetId();
824}
825
826void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
827 LOG(FATAL) << "No SIMD for " << instr->GetId();
828}
829
Aart Bikf8f5a162017-02-06 15:35:29 -0800830// Helper to set up locations for vector memory operations.
831static void CreateVecMemLocations(ArenaAllocator* arena,
832 HVecMemoryOperation* instruction,
833 bool is_load) {
834 LocationSummary* locations = new (arena) LocationSummary(instruction);
835 switch (instruction->GetPackedType()) {
836 case Primitive::kPrimBoolean:
837 case Primitive::kPrimByte:
838 case Primitive::kPrimChar:
839 case Primitive::kPrimShort:
840 case Primitive::kPrimInt:
841 case Primitive::kPrimLong:
842 case Primitive::kPrimFloat:
843 case Primitive::kPrimDouble:
844 locations->SetInAt(0, Location::RequiresRegister());
845 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
846 if (is_load) {
847 locations->SetOut(Location::RequiresFpuRegister());
848 } else {
849 locations->SetInAt(2, Location::RequiresFpuRegister());
850 }
851 break;
852 default:
853 LOG(FATAL) << "Unsupported SIMD type";
854 UNREACHABLE();
855 }
856}
857
Aart Bik472821b2017-04-27 17:23:51 -0700858// Helper to construct address for vector memory operations.
859static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) {
Aart Bikf8f5a162017-02-06 15:35:29 -0800860 Location base = locations->InAt(0);
861 Location index = locations->InAt(1);
Aart Bikf8f5a162017-02-06 15:35:29 -0800862 ScaleFactor scale = TIMES_1;
863 switch (size) {
864 case 2: scale = TIMES_2; break;
865 case 4: scale = TIMES_4; break;
866 case 8: scale = TIMES_8; break;
867 default: break;
868 }
Aart Bik472821b2017-04-27 17:23:51 -0700869 uint32_t offset = is_string_char_at
870 ? mirror::String::ValueOffset().Uint32Value()
871 : mirror::Array::DataOffset(size).Uint32Value();
Aart Bikf8f5a162017-02-06 15:35:29 -0800872 return CodeGeneratorX86::ArrayAddress(base.AsRegister<Register>(), index, scale, offset);
873}
874
875void LocationsBuilderX86::VisitVecLoad(HVecLoad* instruction) {
876 CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true);
Aart Bik472821b2017-04-27 17:23:51 -0700877 // String load requires a temporary for the compressed load.
878 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
879 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
880 }
Aart Bikf8f5a162017-02-06 15:35:29 -0800881}
882
883void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) {
Aart Bik472821b2017-04-27 17:23:51 -0700884 LocationSummary* locations = instruction->GetLocations();
885 size_t size = Primitive::ComponentSize(instruction->GetPackedType());
886 Address address = VecAddress(locations, size, instruction->IsStringCharAt());
887 XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
Aart Bikf8f5a162017-02-06 15:35:29 -0800888 bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
889 switch (instruction->GetPackedType()) {
Aart Bik472821b2017-04-27 17:23:51 -0700890 case Primitive::kPrimChar:
891 DCHECK_EQ(8u, instruction->GetVectorLength());
892 // Special handling of compressed/uncompressed string load.
893 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
894 NearLabel done, not_compressed;
895 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
896 // Test compression bit.
897 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
898 "Expecting 0=compressed, 1=uncompressed");
899 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
900 __ testb(Address(locations->InAt(0).AsRegister<Register>(), count_offset), Immediate(1));
901 __ j(kNotZero, &not_compressed);
902 // Zero extend 8 compressed bytes into 8 chars.
903 __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true));
904 __ pxor(tmp, tmp);
905 __ punpcklbw(reg, tmp);
906 __ jmp(&done);
907 // Load 4 direct uncompressed chars.
908 __ Bind(&not_compressed);
909 is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
910 __ Bind(&done);
911 return;
912 }
913 FALLTHROUGH_INTENDED;
Aart Bikf8f5a162017-02-06 15:35:29 -0800914 case Primitive::kPrimBoolean:
915 case Primitive::kPrimByte:
Aart Bikf8f5a162017-02-06 15:35:29 -0800916 case Primitive::kPrimShort:
917 case Primitive::kPrimInt:
918 case Primitive::kPrimLong:
919 DCHECK_LE(2u, instruction->GetVectorLength());
920 DCHECK_LE(instruction->GetVectorLength(), 16u);
921 is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
922 break;
923 case Primitive::kPrimFloat:
924 DCHECK_EQ(4u, instruction->GetVectorLength());
925 is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address);
926 break;
927 case Primitive::kPrimDouble:
928 DCHECK_EQ(2u, instruction->GetVectorLength());
929 is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address);
930 break;
931 default:
932 LOG(FATAL) << "Unsupported SIMD type";
933 UNREACHABLE();
934 }
935}
936
937void LocationsBuilderX86::VisitVecStore(HVecStore* instruction) {
938 CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ false);
939}
940
941void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) {
Aart Bik472821b2017-04-27 17:23:51 -0700942 LocationSummary* locations = instruction->GetLocations();
943 size_t size = Primitive::ComponentSize(instruction->GetPackedType());
944 Address address = VecAddress(locations, size, /*is_string_char_at*/ false);
945 XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>();
Aart Bikf8f5a162017-02-06 15:35:29 -0800946 bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
947 switch (instruction->GetPackedType()) {
948 case Primitive::kPrimBoolean:
949 case Primitive::kPrimByte:
950 case Primitive::kPrimChar:
951 case Primitive::kPrimShort:
952 case Primitive::kPrimInt:
953 case Primitive::kPrimLong:
954 DCHECK_LE(2u, instruction->GetVectorLength());
955 DCHECK_LE(instruction->GetVectorLength(), 16u);
956 is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg);
957 break;
958 case Primitive::kPrimFloat:
959 DCHECK_EQ(4u, instruction->GetVectorLength());
960 is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg);
961 break;
962 case Primitive::kPrimDouble:
963 DCHECK_EQ(2u, instruction->GetVectorLength());
964 is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg);
965 break;
966 default:
967 LOG(FATAL) << "Unsupported SIMD type";
968 UNREACHABLE();
969 }
970}
971
972#undef __
973
974} // namespace x86
975} // namespace art