1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
28 {
29         return "PowerPC" SLJIT_CPUINFO;
30 }
31
32 /* Length of an instruction word.
33    Both for ppc-32 and ppc-64. */
34 typedef sljit_u32 sljit_ins;
35
36 #if ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && (defined _AIX)) \
37         || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
38 #define SLJIT_PPC_STACK_FRAME_V2 1
39 #endif
40
41 #ifdef _AIX
42 #include <sys/cache.h>
43 #endif
44
45 #if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
46 #define SLJIT_PASS_ENTRY_ADDR_TO_CALL 1
47 #endif
48
49 #if (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL)
50
51 static void ppc_cache_flush(sljit_ins *from, sljit_ins *to)
52 {
53 #ifdef _AIX
54         _sync_cache_range((caddr_t)from, (int)((size_t)to - (size_t)from));
55 #elif defined(__GNUC__) || (defined(__IBM_GCC_ASM) && __IBM_GCC_ASM)
56 #       if defined(_ARCH_PWR) || defined(_ARCH_PWR2)
57         /* Cache flush for POWER architecture. */
58         while (from < to) {
59                 __asm__ volatile (
60                         "clf 0, %0\n"
61                         "dcs\n"
62                         : : "r"(from)
63                 );
64                 from++;
65         }
66         __asm__ volatile ( "ics" );
67 #       elif defined(_ARCH_COM) && !defined(_ARCH_PPC)
68 #       error "Cache flush is not implemented for PowerPC/POWER common mode."
69 #       else
70         /* Cache flush for PowerPC architecture. */
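        /* For each instruction word: dcbf writes the data cache block back to
           memory, sync waits for that write to complete, and icbi invalidates
           the matching instruction cache block; the final isync discards any
           instructions the core has already prefetched. */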
71         while (from < to) {
72                 __asm__ volatile (
73                         "dcbf 0, %0\n"
74                         "sync\n"
75                         "icbi 0, %0\n"
76                         : : "r"(from)
77                 );
78                 from++;
79         }
80         __asm__ volatile ( "isync" );
81 #       endif
82 #       ifdef __xlc__
83 #       warning "This file may fail to compile if -qfuncsect is used"
84 #       endif
85 #elif defined(__xlc__)
86 #error "Please enable GCC syntax for inline assembly statements with -qasm=gcc"
87 #else
88 #error "This platform requires a cache flush implementation."
89 #endif /* _AIX */
90 }
91
92 #endif /* (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL) */
93
94 #define TMP_REG1        (SLJIT_NUMBER_OF_REGISTERS + 2)
95 #define TMP_REG2        (SLJIT_NUMBER_OF_REGISTERS + 3)
96 #define TMP_ZERO        (SLJIT_NUMBER_OF_REGISTERS + 4)
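/* TMP_ZERO is dedicated to the constant zero: sljit_emit_enter loads 0 into
   it, and it serves both as a zero source operand and as the source of
   MTXER | S(TMP_ZERO), which clears XER before overflow-flag operations. */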
97
98 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
99 #define TMP_CALL_REG    (SLJIT_NUMBER_OF_REGISTERS + 5)
100 #else
101 #define TMP_CALL_REG    TMP_REG2
102 #endif
103
104 #define TMP_FREG1       (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
105 #define TMP_FREG2       (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
106
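/* Mapping from SLJIT's abstract register numbers to PowerPC registers: the
   scratch registers start at r3 (the first argument / return value register
   in the PowerPC calling conventions), the saved registers use the
   callee-saved range r14-r30, and SLJIT_SP maps to r1, the ABI stack pointer. */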
107 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
108         0, 3, 4, 5, 6, 7, 8, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 9, 10, 31, 12
109 };
110
111 static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
112         0, 1, 2, 3, 4, 5, 6, 0, 7
113 };
114
115 /* --------------------------------------------------------------------- */
116 /*  Instruction forms                                                    */
117 /* --------------------------------------------------------------------- */
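/* Operand field encoders: D/S/A/B/C place a mapped general purpose register
   into the corresponding 5-bit field of the instruction word, and FD/FS/FA/
   FB/FC do the same for floating point registers via freg_map. IMM masks a
   16-bit immediate and CRD places a condition register designator in the
   top field. */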
118 #define D(d)            (reg_map[d] << 21)
119 #define S(s)            (reg_map[s] << 21)
120 #define A(a)            (reg_map[a] << 16)
121 #define B(b)            (reg_map[b] << 11)
122 #define C(c)            (reg_map[c] << 6)
123 #define FD(fd)          (freg_map[fd] << 21)
124 #define FS(fs)          (freg_map[fs] << 21)
125 #define FA(fa)          (freg_map[fa] << 16)
126 #define FB(fb)          (freg_map[fb] << 11)
127 #define FC(fc)          (freg_map[fc] << 6)
128 #define IMM(imm)        ((imm) & 0xffff)
129 #define CRD(d)          ((d) << 21)
130
131 /* Instruction bit sections.
132    OE and Rc flag (see ALT_SET_FLAGS). */
133 #define OE(flags)       ((flags) & ALT_SET_FLAGS)
134 /* Rc flag (see ALT_SET_FLAGS). */
135 #define RC(flags)       (((flags) & ALT_SET_FLAGS) >> 10)
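/* HI() sets the 6-bit primary opcode in the top bits of the instruction word;
   LO() sets the extended opcode field, shifted left by one so that bit 0
   stays free for the Rc flag. For example, ADD below is primary opcode 31
   with extended opcode 266. */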
136 #define HI(opcode)      ((opcode) << 26)
137 #define LO(opcode)      ((opcode) << 1)
138
139 #define ADD             (HI(31) | LO(266))
140 #define ADDC            (HI(31) | LO(10))
141 #define ADDE            (HI(31) | LO(138))
142 #define ADDI            (HI(14))
143 #define ADDIC           (HI(13))
144 #define ADDIS           (HI(15))
145 #define ADDME           (HI(31) | LO(234))
146 #define AND             (HI(31) | LO(28))
147 #define ANDI            (HI(28))
148 #define ANDIS           (HI(29))
149 #define Bx              (HI(18))
150 #define BCx             (HI(16))
151 #define BCCTR           (HI(19) | LO(528) | (3 << 11))
152 #define BLR             (HI(19) | LO(16) | (0x14 << 21))
153 #define CNTLZD          (HI(31) | LO(58))
154 #define CNTLZW          (HI(31) | LO(26))
155 #define CMP             (HI(31) | LO(0))
156 #define CMPI            (HI(11))
157 #define CMPL            (HI(31) | LO(32))
158 #define CMPLI           (HI(10))
159 #define CROR            (HI(19) | LO(449))
160 #define DCBT            (HI(31) | LO(278))
161 #define DIVD            (HI(31) | LO(489))
162 #define DIVDU           (HI(31) | LO(457))
163 #define DIVW            (HI(31) | LO(491))
164 #define DIVWU           (HI(31) | LO(459))
165 #define EXTSB           (HI(31) | LO(954))
166 #define EXTSH           (HI(31) | LO(922))
167 #define EXTSW           (HI(31) | LO(986))
168 #define FABS            (HI(63) | LO(264))
169 #define FADD            (HI(63) | LO(21))
170 #define FADDS           (HI(59) | LO(21))
171 #define FCFID           (HI(63) | LO(846))
172 #define FCMPU           (HI(63) | LO(0))
173 #define FCTIDZ          (HI(63) | LO(815))
174 #define FCTIWZ          (HI(63) | LO(15))
175 #define FDIV            (HI(63) | LO(18))
176 #define FDIVS           (HI(59) | LO(18))
177 #define FMR             (HI(63) | LO(72))
178 #define FMUL            (HI(63) | LO(25))
179 #define FMULS           (HI(59) | LO(25))
180 #define FNEG            (HI(63) | LO(40))
181 #define FRSP            (HI(63) | LO(12))
182 #define FSUB            (HI(63) | LO(20))
183 #define FSUBS           (HI(59) | LO(20))
184 #define LD              (HI(58) | 0)
185 #define LWZ             (HI(32))
186 #define MFCR            (HI(31) | LO(19))
187 #define MFLR            (HI(31) | LO(339) | 0x80000)
188 #define MFXER           (HI(31) | LO(339) | 0x10000)
189 #define MTCTR           (HI(31) | LO(467) | 0x90000)
190 #define MTLR            (HI(31) | LO(467) | 0x80000)
191 #define MTXER           (HI(31) | LO(467) | 0x10000)
192 #define MULHD           (HI(31) | LO(73))
193 #define MULHDU          (HI(31) | LO(9))
194 #define MULHW           (HI(31) | LO(75))
195 #define MULHWU          (HI(31) | LO(11))
196 #define MULLD           (HI(31) | LO(233))
197 #define MULLI           (HI(7))
198 #define MULLW           (HI(31) | LO(235))
199 #define NEG             (HI(31) | LO(104))
200 #define NOP             (HI(24))
201 #define NOR             (HI(31) | LO(124))
202 #define OR              (HI(31) | LO(444))
203 #define ORI             (HI(24))
204 #define ORIS            (HI(25))
205 #define RLDICL          (HI(30))
206 #define RLWINM          (HI(21))
207 #define SLD             (HI(31) | LO(27))
208 #define SLW             (HI(31) | LO(24))
209 #define SRAD            (HI(31) | LO(794))
210 #define SRADI           (HI(31) | LO(413 << 1))
211 #define SRAW            (HI(31) | LO(792))
212 #define SRAWI           (HI(31) | LO(824))
213 #define SRD             (HI(31) | LO(539))
214 #define SRW             (HI(31) | LO(536))
215 #define STD             (HI(62) | 0)
216 #define STDU            (HI(62) | 1)
217 #define STDUX           (HI(31) | LO(181))
218 #define STFIWX          (HI(31) | LO(983))
219 #define STW             (HI(36))
220 #define STWU            (HI(37))
221 #define STWUX           (HI(31) | LO(183))
222 #define SUBF            (HI(31) | LO(40))
223 #define SUBFC           (HI(31) | LO(8))
224 #define SUBFE           (HI(31) | LO(136))
225 #define SUBFIC          (HI(8))
226 #define XOR             (HI(31) | LO(316))
227 #define XORI            (HI(26))
228 #define XORIS           (HI(27))
229
230 #define SIMM_MAX        (0x7fff)
231 #define SIMM_MIN        (-0x8000)
232 #define UIMM_MAX        (0xffff)
233
234 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
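/* On ABIs that use function descriptors (e.g. AIX and 64-bit ELFv1), a
   function pointer addresses a three-word descriptor: entry address, TOC
   pointer (r2) and environment pointer (r11). This helper fills in such a
   descriptor for the generated code, copying the r2/r11 values from an
   existing function's descriptor. */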
235 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func)
236 {
237         sljit_sw* ptrs;
238         if (func_ptr)
239                 *func_ptr = (void*)context;
240         ptrs = (sljit_sw*)func;
241         context->addr = addr ? addr : ptrs[0];
242         context->r2 = ptrs[1];
243         context->r11 = ptrs[2];
244 }
245 #endif
246
247 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
248 {
249         sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
250         FAIL_IF(!ptr);
251         *ptr = ins;
252         compiler->size++;
253         return SLJIT_SUCCESS;
254 }
255
256 static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
257 {
258         sljit_sw diff;
259         sljit_uw target_addr;
260         sljit_sw extra_jump_flags;
261
262 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
263         if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL))
264                 return 0;
265 #else
266         if (jump->flags & SLJIT_REWRITABLE_JUMP)
267                 return 0;
268 #endif
269
270         if (jump->flags & JUMP_ADDR)
271                 target_addr = jump->u.target;
272         else {
273                 SLJIT_ASSERT(jump->flags & JUMP_LABEL);
274                 target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
275         }
276
277 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
278         if (jump->flags & IS_CALL)
279                 goto keep_address;
280 #endif
281
282         diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr) - executable_offset) & ~0x3l;
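        /* diff is the word-aligned signed distance from the jump instruction.
           A conditional branch (bc) has a 16-bit displacement (+/-32 KB), an
           unconditional branch (b) a 26-bit displacement (+/-32 MB); the
           absolute forms are only usable for small target addresses. */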
283
284         extra_jump_flags = 0;
285         if (jump->flags & IS_COND) {
286                 if (diff <= 0x7fff && diff >= -0x8000) {
287                         jump->flags |= PATCH_B;
288                         return 1;
289                 }
290                 if (target_addr <= 0xffff) {
291                         jump->flags |= PATCH_B | PATCH_ABS_B;
292                         return 1;
293                 }
294                 extra_jump_flags = REMOVE_COND;
295
296                 diff -= sizeof(sljit_ins);
297         }
298
299         if (diff <= 0x01ffffff && diff >= -0x02000000) {
300                 jump->flags |= PATCH_B | extra_jump_flags;
301                 return 1;
302         }
303
304         if (target_addr <= 0x03ffffff) {
305                 jump->flags |= PATCH_B | PATCH_ABS_B | extra_jump_flags;
306                 return 1;
307         }
308
309 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
310 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
311 keep_address:
312 #endif
313         if (target_addr <= 0x7fffffff) {
314                 jump->flags |= PATCH_ABS32;
315                 return 1;
316         }
317
318         if (target_addr <= 0x7fffffffffffl) {
319                 jump->flags |= PATCH_ABS48;
320                 return 1;
321         }
322 #endif
323
324         return 0;
325 }
326
327 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
328 {
329         struct sljit_memory_fragment *buf;
330         sljit_ins *code;
331         sljit_ins *code_ptr;
332         sljit_ins *buf_ptr;
333         sljit_ins *buf_end;
334         sljit_uw word_count;
335         sljit_sw executable_offset;
336         sljit_uw addr;
337
338         struct sljit_label *label;
339         struct sljit_jump *jump;
340         struct sljit_const *const_;
341
342         CHECK_ERROR_PTR();
343         CHECK_PTR(check_sljit_generate_code(compiler));
344         reverse_buf(compiler);
345
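        /* Reserve space at the end of the code buffer for the function
           descriptor (struct sljit_function_context); on 64-bit targets the
           size is padded first so the descriptor starts on an 8-byte boundary. */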
346 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
347 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
348         compiler->size += (compiler->size & 0x1) + (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
349 #else
350         compiler->size += (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
351 #endif
352 #endif
353         code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
354         PTR_FAIL_WITH_EXEC_IF(code);
355         buf = compiler->buf;
356
357         code_ptr = code;
358         word_count = 0;
359         executable_offset = SLJIT_EXEC_OFFSET(code);
360
361         label = compiler->labels;
362         jump = compiler->jumps;
363         const_ = compiler->consts;
364
365         do {
366                 buf_ptr = (sljit_ins*)buf->memory;
367                 buf_end = buf_ptr + (buf->used_size >> 2);
368                 do {
369                         *code_ptr = *buf_ptr++;
370                         SLJIT_ASSERT(!label || label->size >= word_count);
371                         SLJIT_ASSERT(!jump || jump->addr >= word_count);
372                         SLJIT_ASSERT(!const_ || const_->addr >= word_count);
373                         /* These structures are ordered by their address. */
374                         if (label && label->size == word_count) {
375                                 /* Just recording the address. */
376                                 label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
377                                 label->size = code_ptr - code;
378                                 label = label->next;
379                         }
380                         if (jump && jump->addr == word_count) {
381 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
382                                 jump->addr = (sljit_uw)(code_ptr - 3);
383 #else
384                                 jump->addr = (sljit_uw)(code_ptr - 6);
385 #endif
386                                 if (detect_jump_type(jump, code_ptr, code, executable_offset)) {
387 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
388                                         code_ptr[-3] = code_ptr[0];
389                                         code_ptr -= 3;
390 #else
391                                         if (jump->flags & PATCH_ABS32) {
392                                                 code_ptr -= 3;
393                                                 code_ptr[-1] = code_ptr[2];
394                                                 code_ptr[0] = code_ptr[3];
395                                         }
396                                         else if (jump->flags & PATCH_ABS48) {
397                                                 code_ptr--;
398                                                 code_ptr[-1] = code_ptr[0];
399                                                 code_ptr[0] = code_ptr[1];
400                                                 /* rldicr rX,rX,32,31 -> rX,rX,16,47 */
401                                                 SLJIT_ASSERT((code_ptr[-3] & 0xfc00ffff) == 0x780007c6);
402                                                 code_ptr[-3] ^= 0x8422;
403                                                 /* oris -> ori */
404                                                 code_ptr[-2] ^= 0x4000000;
405                                         }
406                                         else {
407                                                 code_ptr[-6] = code_ptr[0];
408                                                 code_ptr -= 6;
409                                         }
410 #endif
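                                        /* The conditional branch cannot reach its target directly:
                                           invert its condition (flip the BO true/false bit) so that
                                           it hops over the unconditional branch emitted right after it. */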
411                                         if (jump->flags & REMOVE_COND) {
412                                                 code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001);
413                                                 code_ptr++;
414                                                 jump->addr += sizeof(sljit_ins);
415                                                 code_ptr[0] = Bx;
416                                                 jump->flags -= IS_COND;
417                                         }
418                                 }
419                                 jump = jump->next;
420                         }
421                         if (const_ && const_->addr == word_count) {
422                                 const_->addr = (sljit_uw)code_ptr;
423                                 const_ = const_->next;
424                         }
425                         code_ptr ++;
426                         word_count ++;
427                 } while (buf_ptr < buf_end);
428
429                 buf = buf->next;
430         } while (buf);
431
432         if (label && label->size == word_count) {
433                 label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
434                 label->size = code_ptr - code;
435                 label = label->next;
436         }
437
438         SLJIT_ASSERT(!label);
439         SLJIT_ASSERT(!jump);
440         SLJIT_ASSERT(!const_);
441 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
442         SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins)));
443 #else
444         SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
445 #endif
446
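        /* Second pass: patch every jump according to the strategy selected by
           detect_jump_type, either by fixing up a relative/absolute branch or
           by rewriting the 16-bit halves of an absolute address load. */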
447         jump = compiler->jumps;
448         while (jump) {
449                 do {
450                         addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
451                         buf_ptr = (sljit_ins *)jump->addr;
452
453                         if (jump->flags & PATCH_B) {
454                                 if (jump->flags & IS_COND) {
455                                         if (!(jump->flags & PATCH_ABS_B)) {
456                                                 addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset);
457                                                 SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000);
458                                                 *buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001);
459                                         }
460                                         else {
461                                                 SLJIT_ASSERT(addr <= 0xffff);
462                                                 *buf_ptr = BCx | (addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001);
463                                         }
464                                 }
465                                 else {
466                                         if (!(jump->flags & PATCH_ABS_B)) {
467                                                 addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset);
468                                                 SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000);
469                                                 *buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1);
470                                         }
471                                         else {
472                                                 SLJIT_ASSERT(addr <= 0x03ffffff);
473                                                 *buf_ptr = Bx | (addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1);
474                                         }
475                                 }
476                                 break;
477                         }
478
479                         /* Set the fields of immediate loads. */
480 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
481                         buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
482                         buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
483 #else
484                         if (jump->flags & PATCH_ABS32) {
485                                 SLJIT_ASSERT(addr <= 0x7fffffff);
486                                 buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
487                                 buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
488                                 break;
489                         }
490                         if (jump->flags & PATCH_ABS48) {
491                                 SLJIT_ASSERT(addr <= 0x7fffffffffff);
492                                 buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 32) & 0xffff);
493                                 buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 16) & 0xffff);
494                                 buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | (addr & 0xffff);
495                                 break;
496                         }
497                         buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff);
498                         buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff);
499                         buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff);
500                         buf_ptr[4] = (buf_ptr[4] & 0xffff0000) | (addr & 0xffff);
501 #endif
502                 } while (0);
503                 jump = jump->next;
504         }
505
506         compiler->error = SLJIT_ERR_COMPILED;
507         compiler->executable_offset = executable_offset;
508         compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
509
510         code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
511
512 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
513 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
514         if (((sljit_sw)code_ptr) & 0x4)
515                 code_ptr++;
516 #endif
517         sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
518 #endif
519
520         code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
521
522         SLJIT_CACHE_FLUSH(code, code_ptr);
523
524 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
525         return code_ptr;
526 #else
527         return code;
528 #endif
529 }
530
531 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
532 {
533         switch (feature_type) {
534         case SLJIT_HAS_FPU:
535 #ifdef SLJIT_IS_FPU_AVAILABLE
536                 return SLJIT_IS_FPU_AVAILABLE;
537 #else
538                 /* Available by default. */
539                 return 1;
540 #endif
541
542         case SLJIT_HAS_CLZ:
543                 return 1;
544
545         default:
546                 return 0;
547         }
548 }
549
550 /* --------------------------------------------------------------------- */
551 /*  Entry, exit                                                          */
552 /* --------------------------------------------------------------------- */
553
554 /* inp_flags: */
555
556 /* Creates an index in data_transfer_insts array. */
557 #define LOAD_DATA       0x01
558 #define INDEXED         0x02
559 #define SIGNED_DATA     0x04
560
561 #define WORD_DATA       0x00
562 #define BYTE_DATA       0x08
563 #define HALF_DATA       0x10
564 #define INT_DATA        0x18
565 /* Separates integer and floating point registers */
566 #define GPR_REG         0x1f
567 #define DOUBLE_DATA     0x20
568
569 #define MEM_MASK        0x7f
570
571 /* Other inp_flags. */
572
573 /* Integer operation and set flags -> requires exts on 64 bit systems. */
574 #define ALT_SIGN_EXT    0x000100
575 /* This flag affects the RC() and OERC() macros. */
576 #define ALT_SET_FLAGS   0x000400
577 #define ALT_FORM1       0x001000
578 #define ALT_FORM2       0x002000
579 #define ALT_FORM3       0x004000
580 #define ALT_FORM4       0x008000
581 #define ALT_FORM5       0x010000
582
583 /* Source and destination is register. */
584 #define REG_DEST        0x000001
585 #define REG1_SOURCE     0x000002
586 #define REG2_SOURCE     0x000004
587 /*
588 ALT_SIGN_EXT            0x000100
589 ALT_SET_FLAGS           0x000400
590 ALT_FORM1               0x001000
591 ...
592 ALT_FORM5               0x010000 */
593
594 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
595 #include "sljitNativePPC_32.c"
596 #else
597 #include "sljitNativePPC_64.c"
598 #endif
599
600 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
601 #define STACK_STORE     STW
602 #define STACK_LOAD      LWZ
603 #else
604 #define STACK_STORE     STD
605 #define STACK_LOAD      LD
606 #endif
607
608 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
609         sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
610         sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
611 {
612         sljit_s32 args, i, tmp, offs;
613
614         CHECK_ERROR();
615         CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
616         set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
617
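        /* Prologue: move LR into r0, store TMP_ZERO and the registers that must
           be preserved below the current stack pointer, save LR in the caller's
           frame, zero TMP_ZERO, copy the incoming arguments into saved
           registers, then allocate the frame with a single stwu/stdu so the
           back chain at the new stack pointer stays valid. */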
618         FAIL_IF(push_inst(compiler, MFLR | D(0)));
619         offs = -(sljit_s32)(sizeof(sljit_sw));
620         FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_SP) | IMM(offs)));
621
622         tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
623         for (i = SLJIT_S0; i >= tmp; i--) {
624                 offs -= (sljit_s32)(sizeof(sljit_sw));
625                 FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs)));
626         }
627
628         for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
629                 offs -= (sljit_s32)(sizeof(sljit_sw));
630                 FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs)));
631         }
632
633         SLJIT_ASSERT(offs == -(sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1));
634
635 #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
636         FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw))));
637 #else
638         FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw))));
639 #endif
640
641         FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0));
642
643         args = get_arg_count(arg_types);
644
645         if (args >= 1)
646                 FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(SLJIT_S0) | B(SLJIT_R0)));
647         if (args >= 2)
648                 FAIL_IF(push_inst(compiler, OR | S(SLJIT_R1) | A(SLJIT_S1) | B(SLJIT_R1)));
649         if (args >= 3)
650                 FAIL_IF(push_inst(compiler, OR | S(SLJIT_R2) | A(SLJIT_S2) | B(SLJIT_R2)));
651
652         local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET;
653         local_size = (local_size + 15) & ~0xf;
654         compiler->local_size = local_size;
655
656 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
657         if (local_size <= SIMM_MAX)
658                 FAIL_IF(push_inst(compiler, STWU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
659         else {
660                 FAIL_IF(load_immediate(compiler, 0, -local_size));
661                 FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0)));
662         }
663 #else
664         if (local_size <= SIMM_MAX)
665                 FAIL_IF(push_inst(compiler, STDU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
666         else {
667                 FAIL_IF(load_immediate(compiler, 0, -local_size));
668                 FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0)));
669         }
670 #endif
671
672         return SLJIT_SUCCESS;
673 }
674
675 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
676         sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
677         sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
678 {
679         CHECK_ERROR();
680         CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
681         set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
682
683         local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET;
684         compiler->local_size = (local_size + 15) & ~0xf;
685         return SLJIT_SUCCESS;
686 }
687
688 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
689 {
690         sljit_s32 i, tmp, offs;
691
692         CHECK_ERROR();
693         CHECK(check_sljit_emit_return(compiler, op, src, srcw));
694
695         FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
696
697         if (compiler->local_size <= SIMM_MAX)
698                 FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_SP) | A(SLJIT_SP) | IMM(compiler->local_size)));
699         else {
700                 FAIL_IF(load_immediate(compiler, 0, compiler->local_size));
701                 FAIL_IF(push_inst(compiler, ADD | D(SLJIT_SP) | A(SLJIT_SP) | B(0)));
702         }
703
704 #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
705         FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw))));
706 #else
707         FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw))));
708 #endif
709
710         offs = -(sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1);
711
712         tmp = compiler->scratches;
713         for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
714                 FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs)));
715                 offs += (sljit_s32)(sizeof(sljit_sw));
716         }
717
718         tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
719         for (i = tmp; i <= SLJIT_S0; i++) {
720                 FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs)));
721                 offs += (sljit_s32)(sizeof(sljit_sw));
722         }
723
724         FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_SP) | IMM(offs)));
725         SLJIT_ASSERT(offs == -(sljit_sw)(sizeof(sljit_sw)));
726
727         FAIL_IF(push_inst(compiler, MTLR | S(0)));
728         FAIL_IF(push_inst(compiler, BLR));
729
730         return SLJIT_SUCCESS;
731 }
732
733 #undef STACK_STORE
734 #undef STACK_LOAD
735
736 /* --------------------------------------------------------------------- */
737 /*  Operators                                                            */
738 /* --------------------------------------------------------------------- */
739
740 /* s/l - store/load (1 bit)
741    i/x - immediate/indexed form
742    u/s - unsigned/signed (1 bit)
743    w/b/h/i - word/byte/half/int allowed (2 bit)
744
745    Some opcodes are repeated (e.g. store signed / unsigned byte is the same instruction). */
746
747 /* 64 bit only: [reg+imm] must be aligned to 4 bytes. */
748 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
749 #define INT_ALIGNED     0x10000
750 #endif
751
752 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
753 #define ARCH_32_64(a, b)        a
754 #define INST_CODE_AND_DST(inst, flags, reg) \
755         ((inst) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
756 #else
757 #define ARCH_32_64(a, b)        b
758 #define INST_CODE_AND_DST(inst, flags, reg) \
759         (((inst) & ~INT_ALIGNED) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
760 #endif
761
762 static const sljit_ins data_transfer_insts[64 + 16] = {
763
764 /* -------- Integer -------- */
765
766 /* Word. */
767
768 /* w u i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
769 /* w u i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
770 /* w u x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
771 /* w u x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
772
773 /* w s i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
774 /* w s i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
775 /* w s x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
776 /* w s x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
777
778 /* Byte. */
779
780 /* b u i s */ HI(38) /* stb */,
781 /* b u i l */ HI(34) /* lbz */,
782 /* b u x s */ HI(31) | LO(215) /* stbx */,
783 /* b u x l */ HI(31) | LO(87) /* lbzx */,
784
785 /* b s i s */ HI(38) /* stb */,
786 /* b s i l */ HI(34) /* lbz */ /* EXTS_REQ */,
787 /* b s x s */ HI(31) | LO(215) /* stbx */,
788 /* b s x l */ HI(31) | LO(87) /* lbzx */ /* EXTS_REQ */,
789
790 /* Half. */
791
792 /* h u i s */ HI(44) /* sth */,
793 /* h u i l */ HI(40) /* lhz */,
794 /* h u x s */ HI(31) | LO(407) /* sthx */,
795 /* h u x l */ HI(31) | LO(279) /* lhzx */,
796
797 /* h s i s */ HI(44) /* sth */,
798 /* h s i l */ HI(42) /* lha */,
799 /* h s x s */ HI(31) | LO(407) /* sthx */,
800 /* h s x l */ HI(31) | LO(343) /* lhax */,
801
802 /* Int. */
803
804 /* i u i s */ HI(36) /* stw */,
805 /* i u i l */ HI(32) /* lwz */,
806 /* i u x s */ HI(31) | LO(151) /* stwx */,
807 /* i u x l */ HI(31) | LO(23) /* lwzx */,
808
809 /* i s i s */ HI(36) /* stw */,
810 /* i s i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x2 /* lwa */),
811 /* i s x s */ HI(31) | LO(151) /* stwx */,
812 /* i s x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */),
813
814 /* -------- Floating point -------- */
815
816 /* d   i s */ HI(54) /* stfd */,
817 /* d   i l */ HI(50) /* lfd */,
818 /* d   x s */ HI(31) | LO(727) /* stfdx */,
819 /* d   x l */ HI(31) | LO(599) /* lfdx */,
820
821 /* s   i s */ HI(52) /* stfs */,
822 /* s   i l */ HI(48) /* lfs */,
823 /* s   x s */ HI(31) | LO(663) /* stfsx */,
824 /* s   x l */ HI(31) | LO(535) /* lfsx */,
825 };
826
827 static const sljit_ins updated_data_transfer_insts[64] = {
828
829 /* -------- Integer -------- */
830
831 /* Word. */
832
833 /* w u i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
834 /* w u i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
835 /* w u x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
836 /* w u x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
837
838 /* w s i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
839 /* w s i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
840 /* w s x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
841 /* w s x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
842
843 /* Byte. */
844
845 /* b u i s */ HI(39) /* stbu */,
846 /* b u i l */ HI(35) /* lbzu */,
847 /* b u x s */ HI(31) | LO(247) /* stbux */,
848 /* b u x l */ HI(31) | LO(119) /* lbzux */,
849
850 /* b s i s */ HI(39) /* stbu */,
851 /* b s i l */ 0 /* no such instruction */,
852 /* b s x s */ HI(31) | LO(247) /* stbux */,
853 /* b s x l */ 0 /* no such instruction */,
854
855 /* Half. */
856
857 /* h u i s */ HI(45) /* sthu */,
858 /* h u i l */ HI(41) /* lhzu */,
859 /* h u x s */ HI(31) | LO(439) /* sthux */,
860 /* h u x l */ HI(31) | LO(311) /* lhzux */,
861
862 /* h s i s */ HI(45) /* sthu */,
863 /* h s i l */ HI(43) /* lhau */,
864 /* h s x s */ HI(31) | LO(439) /* sthux */,
865 /* h s x l */ HI(31) | LO(375) /* lhaux */,
866
867 /* Int. */
868
869 /* i u i s */ HI(37) /* stwu */,
870 /* i u i l */ HI(33) /* lwzu */,
871 /* i u x s */ HI(31) | LO(183) /* stwux */,
872 /* i u x l */ HI(31) | LO(55) /* lwzux */,
873
874 /* i s i s */ HI(37) /* stwu */,
875 /* i s i l */ ARCH_32_64(HI(33) /* lwzu */, 0 /* no such instruction */),
876 /* i s x s */ HI(31) | LO(183) /* stwux */,
877 /* i s x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */),
878
879 /* -------- Floating point -------- */
880
881 /* d   i s */ HI(55) /* stfdu */,
882 /* d   i l */ HI(51) /* lfdu */,
883 /* d   x s */ HI(31) | LO(759) /* stfdux */,
884 /* d   x l */ HI(31) | LO(631) /* lfdux */,
885
886 /* s   i s */ HI(53) /* stfsu */,
887 /* s   i l */ HI(49) /* lfsu */,
888 /* s   x s */ HI(31) | LO(695) /* stfsux */,
889 /* s   x l */ HI(31) | LO(567) /* lfsux */,
890 };
891
892 #undef ARCH_32_64
893
894 /* Simple cases (no caching is required). */
895 static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg,
896         sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
897 {
898         sljit_ins inst;
899         sljit_s32 offs_reg;
900         sljit_sw high_short;
901
902         /* Should work when (arg & REG_MASK) == 0. */
903         SLJIT_ASSERT(A(0) == 0);
904         SLJIT_ASSERT(arg & SLJIT_MEM);
905
906         if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
907                 argw &= 0x3;
908                 offs_reg = OFFS_REG(arg);
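                /* For two-register addressing (SLJIT_MEM2) the low two bits of
                   argw hold a shift amount; when it is non-zero, build the scaled
                   index (offs_reg << argw) into tmp_reg first. */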
909
910                 if (argw != 0) {
911 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
912                         FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(arg)) | A(tmp_reg) | (argw << 11) | ((31 - argw) << 1)));
913 #else
914                         FAIL_IF(push_inst(compiler, RLDI(tmp_reg, OFFS_REG(arg), argw, 63 - argw, 1)));
915 #endif
916                         offs_reg = tmp_reg;
917                 }
918
919                 inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
920
921 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
922                 SLJIT_ASSERT(!(inst & INT_ALIGNED));
923 #endif
924
925                 return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(offs_reg));
926         }
927
928         inst = data_transfer_insts[inp_flags & MEM_MASK];
929         arg &= REG_MASK;
930
931 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
932         if ((inst & INT_ALIGNED) && (argw & 0x3) != 0) {
933                 FAIL_IF(load_immediate(compiler, tmp_reg, argw));
934
935                 inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
936                 return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg) | B(tmp_reg));
937         }
938 #endif
939
940         if (argw <= SIMM_MAX && argw >= SIMM_MIN)
941                 return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg) | IMM(argw));
942
943 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
944         if (argw <= 0x7fff7fffl && argw >= -0x80000000l) {
945 #endif
946
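                /* Split argw into a high part added with addis (rounded up to
                   compensate for the sign extension of the low 16 bits) and a
                   low 16-bit displacement carried by the load/store itself. */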
947                 high_short = (sljit_s32)(argw + ((argw & 0x8000) << 1)) & ~0xffff;
948
949 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
950                 SLJIT_ASSERT(high_short && high_short <= 0x7fffffffl && high_short >= -0x80000000l);
951 #else
952                 SLJIT_ASSERT(high_short);
953 #endif
954
955                 FAIL_IF(push_inst(compiler, ADDIS | D(tmp_reg) | A(arg) | IMM(high_short >> 16)));
956                 return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_reg) | IMM(argw));
957
958 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
959         }
960
961         /* The rest is PPC-64 only. */
962
963         FAIL_IF(load_immediate(compiler, tmp_reg, argw));
964
965         inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
966         return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg) | B(tmp_reg));
967 #endif
968 }
969
970 static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 input_flags,
971         sljit_s32 dst, sljit_sw dstw,
972         sljit_s32 src1, sljit_sw src1w,
973         sljit_s32 src2, sljit_sw src2w)
974 {
975         /* arg1 goes to TMP_REG1 or src reg
976            arg2 goes to TMP_REG2, imm or src reg
977            result goes to TMP_REG2, so storing the result to memory can still use TMP_REG1. */
978         sljit_s32 dst_r = TMP_REG2;
979         sljit_s32 src1_r;
980         sljit_s32 src2_r;
981         sljit_s32 sugg_src2_r = TMP_REG2;
982         sljit_s32 flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_SIGN_EXT | ALT_SET_FLAGS);
983
984         /* Destination check. */
985         if (SLOW_IS_REG(dst)) {
986                 dst_r = dst;
987                 flags |= REG_DEST;
988
989                 if (op >= SLJIT_MOV && op <= SLJIT_MOV_P)
990                         sugg_src2_r = dst_r;
991         }
992
993         /* Source 1. */
994         if (FAST_IS_REG(src1)) {
995                 src1_r = src1;
996                 flags |= REG1_SOURCE;
997         }
998         else if (src1 & SLJIT_IMM) {
999                 FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
1000                 src1_r = TMP_REG1;
1001         }
1002         else {
1003                 FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1));
1004                 src1_r = TMP_REG1;
1005         }
1006
1007         /* Source 2. */
1008         if (FAST_IS_REG(src2)) {
1009                 src2_r = src2;
1010                 flags |= REG2_SOURCE;
1011
1012                 if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOV_P)
1013                         dst_r = src2_r;
1014         }
1015         else if (src2 & SLJIT_IMM) {
1016                 FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
1017                 src2_r = sugg_src2_r;
1018         }
1019         else {
1020                 FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, TMP_REG2));
1021                 src2_r = sugg_src2_r;
1022         }
1023
1024         FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
1025
1026         if (!(dst & SLJIT_MEM))
1027                 return SLJIT_SUCCESS;
1028
1029         return emit_op_mem(compiler, input_flags, dst_r, dst, dstw, TMP_REG1);
1030 }
1031
1032 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
1033 {
1034 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1035         sljit_s32 int_op = op & SLJIT_I32_OP;
1036 #endif
1037
1038         CHECK_ERROR();
1039         CHECK(check_sljit_emit_op0(compiler, op));
1040
1041         op = GET_OPCODE(op);
1042         switch (op) {
1043         case SLJIT_BREAKPOINT:
1044         case SLJIT_NOP:
1045                 return push_inst(compiler, NOP);
1046         case SLJIT_LMUL_UW:
1047         case SLJIT_LMUL_SW:
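                /* Double-width multiply: R0 is copied to TMP_REG1 first because
                   the low half of the product overwrites it; mullw/mulld produces
                   the low word in R0 and mulhw(u)/mulhd(u) the high word in R1. */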
1048                 FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
1049 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1050                 FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
1051                 return push_inst(compiler, (op == SLJIT_LMUL_UW ? MULHDU : MULHD) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
1052 #else
1053                 FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
1054                 return push_inst(compiler, (op == SLJIT_LMUL_UW ? MULHWU : MULHW) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
1055 #endif
1056         case SLJIT_DIVMOD_UW:
1057         case SLJIT_DIVMOD_SW:
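                /* Divide, then compute the remainder as dividend - quotient * divisor
                   (the dividend was saved in TMP_REG1): the quotient ends up in R0
                   and the remainder in R1. */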
1058                 FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
1059 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1060                 FAIL_IF(push_inst(compiler, (int_op ? (op == SLJIT_DIVMOD_UW ? DIVWU : DIVW) : (op == SLJIT_DIVMOD_UW ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)));
1061                 FAIL_IF(push_inst(compiler, (int_op ? MULLW : MULLD) | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
1062 #else
1063                 FAIL_IF(push_inst(compiler, (op == SLJIT_DIVMOD_UW ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)));
1064                 FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
1065 #endif
1066                 return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1));
1067         case SLJIT_DIV_UW:
1068         case SLJIT_DIV_SW:
1069 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1070                 return push_inst(compiler, (int_op ? (op == SLJIT_DIV_UW ? DIVWU : DIVW) : (op == SLJIT_DIV_UW ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1));
1071 #else
1072                 return push_inst(compiler, (op == SLJIT_DIV_UW ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1));
1073 #endif
1074         }
1075
1076         return SLJIT_SUCCESS;
1077 }
1078
1079 static sljit_s32 emit_prefetch(struct sljit_compiler *compiler,
1080         sljit_s32 src, sljit_sw srcw)
1081 {
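        /* Prefetch is implemented with dcbt (data cache block touch), which is
           only a hint and never faults, so the address needs no range or
           alignment checks. */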
1082         if (!(src & OFFS_REG_MASK)) {
1083                 if (srcw == 0 && (src & REG_MASK) != SLJIT_UNUSED)
1084                         return push_inst(compiler, DCBT | A(0) | B(src & REG_MASK));
1085
1086                 FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
1087                 /* Works with SLJIT_MEM0() case as well. */
1088                 return push_inst(compiler, DCBT | A(src & REG_MASK) | B(TMP_REG1));
1089         }
1090
1091         srcw &= 0x3;
1092
1093         if (srcw == 0)
1094                 return push_inst(compiler, DCBT | A(src & REG_MASK) | B(OFFS_REG(src)));
1095
1096 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
1097         FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(src)) | A(TMP_REG1) | (srcw << 11) | ((31 - srcw) << 1)));
1098 #else
1099         FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(src), srcw, 63 - srcw, 1)));
1100 #endif
1101         return push_inst(compiler, DCBT | A(src & REG_MASK) | B(TMP_REG1));
1102 }
1103
1104 #define EMIT_MOV(type, type_flags, type_cast) \
1105         emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? type_cast srcw : srcw)
1106
1107 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1108         sljit_s32 dst, sljit_sw dstw,
1109         sljit_s32 src, sljit_sw srcw)
1110 {
1111         sljit_s32 flags = HAS_FLAGS(op) ? ALT_SET_FLAGS : 0;
1112         sljit_s32 op_flags = GET_ALL_FLAGS(op);
1113
1114         CHECK_ERROR();
1115         CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1116         ADJUST_LOCAL_OFFSET(dst, dstw);
1117         ADJUST_LOCAL_OFFSET(src, srcw);
1118
1119         if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
1120                 if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
1121                         return emit_prefetch(compiler, src, srcw);
1122
1123                 return SLJIT_SUCCESS;
1124         }
1125
1126         op = GET_OPCODE(op);
1127         if ((src & SLJIT_IMM) && srcw == 0)
1128                 src = TMP_ZERO;
1129
1130         if (GET_FLAG_TYPE(op_flags) == SLJIT_OVERFLOW)
1131                 FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
1132
1133         if (op < SLJIT_NOT && FAST_IS_REG(src) && src == dst) {
1134                 if (!TYPE_CAST_NEEDED(op))
1135                         return SLJIT_SUCCESS;
1136         }
1137
1138 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1139         if (op_flags & SLJIT_I32_OP) {
1140                 if (op < SLJIT_NOT) {
1141                         if (src & SLJIT_MEM) {
1142                                 if (op == SLJIT_MOV_S32)
1143                                         op = SLJIT_MOV_U32;
1144                         }
1145                         else if (src & SLJIT_IMM) {
1146                                 if (op == SLJIT_MOV_U32)
1147                                         op = SLJIT_MOV_S32;
1148                         }
1149                 }
1150                 else {
1151                         /* Most operations expect sign extended arguments. */
1152                         flags |= INT_DATA | SIGNED_DATA;
1153                         if (HAS_FLAGS(op_flags))
1154                                 flags |= ALT_SIGN_EXT;
1155                 }
1156         }
1157 #endif
1158
1159         switch (op) {
1160         case SLJIT_MOV:
1161         case SLJIT_MOV_P:
1162 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
1163         case SLJIT_MOV_U32:
1164         case SLJIT_MOV_S32:
1165 #endif
1166                 return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
1167
1168 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1169         case SLJIT_MOV_U32:
1170                 return EMIT_MOV(SLJIT_MOV_U32, INT_DATA, (sljit_u32));
1171
1172         case SLJIT_MOV_S32:
1173                 return EMIT_MOV(SLJIT_MOV_S32, INT_DATA | SIGNED_DATA, (sljit_s32));
1174 #endif
1175
1176         case SLJIT_MOV_U8:
1177                 return EMIT_MOV(SLJIT_MOV_U8, BYTE_DATA, (sljit_u8));
1178
1179         case SLJIT_MOV_S8:
1180                 return EMIT_MOV(SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA, (sljit_s8));
1181
1182         case SLJIT_MOV_U16:
1183                 return EMIT_MOV(SLJIT_MOV_U16, HALF_DATA, (sljit_u16));
1184
1185         case SLJIT_MOV_S16:
1186                 return EMIT_MOV(SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA, (sljit_s16));
1187
1188         case SLJIT_NOT:
1189                 return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw);
1190
1191         case SLJIT_NEG:
1192                 return emit_op(compiler, SLJIT_NEG, flags | (GET_FLAG_TYPE(op_flags) ? ALT_FORM1 : 0), dst, dstw, TMP_REG1, 0, src, srcw);
1193
1194         case SLJIT_CLZ:
1195 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1196                 return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_I32_OP) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw);
1197 #else
1198                 return emit_op(compiler, SLJIT_CLZ, flags, dst, dstw, TMP_REG1, 0, src, srcw);
1199 #endif
1200         }
1201
1202         return SLJIT_SUCCESS;
1203 }
1204
1205 #undef EMIT_MOV
1206
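/* Immediate operand classifiers: SL/UL accept values that fit the signed /
   unsigned low 16-bit immediate forms, SH/UH accept values whose low 16 bits
   are zero (high-halfword forms such as addis/oris), and ADD/UI cover wider
   ranges that can still be built from a pair of halfword immediates. */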
1207 #define TEST_SL_IMM(src, srcw) \
1208         (((src) & SLJIT_IMM) && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN)
1209
1210 #define TEST_UL_IMM(src, srcw) \
1211         (((src) & SLJIT_IMM) && !((srcw) & ~0xffff))
1212
1213 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1214 #define TEST_SH_IMM(src, srcw) \
1215         (((src) & SLJIT_IMM) && !((srcw) & 0xffff) && (srcw) <= 0x7fffffffl && (srcw) >= -0x80000000l)
1216 #else
1217 #define TEST_SH_IMM(src, srcw) \
1218         (((src) & SLJIT_IMM) && !((srcw) & 0xffff))
1219 #endif
1220
1221 #define TEST_UH_IMM(src, srcw) \
1222         (((src) & SLJIT_IMM) && !((srcw) & ~0xffff0000))
1223
1224 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1225 #define TEST_ADD_IMM(src, srcw) \
1226         (((src) & SLJIT_IMM) && (srcw) <= 0x7fff7fffl && (srcw) >= -0x80000000l)
1227 #else
1228 #define TEST_ADD_IMM(src, srcw) \
1229         ((src) & SLJIT_IMM)
1230 #endif
1231
1232 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1233 #define TEST_UI_IMM(src, srcw) \
1234         (((src) & SLJIT_IMM) && !((srcw) & ~0xffffffff))
1235 #else
1236 #define TEST_UI_IMM(src, srcw) \
1237         ((src) & SLJIT_IMM)
1238 #endif
1239
1240 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
1241         sljit_s32 dst, sljit_sw dstw,
1242         sljit_s32 src1, sljit_sw src1w,
1243         sljit_s32 src2, sljit_sw src2w)
1244 {
1245         sljit_s32 flags = HAS_FLAGS(op) ? ALT_SET_FLAGS : 0;
1246
1247         CHECK_ERROR();
1248         CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
1249         ADJUST_LOCAL_OFFSET(dst, dstw);
1250         ADJUST_LOCAL_OFFSET(src1, src1w);
1251         ADJUST_LOCAL_OFFSET(src2, src2w);
1252
1253         if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
1254                 return SLJIT_SUCCESS;
1255
1256         if ((src1 & SLJIT_IMM) && src1w == 0)
1257                 src1 = TMP_ZERO;
1258         if ((src2 & SLJIT_IMM) && src2w == 0)
1259                 src2 = TMP_ZERO;
1260
1261 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1262         if (op & SLJIT_I32_OP) {
1263                 /* Most operations expect sign extended arguments. */
1264                 flags |= INT_DATA | SIGNED_DATA;
1265                 if (src1 & SLJIT_IMM)
1266                         src1w = (sljit_s32)(src1w);
1267                 if (src2 & SLJIT_IMM)
1268                         src2w = (sljit_s32)(src2w);
1269                 if (HAS_FLAGS(op))
1270                         flags |= ALT_SIGN_EXT;
1271         }
1272 #endif
1273         if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW)
1274                 FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
1275
1276         switch (GET_OPCODE(op)) {
1277         case SLJIT_ADD:
1278                 if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW)
1279                         return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w);
1280
1281                 if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
1282                         if (TEST_SL_IMM(src2, src2w)) {
1283                                 compiler->imm = src2w & 0xffff;
1284                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
1285                         }
1286                         if (TEST_SL_IMM(src1, src1w)) {
1287                                 compiler->imm = src1w & 0xffff;
1288                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
1289                         }
1290                         if (TEST_SH_IMM(src2, src2w)) {
1291                                 compiler->imm = (src2w >> 16) & 0xffff;
1292                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1293                         }
1294                         if (TEST_SH_IMM(src1, src1w)) {
1295                                 compiler->imm = (src1w >> 16) & 0xffff;
1296                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
1297                         }
1298                         /* Range between -1 and -32768 is covered above. */
1299                         if (TEST_ADD_IMM(src2, src2w)) {
1300                                 compiler->imm = src2w & 0xffffffff;
1301                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
1302                         }
1303                         if (TEST_ADD_IMM(src1, src1w)) {
1304                                 compiler->imm = src1w & 0xffffffff;
1305                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0);
1306                         }
1307                 }
1308                 if (HAS_FLAGS(op)) {
1309                         if (TEST_SL_IMM(src2, src2w)) {
1310                                 compiler->imm = src2w & 0xffff;
1311                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1312                         }
1313                         if (TEST_SL_IMM(src1, src1w)) {
1314                                 compiler->imm = src1w & 0xffff;
1315                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
1316                         }
1317                 }
1318                 return emit_op(compiler, SLJIT_ADD, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? ALT_FORM4 : 0), dst, dstw, src1, src1w, src2, src2w);
1319
1320         case SLJIT_ADDC:
1321                 return emit_op(compiler, SLJIT_ADDC, flags, dst, dstw, src1, src1w, src2, src2w);
1322
1323         case SLJIT_SUB:
1324                 if (GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_LESS_EQUAL) {
1325                         if (dst == SLJIT_UNUSED) {
1326                                 if (TEST_UL_IMM(src2, src2w)) {
1327                                         compiler->imm = src2w & 0xffff;
1328                                         return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
1329                                 }
1330                                 return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w);
1331                         }
1332
1333                         if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= (SIMM_MAX + 1)) {
1334                                 compiler->imm = src2w;
1335                                 return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1336                         }
1337                         return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM3, dst, dstw, src1, src1w, src2, src2w);
1338                 }
1339
1340                 if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW)
1341                         return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, src2, src2w);
1342
1343                 if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
1344                         if (TEST_SL_IMM(src2, -src2w)) {
1345                                 compiler->imm = (-src2w) & 0xffff;
1346                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
1347                         }
1348                         if (TEST_SL_IMM(src1, src1w)) {
1349                                 compiler->imm = src1w & 0xffff;
1350                                 return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
1351                         }
1352                         if (TEST_SH_IMM(src2, -src2w)) {
1353                                 compiler->imm = ((-src2w) >> 16) & 0xffff;
1354                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1355                         }
1356                         /* Range between -1 and -32768 is covered above. */
1357                         if (TEST_ADD_IMM(src2, -src2w)) {
1358                                 compiler->imm = -src2w & 0xffffffff;
1359                                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
1360                         }
1361                 }
1362
1363                 if (dst == SLJIT_UNUSED && GET_FLAG_TYPE(op) != GET_FLAG_TYPE(SLJIT_SET_CARRY)) {
1364                         if (TEST_SL_IMM(src2, src2w)) {
1365                                 compiler->imm = src2w & 0xffff;
1366                                 return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4 | ALT_FORM5, dst, dstw, src1, src1w, TMP_REG2, 0);
1367                         }
1368                         return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w);
1369                 }
1370
1371                 if (TEST_SL_IMM(src2, -src2w)) {
1372                         compiler->imm = (-src2w) & 0xffff;
1373                         return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1374                 }
1375                 /* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */
1376                 return emit_op(compiler, SLJIT_SUB, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);
1377
1378         case SLJIT_SUBC:
1379                 return emit_op(compiler, SLJIT_SUBC, flags, dst, dstw, src1, src1w, src2, src2w);
1380
1381         case SLJIT_MUL:
1382 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1383                 if (op & SLJIT_I32_OP)
1384                         flags |= ALT_FORM2;
1385 #endif
1386                 if (!HAS_FLAGS(op)) {
1387                         if (TEST_SL_IMM(src2, src2w)) {
1388                                 compiler->imm = src2w & 0xffff;
1389                                 return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1390                         }
1391                         if (TEST_SL_IMM(src1, src1w)) {
1392                                 compiler->imm = src1w & 0xffff;
1393                                 return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
1394                         }
1395                 }
1396                 else
1397                         FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
1398                 return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w);
1399
1400         case SLJIT_AND:
1401         case SLJIT_OR:
1402         case SLJIT_XOR:
1403                 /* Commutative unsigned operations. */
1404                 if (!HAS_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) {
1405                         if (TEST_UL_IMM(src2, src2w)) {
1406                                 compiler->imm = src2w;
1407                                 return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1408                         }
1409                         if (TEST_UL_IMM(src1, src1w)) {
1410                                 compiler->imm = src1w;
1411                                 return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
1412                         }
1413                         if (TEST_UH_IMM(src2, src2w)) {
1414                                 compiler->imm = (src2w >> 16) & 0xffff;
1415                                 return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
1416                         }
1417                         if (TEST_UH_IMM(src1, src1w)) {
1418                                 compiler->imm = (src1w >> 16) & 0xffff;
1419                                 return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
1420                         }
1421                 }
1422                 if (GET_OPCODE(op) != SLJIT_AND) {
1423                         /* Unlike OR and XOR, AND resets the unwanted bits as well, so it is excluded here. */
1424                         if (TEST_UI_IMM(src2, src2w)) {
1425                                 compiler->imm = src2w;
1426                                 return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1427                         }
1428                         if (TEST_UI_IMM(src1, src1w)) {
1429                                 compiler->imm = src1w;
1430                                 return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
1431                         }
1432                 }
1433                 return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
1434
1435         case SLJIT_SHL:
1436         case SLJIT_LSHR:
1437         case SLJIT_ASHR:
1438 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1439                 if (op & SLJIT_I32_OP)
1440                         flags |= ALT_FORM2;
1441 #endif
1442                 if (src2 & SLJIT_IMM) {
1443                         compiler->imm = src2w;
1444                         return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1445                 }
1446                 return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
1447         }
1448
1449         return SLJIT_SUCCESS;
1450 }
1451
1452 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
1453 {
1454         CHECK_REG_INDEX(check_sljit_get_register_index(reg));
1455         return reg_map[reg];
1456 }
1457
1458 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
1459 {
1460         CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
1461         return freg_map[reg];
1462 }
1463
1464 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
1465         void *instruction, sljit_s32 size)
1466 {
1467         CHECK_ERROR();
1468         CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
1469
1470         return push_inst(compiler, *(sljit_ins*)instruction);
1471 }
1472
1473 /* --------------------------------------------------------------------- */
1474 /*  Floating point operators                                             */
1475 /* --------------------------------------------------------------------- */
1476
1477 #define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 6))
1478 #define SELECT_FOP(op, single, double) ((op & SLJIT_F32_OP) ? single : double)
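/* FLOAT_DATA ORs in the single precision bit (0x100 >> 6 == 0x4) when SLJIT_F32_OP is set;
   the float_transfer_bit_error compile time assert below relies on this layout.
   SELECT_FOP simply picks the single or double precision opcode. */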
1479
1480 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1481 #define FLOAT_TMP_MEM_OFFSET (6 * sizeof(sljit_sw))
1482 #else
1483 #define FLOAT_TMP_MEM_OFFSET (2 * sizeof(sljit_sw))
1484
1485 #if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
1486 #define FLOAT_TMP_MEM_OFFSET_LOW (2 * sizeof(sljit_sw))
1487 #define FLOAT_TMP_MEM_OFFSET_HI (3 * sizeof(sljit_sw))
1488 #else
1489 #define FLOAT_TMP_MEM_OFFSET_LOW (3 * sizeof(sljit_sw))
1490 #define FLOAT_TMP_MEM_OFFSET_HI (2 * sizeof(sljit_sw))
1491 #endif
1492
1493 #endif /* SLJIT_CONFIG_PPC_64 */
1494
1495 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
1496         sljit_s32 dst, sljit_sw dstw,
1497         sljit_s32 src, sljit_sw srcw)
1498 {
1499         if (src & SLJIT_MEM) {
1500                 /* We can ignore the temporary data store on the stack from a caching point of view. */
1501                 FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, TMP_REG1));
1502                 src = TMP_FREG1;
1503         }
1504
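             /* fctiwz / fctidz convert to a 32 / 64 bit integer with rounding toward zero; the result
                stays in an FPR, so it has to be moved through memory to reach its final destination. */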
1505 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1506         op = GET_OPCODE(op);
1507         FAIL_IF(push_inst(compiler, (op == SLJIT_CONV_S32_FROM_F64 ? FCTIWZ : FCTIDZ) | FD(TMP_FREG1) | FB(src)));
1508
1509         if (op == SLJIT_CONV_SW_FROM_F64) {
1510                 if (FAST_IS_REG(dst)) {
1511                         FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1));
1512                         return emit_op_mem(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1);
1513                 }
1514                 return emit_op_mem(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, TMP_REG1);
1515         }
1516 #else
1517         FAIL_IF(push_inst(compiler, FCTIWZ | FD(TMP_FREG1) | FB(src)));
1518 #endif
1519
1520         if (FAST_IS_REG(dst)) {
1521                 FAIL_IF(load_immediate(compiler, TMP_REG1, FLOAT_TMP_MEM_OFFSET));
1522                 FAIL_IF(push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(SLJIT_SP) | B(TMP_REG1)));
1523                 return emit_op_mem(compiler, INT_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1);
1524         }
1525
1526         SLJIT_ASSERT(dst & SLJIT_MEM);
1527
1528         if (dst & OFFS_REG_MASK) {
1529                 dstw &= 0x3;
1530                 if (dstw) {
1531 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
1532                         FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(dst)) | A(TMP_REG1) | (dstw << 11) | ((31 - dstw) << 1)));
1533 #else
1534                         FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(dst), dstw, 63 - dstw, 1)));
1535 #endif
1536                         dstw = TMP_REG1;
1537                 }
1538                 else
1539                         dstw = OFFS_REG(dst);
1540         }
1541         else {
1542                 if ((dst & REG_MASK) && !dstw) {
1543                         dstw = dst & REG_MASK;
1544                         dst = 0;
1545                 }
1546                 else {
1547                         /* This works regardless of whether we have SLJIT_MEM1 or SLJIT_MEM0. */
1548                         FAIL_IF(load_immediate(compiler, TMP_REG1, dstw));
1549                         dstw = TMP_REG1;
1550                 }
1551         }
1552
1553         return push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(dst & REG_MASK) | B(dstw));
1554 }
1555
1556 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
1557         sljit_s32 dst, sljit_sw dstw,
1558         sljit_s32 src, sljit_sw srcw)
1559 {
1560 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1561
1562         sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
1563
1564         if (src & SLJIT_IMM) {
1565                 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
1566                         srcw = (sljit_s32)srcw;
1567                 FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
1568                 src = TMP_REG1;
1569         }
1570         else if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) {
1571                 if (FAST_IS_REG(src))
1572                         FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1)));
1573                 else
1574                         FAIL_IF(emit_op_mem(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
1575                 src = TMP_REG1;
1576         }
1577
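             /* There is no direct GPR to FPR move here: the integer is stored to the stack, reloaded
                into an FPR, and fcfid performs the actual conversion. */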
1578         if (FAST_IS_REG(src)) {
1579                 FAIL_IF(emit_op_mem(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1));
1580                 FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1));
1581         }
1582         else
1583                 FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, TMP_REG1));
1584
1585         FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1)));
1586
1587         if (dst & SLJIT_MEM)
1588                 return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1);
1589         if (op & SLJIT_F32_OP)
1590                 return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
1591         return SLJIT_SUCCESS;
1592
1593 #else
1594
1595         sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
1596         sljit_s32 invert_sign = 1;
1597
1598         if (src & SLJIT_IMM) {
1599                 FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ 0x80000000));
1600                 src = TMP_REG1;
1601                 invert_sign = 0;
1602         }
1603         else if (!FAST_IS_REG(src)) {
1604                 FAIL_IF(emit_op_mem(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
1605                 src = TMP_REG1;
1606         }
1607
1608         /* First, a special double precision floating point value is constructed: (2^52 + (input xor 2^31)).
1609            The double precision format has exactly 53 bits of precision, so the lower 32 bits of the
1610            mantissa represent the lower 32 bits of this value. The xor with 2^31 is the same as adding
1611            0x80000000 to the input, which shifts it into the 0 - 0xffffffff range. To get the converted
1612            floating point value, 2^52 + 2^31 must be subtracted from the constructed value. */
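             /* Illustration: input -1 (0xffffffff) xor 0x80000000 gives 0x7fffffff; the constructed
                double is 2^52 + 0x7fffffff, and subtracting 2^52 + 2^31 leaves -1.0 as expected. */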
1613         FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330));
1614         if (invert_sign)
1615                 FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000));
1616         FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI, TMP_REG1));
1617         FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, TMP_REG2));
1618         FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000));
1619         FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1));
1620         FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, TMP_REG2));
1621         FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1));
1622
1623         FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2)));
1624
1625         if (dst & SLJIT_MEM)
1626                 return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1);
1627         if (op & SLJIT_F32_OP)
1628                 return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
1629         return SLJIT_SUCCESS;
1630
1631 #endif
1632 }
1633
1634 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
1635         sljit_s32 src1, sljit_sw src1w,
1636         sljit_s32 src2, sljit_sw src2w)
1637 {
1638         if (src1 & SLJIT_MEM) {
1639                 FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, TMP_REG1));
1640                 src1 = TMP_FREG1;
1641         }
1642
1643         if (src2 & SLJIT_MEM) {
1644                 FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, TMP_REG2));
1645                 src2 = TMP_FREG2;
1646         }
1647
1648         return push_inst(compiler, FCMPU | CRD(4) | FA(src1) | FB(src2));
1649 }
1650
1651 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
1652         sljit_s32 dst, sljit_sw dstw,
1653         sljit_s32 src, sljit_sw srcw)
1654 {
1655         sljit_s32 dst_r;
1656
1657         CHECK_ERROR();
1658
1659         SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error);
1660         SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
1661
1662         if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32)
1663                 op ^= SLJIT_F32_OP;
1664
1665         dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
1666
1667         if (src & SLJIT_MEM) {
1668                 FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, TMP_REG1));
1669                 src = dst_r;
1670         }
1671
1672         switch (GET_OPCODE(op)) {
1673         case SLJIT_CONV_F64_FROM_F32:
1674                 op ^= SLJIT_F32_OP;
1675                 if (op & SLJIT_F32_OP) {
1676                         FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(src)));
1677                         break;
1678                 }
1679                 /* Fall through. */
1680         case SLJIT_MOV_F64:
1681                 if (src != dst_r) {
1682                         if (dst_r != TMP_FREG1)
1683                                 FAIL_IF(push_inst(compiler, FMR | FD(dst_r) | FB(src)));
1684                         else
1685                                 dst_r = src;
1686                 }
1687                 break;
1688         case SLJIT_NEG_F64:
1689                 FAIL_IF(push_inst(compiler, FNEG | FD(dst_r) | FB(src)));
1690                 break;
1691         case SLJIT_ABS_F64:
1692                 FAIL_IF(push_inst(compiler, FABS | FD(dst_r) | FB(src)));
1693                 break;
1694         }
1695
1696         if (dst & SLJIT_MEM)
1697                 FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op), dst_r, dst, dstw, TMP_REG1));
1698         return SLJIT_SUCCESS;
1699 }
1700
1701 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
1702         sljit_s32 dst, sljit_sw dstw,
1703         sljit_s32 src1, sljit_sw src1w,
1704         sljit_s32 src2, sljit_sw src2w)
1705 {
1706         sljit_s32 dst_r;
1707
1708         CHECK_ERROR();
1709         CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
1710         ADJUST_LOCAL_OFFSET(dst, dstw);
1711         ADJUST_LOCAL_OFFSET(src1, src1w);
1712         ADJUST_LOCAL_OFFSET(src2, src2w);
1713
1714         dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;
1715
1716         if (src1 & SLJIT_MEM) {
1717                 FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, TMP_REG1));
1718                 src1 = TMP_FREG1;
1719         }
1720
1721         if (src2 & SLJIT_MEM) {
1722                 FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, TMP_REG2));
1723                 src2 = TMP_FREG2;
1724         }
1725
1726         switch (GET_OPCODE(op)) {
1727         case SLJIT_ADD_F64:
1728                 FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_r) | FA(src1) | FB(src2)));
1729                 break;
1730
1731         case SLJIT_SUB_F64:
1732                 FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_r) | FA(src1) | FB(src2)));
1733                 break;
1734
1735         case SLJIT_MUL_F64:
1736                 FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_r) | FA(src1) | FC(src2) /* FMUL uses FC as src2 */));
1737                 break;
1738
1739         case SLJIT_DIV_F64:
1740                 FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_r) | FA(src1) | FB(src2)));
1741                 break;
1742         }
1743
1744         if (dst & SLJIT_MEM)
1745                 FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, TMP_REG1));
1746
1747         return SLJIT_SUCCESS;
1748 }
1749
1750 #undef SELECT_FOP
1751
1752 /* --------------------------------------------------------------------- */
1753 /*  Other instructions                                                   */
1754 /* --------------------------------------------------------------------- */
1755
1756 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
1757 {
1758         CHECK_ERROR();
1759         CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
1760         ADJUST_LOCAL_OFFSET(dst, dstw);
1761
1762         if (FAST_IS_REG(dst))
1763                 return push_inst(compiler, MFLR | D(dst));
1764
1765         /* Memory. */
1766         FAIL_IF(push_inst(compiler, MFLR | D(TMP_REG2)));
1767         return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
1768 }
1769
1770 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
1771 {
1772         CHECK_ERROR();
1773         CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
1774         ADJUST_LOCAL_OFFSET(src, srcw);
1775
1776         if (FAST_IS_REG(src))
1777                 FAIL_IF(push_inst(compiler, MTLR | S(src)));
1778         else {
1779                 FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
1780                 FAIL_IF(push_inst(compiler, MTLR | S(TMP_REG2)));
1781         }
1782
1783         return push_inst(compiler, BLR);
1784 }
1785
1786 /* --------------------------------------------------------------------- */
1787 /*  Conditional instructions                                             */
1788 /* --------------------------------------------------------------------- */
1789
1790 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
1791 {
1792         struct sljit_label *label;
1793
1794         CHECK_ERROR_PTR();
1795         CHECK_PTR(check_sljit_emit_label(compiler));
1796
1797         if (compiler->last_label && compiler->last_label->size == compiler->size)
1798                 return compiler->last_label;
1799
1800         label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
1801         PTR_FAIL_IF(!label);
1802         set_label(label, compiler);
1803         return label;
1804 }
1805
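/* The returned value combines the BO and BI fields of a conditional branch: BO = 12 branches
   when the selected CR bit is set, BO = 4 when it is clear, and BO = 20 branches always.
   The part shifted by 16 is the BI field, the CR bit index: bits 0-3 of CR0 are LT, GT, EQ
   and SO for the integer comparisons, while bits 4-7 select the same bits of the CR field
   written by FCMPU in sljit_emit_fop1_cmp. */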
1806 static sljit_ins get_bo_bi_flags(sljit_s32 type)
1807 {
1808         switch (type) {
1809         case SLJIT_EQUAL:
1810                 return (12 << 21) | (2 << 16);
1811
1812         case SLJIT_NOT_EQUAL:
1813                 return (4 << 21) | (2 << 16);
1814
1815         case SLJIT_LESS:
1816         case SLJIT_SIG_LESS:
1817                 return (12 << 21) | (0 << 16);
1818
1819         case SLJIT_GREATER_EQUAL:
1820         case SLJIT_SIG_GREATER_EQUAL:
1821                 return (4 << 21) | (0 << 16);
1822
1823         case SLJIT_GREATER:
1824         case SLJIT_SIG_GREATER:
1825                 return (12 << 21) | (1 << 16);
1826
1827         case SLJIT_LESS_EQUAL:
1828         case SLJIT_SIG_LESS_EQUAL:
1829                 return (4 << 21) | (1 << 16);
1830
1831         case SLJIT_LESS_F64:
1832                 return (12 << 21) | ((4 + 0) << 16);
1833
1834         case SLJIT_GREATER_EQUAL_F64:
1835                 return (4 << 21) | ((4 + 0) << 16);
1836
1837         case SLJIT_GREATER_F64:
1838                 return (12 << 21) | ((4 + 1) << 16);
1839
1840         case SLJIT_LESS_EQUAL_F64:
1841                 return (4 << 21) | ((4 + 1) << 16);
1842
1843         case SLJIT_OVERFLOW:
1844         case SLJIT_MUL_OVERFLOW:
1845                 return (12 << 21) | (3 << 16);
1846
1847         case SLJIT_NOT_OVERFLOW:
1848         case SLJIT_MUL_NOT_OVERFLOW:
1849                 return (4 << 21) | (3 << 16);
1850
1851         case SLJIT_EQUAL_F64:
1852                 return (12 << 21) | ((4 + 2) << 16);
1853
1854         case SLJIT_NOT_EQUAL_F64:
1855                 return (4 << 21) | ((4 + 2) << 16);
1856
1857         case SLJIT_UNORDERED_F64:
1858                 return (12 << 21) | ((4 + 3) << 16);
1859
1860         case SLJIT_ORDERED_F64:
1861                 return (4 << 21) | ((4 + 3) << 16);
1862
1863         default:
1864                 SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_CDECL);
1865                 return (20 << 21);
1866         }
1867 }
1868
1869 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
1870 {
1871         struct sljit_jump *jump;
1872         sljit_ins bo_bi_flags;
1873
1874         CHECK_ERROR_PTR();
1875         CHECK_PTR(check_sljit_emit_jump(compiler, type));
1876
1877         bo_bi_flags = get_bo_bi_flags(type & 0xff);
1878         if (!bo_bi_flags)
1879                 return NULL;
1880
1881         jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
1882         PTR_FAIL_IF(!jump);
1883         set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
1884         type &= 0xff;
1885
1886         /* On PPC, we don't need to touch the arguments. */
1887         if (type < SLJIT_JUMP)
1888                 jump->flags |= IS_COND;
1889 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
1890         if (type >= SLJIT_CALL)
1891                 jump->flags |= IS_CALL;
1892 #endif
1893
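             /* The target address is loaded with emit_const (and patched once the jump is resolved),
                moved to the count register, and a bcctr is emitted; the low bit set for calls is the
                LK bit, which saves the return address in LR. */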
1894         PTR_FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
1895         PTR_FAIL_IF(push_inst(compiler, MTCTR | S(TMP_CALL_REG)));
1896         jump->addr = compiler->size;
1897         PTR_FAIL_IF(push_inst(compiler, BCCTR | bo_bi_flags | (type >= SLJIT_FAST_CALL ? 1 : 0)));
1898         return jump;
1899 }
1900
1901 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
1902         sljit_s32 arg_types)
1903 {
1904         CHECK_ERROR_PTR();
1905         CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
1906
1907 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1908         PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL));
1909 #endif
1910
1911 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
1912                 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
1913         compiler->skip_checks = 1;
1914 #endif
1915
1916         return sljit_emit_jump(compiler, type);
1917 }
1918
1919 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
1920 {
1921         struct sljit_jump *jump = NULL;
1922         sljit_s32 src_r;
1923
1924         CHECK_ERROR();
1925         CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
1926         ADJUST_LOCAL_OFFSET(src, srcw);
1927
1928         if (FAST_IS_REG(src)) {
1929 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
1930                 if (type >= SLJIT_CALL) {
1931                         FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src)));
1932                         src_r = TMP_CALL_REG;
1933                 }
1934                 else
1935                         src_r = src;
1936 #else
1937                 src_r = src;
1938 #endif
1939         } else if (src & SLJIT_IMM) {
1940                 /* These jumps are converted to jump/call instructions when possible. */
1941                 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
1942                 FAIL_IF(!jump);
1943                 set_jump(jump, compiler, JUMP_ADDR);
1944                 jump->u.target = srcw;
1945 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
1946                 if (type >= SLJIT_CALL)
1947                         jump->flags |= IS_CALL;
1948 #endif
1949                 FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
1950                 src_r = TMP_CALL_REG;
1951         }
1952         else {
1953                 FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw));
1954                 src_r = TMP_CALL_REG;
1955         }
1956
1957         FAIL_IF(push_inst(compiler, MTCTR | S(src_r)));
1958         if (jump)
1959                 jump->addr = compiler->size;
1960         return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0));
1961 }
1962
1963 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
1964         sljit_s32 arg_types,
1965         sljit_s32 src, sljit_sw srcw)
1966 {
1967         CHECK_ERROR();
1968         CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
1969
1970 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1971         if (src & SLJIT_MEM) {
1972                 ADJUST_LOCAL_OFFSET(src, srcw);
1973                 FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw));
1974                 src = TMP_CALL_REG;
1975         }
1976
1977         FAIL_IF(call_with_args(compiler, arg_types, &src));
1978 #endif
1979
1980 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
1981                 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
1982         compiler->skip_checks = 1;
1983 #endif
1984
1985         return sljit_emit_ijump(compiler, type, src, srcw);
1986 }
1987
1988 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
1989         sljit_s32 dst, sljit_sw dstw,
1990         sljit_s32 type)
1991 {
1992         sljit_s32 reg, input_flags, cr_bit, invert;
1993         sljit_s32 saved_op = op;
1994         sljit_sw saved_dstw = dstw;
1995
1996         CHECK_ERROR();
1997         CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
1998         ADJUST_LOCAL_OFFSET(dst, dstw);
1999
2000 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
2001         input_flags = (op & SLJIT_I32_OP) ? INT_DATA : WORD_DATA;
2002 #else
2003         input_flags = WORD_DATA;
2004 #endif
2005
2006         op = GET_OPCODE(op);
2007         reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;
2008
2009         if (op >= SLJIT_ADD && (dst & SLJIT_MEM))
2010                 FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG1));
2011
2012         invert = 0;
2013         cr_bit = 0;
2014
2015         switch (type & 0xff) {
2016         case SLJIT_LESS:
2017         case SLJIT_SIG_LESS:
2018                 break;
2019
2020         case SLJIT_GREATER_EQUAL:
2021         case SLJIT_SIG_GREATER_EQUAL:
2022                 invert = 1;
2023                 break;
2024
2025         case SLJIT_GREATER:
2026         case SLJIT_SIG_GREATER:
2027                 cr_bit = 1;
2028                 break;
2029
2030         case SLJIT_LESS_EQUAL:
2031         case SLJIT_SIG_LESS_EQUAL:
2032                 cr_bit = 1;
2033                 invert = 1;
2034                 break;
2035
2036         case SLJIT_EQUAL:
2037                 cr_bit = 2;
2038                 break;
2039
2040         case SLJIT_NOT_EQUAL:
2041                 cr_bit = 2;
2042                 invert = 1;
2043                 break;
2044
2045         case SLJIT_OVERFLOW:
2046         case SLJIT_MUL_OVERFLOW:
2047                 cr_bit = 3;
2048                 break;
2049
2050         case SLJIT_NOT_OVERFLOW:
2051         case SLJIT_MUL_NOT_OVERFLOW:
2052                 cr_bit = 3;
2053                 invert = 1;
2054                 break;
2055
2056         case SLJIT_LESS_F64:
2057                 cr_bit = 4 + 0;
2058                 break;
2059
2060         case SLJIT_GREATER_EQUAL_F64:
2061                 cr_bit = 4 + 0;
2062                 invert = 1;
2063                 break;
2064
2065         case SLJIT_GREATER_F64:
2066                 cr_bit = 4 + 1;
2067                 break;
2068
2069         case SLJIT_LESS_EQUAL_F64:
2070                 cr_bit = 4 + 1;
2071                 invert = 1;
2072                 break;
2073
2074         case SLJIT_EQUAL_F64:
2075                 cr_bit = 4 + 2;
2076                 break;
2077
2078         case SLJIT_NOT_EQUAL_F64:
2079                 cr_bit = 4 + 2;
2080                 invert = 1;
2081                 break;
2082
2083         case SLJIT_UNORDERED_F64:
2084                 cr_bit = 4 + 3;
2085                 break;
2086
2087         case SLJIT_ORDERED_F64:
2088                 cr_bit = 4 + 3;
2089                 invert = 1;
2090                 break;
2091
2092         default:
2093                 SLJIT_UNREACHABLE();
2094                 break;
2095         }
2096
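             /* mfcr copies the whole condition register into reg; the rlwinm then rotates it left by
                cr_bit + 1 so the selected CR bit lands in the least significant bit, and the
                MB = ME = 31 mask clears all other bits. */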
2097         FAIL_IF(push_inst(compiler, MFCR | D(reg)));
2098         FAIL_IF(push_inst(compiler, RLWINM | S(reg) | A(reg) | ((1 + (cr_bit)) << 11) | (31 << 6) | (31 << 1)));
2099
2100         if (invert)
2101                 FAIL_IF(push_inst(compiler, XORI | S(reg) | A(reg) | 0x1));
2102
2103         if (op < SLJIT_ADD) {
2104                 if (!(dst & SLJIT_MEM))
2105                         return SLJIT_SUCCESS;
2106                 return emit_op_mem(compiler, input_flags, reg, dst, dstw, TMP_REG1);
2107         }
2108
2109 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2110                 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2111         compiler->skip_checks = 1;
2112 #endif
2113         if (dst & SLJIT_MEM)
2114                 return sljit_emit_op2(compiler, saved_op, dst, saved_dstw, TMP_REG1, 0, TMP_REG2, 0);
2115         return sljit_emit_op2(compiler, saved_op, dst, 0, dst, 0, TMP_REG2, 0);
2116 }
2117
2118 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
2119         sljit_s32 dst_reg,
2120         sljit_s32 src, sljit_sw srcw)
2121 {
2122         CHECK_ERROR();
2123         CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
2124
2125         return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
2126 }
2127
2128 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
2129         sljit_s32 reg,
2130         sljit_s32 mem, sljit_sw memw)
2131 {
2132         sljit_s32 mem_flags;
2133         sljit_ins inst;
2134
2135         CHECK_ERROR();
2136         CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
2137
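             /* Only the update (pre-indexed) load/store forms from updated_data_transfer_insts are
                used here, and PowerPC has no post-update addressing, so SLJIT_MEM_POST is rejected. */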
2138         if (type & SLJIT_MEM_POST)
2139                 return SLJIT_ERR_UNSUPPORTED;
2140
2141         switch (type & 0xff) {
2142         case SLJIT_MOV:
2143         case SLJIT_MOV_P:
2144 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
2145         case SLJIT_MOV_U32:
2146         case SLJIT_MOV_S32:
2147 #endif
2148                 mem_flags = WORD_DATA;
2149                 break;
2150
2151 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
2152         case SLJIT_MOV_U32:
2153                 mem_flags = INT_DATA;
2154                 break;
2155
2156         case SLJIT_MOV_S32:
2157                 mem_flags = INT_DATA;
2158
2159                 if (!(type & SLJIT_MEM_STORE) && !(type & SLJIT_I32_OP)) {
2160                         if (mem & OFFS_REG_MASK)
2161                                 mem_flags |= SIGNED_DATA;
2162                         else
2163                                 return SLJIT_ERR_UNSUPPORTED;
2164                 }
2165                 break;
2166 #endif
2167
2168         case SLJIT_MOV_U8:
2169         case SLJIT_MOV_S8:
2170                 mem_flags = BYTE_DATA;
2171                 break;
2172
2173         case SLJIT_MOV_U16:
2174                 mem_flags = HALF_DATA;
2175                 break;
2176
2177         case SLJIT_MOV_S16:
2178                 mem_flags = HALF_DATA | SIGNED_DATA;
2179                 break;
2180
2181         default:
2182                 SLJIT_UNREACHABLE();
2183                 mem_flags = WORD_DATA;
2184                 break;
2185         }
2186
2187         if (!(type & SLJIT_MEM_STORE))
2188                 mem_flags |= LOAD_DATA;
2189
2190         if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
2191                 if (memw != 0)
2192                         return SLJIT_ERR_UNSUPPORTED;
2193
2194                 if (type & SLJIT_MEM_SUPP)
2195                         return SLJIT_SUCCESS;
2196
2197                 inst = updated_data_transfer_insts[mem_flags | INDEXED];
2198                 FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, 0, reg) | A(mem & REG_MASK) | B(OFFS_REG(mem))));
2199         }
2200         else {
2201                 if (memw > SIMM_MAX || memw < SIMM_MIN)
2202                         return SLJIT_ERR_UNSUPPORTED;
2203
2204                 inst = updated_data_transfer_insts[mem_flags];
2205
2206 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
2207                 if ((inst & INT_ALIGNED) && (memw & 0x3) != 0)
2208                         return SLJIT_ERR_UNSUPPORTED;
2209 #endif
2210
2211                 if (type & SLJIT_MEM_SUPP)
2212                         return SLJIT_SUCCESS;
2213
2214                 FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, 0, reg) | A(mem & REG_MASK) | IMM(memw)));
2215         }
2216
2217         if ((mem_flags & LOAD_DATA) && (type & 0xff) == SLJIT_MOV_S8)
2218                 return push_inst(compiler, EXTSB | S(reg) | A(reg));
2219         return SLJIT_SUCCESS;
2220 }
2221
2222 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
2223         sljit_s32 freg,
2224         sljit_s32 mem, sljit_sw memw)
2225 {
2226         sljit_s32 mem_flags;
2227         sljit_ins inst;
2228
2229         CHECK_ERROR();
2230         CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw));
2231
2232         if (type & SLJIT_MEM_POST)
2233                 return SLJIT_ERR_UNSUPPORTED;
2234
2235         if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
2236                 if (memw != 0)
2237                         return SLJIT_ERR_UNSUPPORTED;
2238         }
2239         else {
2240                 if (memw > SIMM_MAX || memw < SIMM_MIN)
2241                         return SLJIT_ERR_UNSUPPORTED;
2242         }
2243
2244         if (type & SLJIT_MEM_SUPP)
2245                 return SLJIT_SUCCESS;
2246
2247         mem_flags = FLOAT_DATA(type);
2248
2249         if (!(type & SLJIT_MEM_STORE))
2250                 mem_flags |= LOAD_DATA;
2251
2252         if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
2253                 inst = updated_data_transfer_insts[mem_flags | INDEXED];
2254                 return push_inst(compiler, INST_CODE_AND_DST(inst, DOUBLE_DATA, freg) | A(mem & REG_MASK) | B(OFFS_REG(mem)));
2255         }
2256
2257         inst = updated_data_transfer_insts[mem_flags];
2258         return push_inst(compiler, INST_CODE_AND_DST(inst, DOUBLE_DATA, freg) | A(mem & REG_MASK) | IMM(memw));
2259 }
2260
2261 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
2262 {
2263         struct sljit_const *const_;
2264         sljit_s32 reg;
2265
2266         CHECK_ERROR_PTR();
2267         CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
2268         ADJUST_LOCAL_OFFSET(dst, dstw);
2269
2270         const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
2271         PTR_FAIL_IF(!const_);
2272         set_const(const_, compiler);
2273
2274         reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
2275
2276         PTR_FAIL_IF(emit_const(compiler, reg, init_value));
2277
2278         if (dst & SLJIT_MEM)
2279                 PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
2280         return const_;
2281 }