/* thumb2-curve25519
 *
 * Copyright (C) 2006-2023 wolfSSL Inc.
 *
 * This file is part of wolfSSL.
 *
 * wolfSSL is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * wolfSSL is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
 */

/* Generated using (from wolfssl):
 *   cd ../scripts
 *   ruby ./x25519/x25519.rb thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-curve25519.c
 */

#ifdef HAVE_CONFIG_H
    #include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>

#ifdef WOLFSSL_ARMASM
#if !defined(__aarch64__) && defined(__thumb__)
#ifdef WOLFSSL_ARMASM_INLINE

#ifdef __IAR_SYSTEMS_ICC__
#define __asm__        asm
#define __volatile__   volatile
#define WOLFSSL_NO_VAR_ASSIGN_REG
#endif /* __IAR_SYSTEMS_ICC__ */
#ifdef __KEIL__
#define __asm__        __asm
#define __volatile__   volatile
#endif /* __KEIL__ */
/* Based on work by: Emil Lenngren
 * https://github.com/pornin/X25519-Cortex-M4
 */

#include <wolfssl/wolfcrypt/fe_operations.h>
#define CURVED25519_ASM
#include <wolfssl/wolfcrypt/ge_operations.h>

#if defined(HAVE_CURVE25519) || defined(HAVE_ED25519)
#if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL)

/* Initialise the field-element implementation.
 *
 * This port has nothing to set up. The body is a single inline-assembly
 * statement whose template contains only whitespace and which has no
 * operands; its "memory" and "cc" clobbers are the only thing it tells the
 * compiler.
 *
 * Takes no arguments and returns nothing. Declared with an explicit (void)
 * parameter list so the definition is a real prototype; the two
 * WOLFSSL_NO_VAR_ASSIGN_REG variants were identical and are merged.
 */
void fe_init(void)
{
    __asm__ __volatile__ (
        "\n\t"
        :
        :
        : "memory", "cc"
    );
}

/* Combined field add and subtract helper.
 *
 * The body takes no C arguments and uses fixed registers throughout, so it is
 * meant to be reached with the registers already set up (presumably via BL
 * from another inline-assembly block - no caller is visible in this part of
 * the file).
 *
 * Register usage, as read from the loads and stores below:
 *   r0  destination of the sum (a + b), eight 32-bit words
 *   r1  destination of the difference (a - b), eight 32-bit words
 *   r2  first operand (a), eight 32-bit words
 *   r3  second operand (b), eight 32-bit words; reused as scratch once all of
 *       b has been loaded
 *
 * The operands are consumed 64 bits at a time and the add chain and the
 * subtract chain are interleaved. Because both chains need the carry flag,
 * the carry of the chain that is being paused is parked in a register
 * (r12 for the add, lr for the subtract) and put back into the flag with
 * "SUBS reg, reg, #1" before that chain resumes.
 *
 * Afterwards the sum has everything at or above bit 255 folded back in as a
 * multiple of 19, and the difference has 19 subtracted when its top bit is
 * set; bit 255 is cleared in both results.
 *
 * NOTE(review): only "memory", "lr" and "cc" are declared as clobbers although
 * r3-r12 are written; the code that branches here presumably declares them -
 * confirm against the callers.
 */
void fe_add_sub_op(void);
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_add_sub_op()
#else
void fe_add_sub_op()
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
    __asm__ __volatile__ (
        /* Add-Sub */
        /* Words 0-1 of a (r4, r5) and b (r6, r7). */
        "LDRD	r4, r5, [r2]\n\t"
        "LDRD	r6, r7, [r3]\n\t"
        /*  Add */
        /* r12 ends up holding the carry out of the add (0 or 1). MOV does not
         * touch the flags, so it can sit between ADDS and ADCS. */
        "ADDS	r8, r4, r6\n\t"
        "MOV	r12, #0x0\n\t"
        "ADCS	r9, r5, r7\n\t"
        "ADC	r12, r12, #0x0\n\t"
        "STRD	r8, r9, [r0]\n\t"
        /*  Sub */
        "SUBS	r10, r4, r6\n\t"
        "SBCS	r11, r5, r7\n\t"
        "STRD	r10, r11, [r1]\n\t"
        /* Words 2-3. */
        "LDRD	r4, r5, [r2, #8]\n\t"
        "LDRD	r6, r7, [r3, #8]\n\t"
        /*  Sub */
        /* Continue the subtract chain, then park its carry flag in lr. */
        "SBCS	r10, r4, r6\n\t"
        "MOV	lr, #0x0\n\t"
        "SBCS	r11, r5, r7\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "STRD	r10, r11, [r1, #8]\n\t"
        /*  Add */
        /* r12 - 1 sets the carry flag exactly when r12 was 1, restoring the
         * add chain's carry. */
        "SUBS	r12, r12, #0x1\n\t"
        "ADCS	r8, r4, r6\n\t"
        "ADCS	r9, r5, r7\n\t"
        "STRD	r8, r9, [r0, #8]\n\t"
        /* Words 4-5. */
        "LDRD	r4, r5, [r2, #16]\n\t"
        "LDRD	r6, r7, [r3, #16]\n\t"
        /*  Add */
        /* Continue the add chain and park its carry in r12 again. */
        "ADCS	r8, r4, r6\n\t"
        "MOV	r12, #0x0\n\t"
        "ADCS	r9, r5, r7\n\t"
        "ADC	r12, r12, #0x0\n\t"
        "STRD	r8, r9, [r0, #16]\n\t"
        /*  Sub */
        /* Restore the subtract chain's carry flag from lr. */
        "SUBS	lr, lr, #0x1\n\t"
        "SBCS	r10, r4, r6\n\t"
        "SBCS	r11, r5, r7\n\t"
        "STRD	r10, r11, [r1, #16]\n\t"
        /* Words 6-7. */
        "LDRD	r4, r5, [r2, #24]\n\t"
        "LDRD	r6, r7, [r3, #24]\n\t"
        /*  Sub */
        /* Top two words of the difference stay in r10, r11 until the final
         * store. */
        "SBCS	r10, r4, r6\n\t"
        "SBC	r11, r5, r7\n\t"
        /*  Add */
        /* Top two words of the sum stay in r8, r9; r12 = final carry out. */
        "SUBS	r12, r12, #0x1\n\t"
        "ADCS	r8, r4, r6\n\t"
        "MOV	r12, #0x0\n\t"
        "ADCS	r9, r5, r7\n\t"
        "ADC	r12, r12, #0x0\n\t"
        /*   Multiply -modulus by overflow */
        /* r3 = (carry << 1) | bit 31 of the top word, i.e. the part of the
         * sum at or above bit 255; r12 = 19 * r3. */
        "LSL	r3, r12, #1\n\t"
        "MOV	r12, #0x13\n\t"
        "ORR	r3, r3, r9, LSR #31\n\t"
        "MUL	r12, r3, r12\n\t"
        /*   Add -x*modulus (if overflow) */
        /* Reload the stored low words of the sum and add r12, rippling the
         * carry through all eight words. */
        "LDRD	r4, r5, [r0]\n\t"
        "LDRD	r6, r7, [r0, #8]\n\t"
        "ADDS	r4, r4, r12\n\t"
        "ADCS	r5, r5, #0x0\n\t"
        "ADCS	r6, r6, #0x0\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        "STRD	r4, r5, [r0]\n\t"
        "STRD	r6, r7, [r0, #8]\n\t"
        "LDRD	r4, r5, [r0, #16]\n\t"
        "ADCS	r4, r4, #0x0\n\t"
        "ADCS	r5, r5, #0x0\n\t"
        "STRD	r4, r5, [r0, #16]\n\t"
        /* Clear bit 255 of the sum (already accounted for in r3). */
        "BFC	r9, #31, #1\n\t"
        "ADCS	r8, r8, #0x0\n\t"
        "ADC	r9, r9, #0x0\n\t"
        "STRD	r8, r9, [r0, #24]\n\t"
        /*   Add -modulus on underflow */
        /* lr = 19 when bit 31 of the top difference word is set, else 0. */
        "MOV	lr, #0x13\n\t"
        "AND	lr, lr, r11, ASR #31\n\t"
        /* Reload words 0-5 of the difference; words 6-7 are still in
         * r10, r11. */
        "LDM	r1, {r4, r5, r6, r7, r8, r9}\n\t"
        "SUBS	r4, r4, lr\n\t"
        "SBCS	r5, r5, #0x0\n\t"
        "SBCS	r6, r6, #0x0\n\t"
        "SBCS	r7, r7, #0x0\n\t"
        "SBCS	r8, r8, #0x0\n\t"
        "SBCS	r9, r9, #0x0\n\t"
        /* Clear bit 255 of the difference before finishing the borrow. */
        "BFC	r11, #31, #1\n\t"
        "SBCS	r10, r10, #0x0\n\t"
        "SBC	r11, r11, #0x0\n\t"
        "STM	r1, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
        /* Done Add-Sub */
        :
        :
        : "memory", "lr", "cc"
    );
}

/* Field subtraction helper: writes a - b to r.
 *
 * Takes no C arguments; fe_sub reaches it with "BL fe_sub_op" after pinning
 * its arguments to registers.
 *
 * Register usage, as read from the code:
 *   r0  result, eight 32-bit words are stored here
 *   r1  minuend (a); advanced by 32 bytes by the two LDM writebacks
 *   r2  subtrahend (b); read first, then reused as scratch
 *
 * After the eight-word subtract, 19 is subtracted again when bit 31 of the
 * top word is set, and bit 255 of the result is cleared.
 *
 * Only "memory", "lr" and "cc" are declared as clobbers here; fe_sub lists
 * r3-r12 and lr as clobbered around the BL.
 */
void fe_sub_op(void);
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_sub_op()
#else
void fe_sub_op()
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
    __asm__ __volatile__ (
        /* Sub */
        /* All eight words of b into r6-r12, lr; then words 0-3 of a. */
        "LDM	r2!, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t"
        "LDM	r1!, {r2, r3, r4, r5}\n\t"
        "SUBS	r6, r2, r6\n\t"
        "SBCS	r7, r3, r7\n\t"
        "SBCS	r8, r4, r8\n\t"
        "SBCS	r9, r5, r9\n\t"
        /* Words 4-7 of a; LDM leaves the borrow chain's flags intact. */
        "LDM	r1!, {r2, r3, r4, r5}\n\t"
        "SBCS	r10, r2, r10\n\t"
        "SBCS	r11, r3, r11\n\t"
        "SBCS	r12, r4, r12\n\t"
        "SBC	lr, r5, lr\n\t"
        /* r2 = 19 when bit 31 of the top word is set, else 0. */
        "MOV	r2, #0x13\n\t"
        "AND	r2, r2, lr, ASR #31\n\t"
        "SUBS	r6, r6, r2\n\t"
        "SBCS	r7, r7, #0x0\n\t"
        "SBCS	r8, r8, #0x0\n\t"
        "SBCS	r9, r9, #0x0\n\t"
        "SBCS	r10, r10, #0x0\n\t"
        "SBCS	r11, r11, #0x0\n\t"
        /* Clear bit 255 before the borrow reaches the top word. */
        "BFC	lr, #31, #1\n\t"
        "SBCS	r12, r12, #0x0\n\t"
        "SBC	lr, lr, #0x0\n\t"
        "STM	r0, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t"
        /* Done Sub */
        :
        :
        : "memory", "lr", "cc"
    );
}

/* Subtract one field element from another: r = a - b.
 *
 * The arithmetic is done by fe_sub_op, which is entered with BL and reads its
 * operands from r0, r1 and r2.
 *
 * r  [out]  Result of the subtraction.
 * a  [in]   Value to subtract from.
 * b  [in]   Value to subtract.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_sub(fe r_p, const fe a_p, const fe b_p)
#else
void fe_sub(fe r, const fe a, const fe b)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    /* Pin the arguments to the registers fe_sub_op expects. */
    register sword32* r __asm__ ("r0") = (sword32*)r_p;
    register const sword32* a __asm__ ("r1") = (const sword32*)a_p;
    register const sword32* b __asm__ ("r2") = (const sword32*)b_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
    /* NOTE(review): without register variables nothing here forces r/a/b into
     * r0-r2; presumably the calling convention is relied on - confirm. */

    __asm__ __volatile__ (
        "BL	fe_sub_op\n\t"
        /* Operands are read-write because fe_sub_op modifies r1 and r2. */
        : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
        :
        /* Every other register fe_sub_op writes, plus lr for the BL. */
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc"
    );
}

/* Field addition helper: writes a + b to r.
 *
 * Takes no C arguments; fe_add reaches it with "BL fe_add_op" after pinning
 * its arguments to registers.
 *
 * Register usage, as read from the code:
 *   r0  result, eight 32-bit words are stored here
 *   r1  first operand (a); advanced by 32 bytes by the two LDM writebacks
 *   r2  second operand (b); read first, then reused as scratch
 *
 * After the eight-word add, 19 is added when bit 31 of the top word is set,
 * and bit 255 of the result is cleared.
 *
 * Only "memory", "lr" and "cc" are declared as clobbers here; fe_add lists
 * r3-r12 and lr as clobbered around the BL.
 */
void fe_add_op(void);
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_add_op()
#else
void fe_add_op()
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
    __asm__ __volatile__ (
        /* Add */
        /* All eight words of b into r6-r12, lr; then words 0-3 of a. */
        "LDM	r2!, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t"
        "LDM	r1!, {r2, r3, r4, r5}\n\t"
        "ADDS	r6, r2, r6\n\t"
        "ADCS	r7, r3, r7\n\t"
        "ADCS	r8, r4, r8\n\t"
        "ADCS	r9, r5, r9\n\t"
        /* Words 4-7 of a; LDM leaves the carry chain's flags intact. */
        "LDM	r1!, {r2, r3, r4, r5}\n\t"
        "ADCS	r10, r2, r10\n\t"
        "ADCS	r11, r3, r11\n\t"
        "ADCS	r12, r4, r12\n\t"
        "ADC	lr, r5, lr\n\t"
        /* r2 = 19 when bit 31 of the top word is set, else 0. */
        "MOV	r2, #0x13\n\t"
        "AND	r2, r2, lr, ASR #31\n\t"
        "ADDS	r6, r6, r2\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        "ADCS	r8, r8, #0x0\n\t"
        "ADCS	r9, r9, #0x0\n\t"
        "ADCS	r10, r10, #0x0\n\t"
        "ADCS	r11, r11, #0x0\n\t"
        /* Clear bit 255 before the carry reaches the top word. */
        "BFC	lr, #31, #1\n\t"
        "ADCS	r12, r12, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "STM	r0, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t"
        /* Done Add */
        :
        :
        : "memory", "lr", "cc"
    );
}

/* Add two field elements: r = a + b.
 *
 * The arithmetic is done by fe_add_op, which is entered with BL and reads its
 * operands from r0, r1 and r2.
 *
 * r  [out]  Result of the addition.
 * a  [in]   First value to add.
 * b  [in]   Second value to add.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_add(fe r_p, const fe a_p, const fe b_p)
#else
void fe_add(fe r, const fe a, const fe b)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    /* Pin the arguments to the registers fe_add_op expects. */
    register sword32* r __asm__ ("r0") = (sword32*)r_p;
    register const sword32* a __asm__ ("r1") = (const sword32*)a_p;
    register const sword32* b __asm__ ("r2") = (const sword32*)b_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
    /* NOTE(review): without register variables nothing here forces r/a/b into
     * r0-r2; presumably the calling convention is relied on - confirm. */

    __asm__ __volatile__ (
        "BL	fe_add_op\n\t"
        /* Operands are read-write because fe_add_op modifies r1 and r2. */
        : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
        :
        /* Every other register fe_add_op writes, plus lr for the BL. */
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc"
    );
}

#ifdef HAVE_ED25519
/* Load a field element from a 32-byte buffer.
 *
 * Reads eight 32-bit words from in with word loads, clears the most
 * significant bit of the last word (bit 255 of the value) and stores the
 * eight words to out. No other reduction is performed.
 *
 * out  [out]  Field element to fill, eight 32-bit words.
 * in   [in]   Source buffer; 32 bytes are read.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_frombytes(fe out_p, const unsigned char* in_p)
#else
void fe_frombytes(fe out, const unsigned char* in)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register sword32* out __asm__ ("r0") = (sword32*)out_p;
    register const unsigned char* in __asm__ ("r1") = (const unsigned char*)in_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        /* Read all eight words before writing anything. */
        "LDR	r2, [%[in]]\n\t"
        "LDR	r3, [%[in], #4]\n\t"
        "LDR	r4, [%[in], #8]\n\t"
        "LDR	r5, [%[in], #12]\n\t"
        "LDR	r6, [%[in], #16]\n\t"
        "LDR	r7, [%[in], #20]\n\t"
        "LDR	r8, [%[in], #24]\n\t"
        "LDR	r9, [%[in], #28]\n\t"
        /* Drop bit 255. */
        "BFC	r9, #31, #1\n\t"
        "STR	r2, [%[out]]\n\t"
        "STR	r3, [%[out], #4]\n\t"
        "STR	r4, [%[out], #8]\n\t"
        "STR	r5, [%[out], #12]\n\t"
        "STR	r6, [%[out], #16]\n\t"
        "STR	r7, [%[out], #20]\n\t"
        "STR	r8, [%[out], #24]\n\t"
        "STR	r9, [%[out], #28]\n\t"
        : [out] "+r" (out), [in] "+r" (in)
        :
        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc"
    );
}

/* Write a field element to a 32-byte buffer after a final conditional
 * reduction.
 *
 * A trial addition n + 19 is performed first, keeping only the top word. When
 * bit 31 of that top word is set, 19 is added to n for real; bit 255 is then
 * cleared and the eight words are stored to out.
 *
 * out  [out]  Destination buffer; 32 bytes are written with word stores.
 * n    [in]   Field element to encode, eight 32-bit words.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_tobytes(unsigned char* out_p, const fe n_p)
#else
void fe_tobytes(unsigned char* out, const fe n)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register unsigned char* out __asm__ ("r0") = (unsigned char*)out_p;
    register const sword32* n __asm__ ("r1") = (const sword32*)n_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        "LDM	%[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
        /* Trial n + 19: r10 is overwritten at every step, so only the carry
         * chain and the final top word survive. */
        "ADDS	r10, r2, #0x13\n\t"
        "ADCS	r10, r3, #0x0\n\t"
        "ADCS	r10, r4, #0x0\n\t"
        "ADCS	r10, r5, #0x0\n\t"
        "ADCS	r10, r6, #0x0\n\t"
        "ADCS	r10, r7, #0x0\n\t"
        "ADCS	r10, r8, #0x0\n\t"
        "ADC	r10, r9, #0x0\n\t"
        /* r10 = 19 when bit 31 of the trial top word is set, else 0. */
        "ASR	r10, r10, #31\n\t"
        "AND	r10, r10, #0x13\n\t"
        /* Add that amount to n, then clear bit 255. */
        "ADDS	r2, r2, r10\n\t"
        "ADCS	r3, r3, #0x0\n\t"
        "ADCS	r4, r4, #0x0\n\t"
        "ADCS	r5, r5, #0x0\n\t"
        "ADCS	r6, r6, #0x0\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        "ADCS	r8, r8, #0x0\n\t"
        "ADC	r9, r9, #0x0\n\t"
        "BFC	r9, #31, #1\n\t"
        "STR	r2, [%[out]]\n\t"
        "STR	r3, [%[out], #4]\n\t"
        "STR	r4, [%[out], #8]\n\t"
        "STR	r5, [%[out], #12]\n\t"
        "STR	r6, [%[out], #16]\n\t"
        "STR	r7, [%[out], #20]\n\t"
        "STR	r8, [%[out], #24]\n\t"
        "STR	r9, [%[out], #28]\n\t"
        : [out] "+r" (out), [n] "+r" (n)
        :
        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc"
    );
}

/* Set a field element to one.
 *
 * Stores eight 32-bit words to n: the first is 1, the remaining seven are 0.
 *
 * n  [out]  Field element to set.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_1(fe n_p)
#else
void fe_1(fe n)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register sword32* n __asm__ ("r0") = (sword32*)n_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        /* Set one */
        /* Build the value in r2-r9, then write it with a single STM. */
        "MOV	r2, #0x1\n\t"
        "MOV	r3, #0x0\n\t"
        "MOV	r4, #0x0\n\t"
        "MOV	r5, #0x0\n\t"
        "MOV	r6, #0x0\n\t"
        "MOV	r7, #0x0\n\t"
        "MOV	r8, #0x0\n\t"
        "MOV	r9, #0x0\n\t"
        "STM	%[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
        : [n] "+r" (n)
        :
        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc"
    );
}

/* Set a field element to zero.
 *
 * Stores eight 32-bit zero words to n.
 *
 * n  [out]  Field element to clear.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_0(fe n_p)
#else
void fe_0(fe n)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register sword32* n __asm__ ("r0") = (sword32*)n_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        /* Set zero */
        /* Zero r2-r9, then write them with a single STM. */
        "MOV	r2, #0x0\n\t"
        "MOV	r3, #0x0\n\t"
        "MOV	r4, #0x0\n\t"
        "MOV	r5, #0x0\n\t"
        "MOV	r6, #0x0\n\t"
        "MOV	r7, #0x0\n\t"
        "MOV	r8, #0x0\n\t"
        "MOV	r9, #0x0\n\t"
        "STM	%[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
        : [n] "+r" (n)
        :
        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc"
    );
}

/* Copy a field element: r = a.
 *
 * Moves eight 32-bit words from a to r, sixteen bytes at a time, using
 * double-word loads and stores.
 *
 * r  [out]  Destination field element.
 * a  [in]   Source field element.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_copy(fe r_p, const fe a_p)
#else
void fe_copy(fe r, const fe a)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register sword32* r __asm__ ("r0") = (sword32*)r_p;
    register const sword32* a __asm__ ("r1") = (const sword32*)a_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        /* Copy */
        /* Words 0-3. */
        "LDRD	r2, r3, [%[a]]\n\t"
        "LDRD	r4, r5, [%[a], #8]\n\t"
        "STRD	r2, r3, [%[r]]\n\t"
        "STRD	r4, r5, [%[r], #8]\n\t"
        /* Words 4-7. */
        "LDRD	r2, r3, [%[a], #16]\n\t"
        "LDRD	r4, r5, [%[a], #24]\n\t"
        "STRD	r2, r3, [%[r], #16]\n\t"
        "STRD	r4, r5, [%[r], #24]\n\t"
        : [r] "+r" (r), [a] "+r" (a)
        :
        : "memory", "r2", "r3", "r4", "r5", "cc"
    );
}

/* Negate a field element: r = (2^255 - 19) - a.
 *
 * The constant is built word by word: 0xffffffed for the lowest word,
 * 0xffffffff for the six middle words and 0x7fffffff for the top word. a is
 * subtracted from it with a single borrow chain across two groups of four
 * words. No further reduction is applied to the result.
 *
 * r  [out]  Result of the negation.
 * a  [in]   Field element to negate.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_neg(fe r_p, const fe a_p)
#else
void fe_neg(fe r, const fe a)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register sword32* r __asm__ ("r0") = (sword32*)r_p;
    register const sword32* a __asm__ ("r1") = (const sword32*)a_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        /* r7 = 0xffffffff, r6 = 0xffffffed (lowest word of the constant). */
        "MVN	r7, #0x0\n\t"
        "MVN	r6, #0x12\n\t"
        /* Words 0-3; both pointers advance through writeback. */
        "LDM	%[a]!, {r2, r3, r4, r5}\n\t"
        "SUBS	r2, r6, r2\n\t"
        "SBCS	r3, r7, r3\n\t"
        "SBCS	r4, r7, r4\n\t"
        "SBCS	r5, r7, r5\n\t"
        "STM	%[r]!, {r2, r3, r4, r5}\n\t"
        /* r6 = 0x7fffffff (top word of the constant). MVN, LDM and STM leave
         * the borrow flag untouched. */
        "MVN	r6, #0x80000000\n\t"
        /* Words 4-7. */
        "LDM	%[a]!, {r2, r3, r4, r5}\n\t"
        "SBCS	r2, r7, r2\n\t"
        "SBCS	r3, r7, r3\n\t"
        "SBCS	r4, r7, r4\n\t"
        "SBC	r5, r6, r5\n\t"
        "STM	%[r]!, {r2, r3, r4, r5}\n\t"
        : [r] "+r" (r), [a] "+r" (a)
        :
        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "cc"
    );
}

/* Test whether a field element is non-zero after a final conditional
 * reduction.
 *
 * A trial addition a + 19 is performed first, keeping only the top word. When
 * bit 31 of that top word is set, 19 is added to a for real and bit 255 is
 * cleared. The eight resulting words are then ORed together.
 *
 * a  [in]  Field element to test, eight 32-bit words.
 *
 * returns the OR of the eight adjusted words: 0 when they are all zero, some
 * non-zero bit pattern otherwise (not normalised to 1).
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
int fe_isnonzero(const fe a_p)
#else
int fe_isnonzero(const fe a)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register const sword32* a __asm__ ("r0") = (const sword32*)a_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        "LDM	%[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
        /* Trial a + 19: r1 is overwritten at every step, so only the carry
         * chain and the final top word survive. */
        "ADDS	r1, r2, #0x13\n\t"
        "ADCS	r1, r3, #0x0\n\t"
        "ADCS	r1, r4, #0x0\n\t"
        "ADCS	r1, r5, #0x0\n\t"
        "ADCS	r1, r6, #0x0\n\t"
        "ADCS	r1, r7, #0x0\n\t"
        "ADCS	r1, r8, #0x0\n\t"
        "ADC	r1, r9, #0x0\n\t"
        /* r1 = 19 when bit 31 of the trial top word is set, else 0. */
        "ASR	r1, r1, #31\n\t"
        "AND	r1, r1, #0x13\n\t"
        /* Add that amount to a, then clear bit 255. */
        "ADDS	r2, r2, r1\n\t"
        "ADCS	r3, r3, #0x0\n\t"
        "ADCS	r4, r4, #0x0\n\t"
        "ADCS	r5, r5, #0x0\n\t"
        "ADCS	r6, r6, #0x0\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        "ADCS	r8, r8, #0x0\n\t"
        "ADC	r9, r9, #0x0\n\t"
        "BFC	r9, #31, #1\n\t"
        /* OR the eight words together; the result replaces the pointer in
         * the [a] operand. */
        "ORR	r2, r2, r3\n\t"
        "ORR	r4, r4, r5\n\t"
        "ORR	r6, r6, r7\n\t"
        "ORR	r8, r8, r9\n\t"
        "ORR	r4, r4, r6\n\t"
        "ORR	r2, r2, r8\n\t"
        "ORR	%[a], r2, r4\n\t"
        : [a] "+r" (a)
        :
        /* r10 is listed although the template above never writes it. */
        : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc"
    );
    /* The asm left the result in a; hand it back as an integer. */
    return (uint32_t)(size_t)a;
}

/* Return the low bit of a field element as it would be after a final
 * conditional reduction, without writing the reduced value anywhere.
 *
 * A trial addition a + 19 is carried through all eight words, keeping only
 * the top word. Bit 31 of that top word is XORed with bit 0 of the first word
 * of a to give the result.
 *
 * a  [in]  Field element to test, eight 32-bit words.
 *
 * returns 0 or 1.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
int fe_isnegative(const fe a_p)
#else
int fe_isnegative(const fe a)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register const sword32* a __asm__ ("r0") = (const sword32*)a_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        /* Words 0-3; writeback leaves the pointer at word 4. */
        "LDM	%[a]!, {r2, r3, r4, r5}\n\t"
        /* Trial a + 19: r1 is overwritten at every step, so only the carry
         * chain and the final top word survive. */
        "ADDS	r1, r2, #0x13\n\t"
        "ADCS	r1, r3, #0x0\n\t"
        "ADCS	r1, r4, #0x0\n\t"
        "ADCS	r1, r5, #0x0\n\t"
        /* Words 4-7 (no writeback this time). */
        "LDM	%[a], {r2, r3, r4, r5}\n\t"
        "ADCS	r1, r2, #0x0\n\t"
        "ADCS	r1, r3, #0x0\n\t"
        "ADCS	r1, r4, #0x0\n\t"
        /* Reload word 0: the pointer is 16 bytes past the start. LDR does
         * not disturb the carry needed by the ADC that follows. */
        "LDR	r2, [%[a], #-16]\n\t"
        "ADC	r1, r5, #0x0\n\t"
        /* Result = (word 0 & 1) ^ (bit 31 of the trial top word); it
         * replaces the pointer in the [a] operand. */
        "AND	%[a], r2, #0x1\n\t"
        "LSR	r1, r1, #31\n\t"
        "EOR	%[a], %[a], r1\n\t"
        : [a] "+r" (a)
        :
        : "memory", "r1", "r2", "r3", "r4", "r5", "cc"
    );
    /* The asm left the result in a; hand it back as an integer. */
    return (uint32_t)(size_t)a;
}

#if defined(HAVE_ED25519_MAKE_KEY) || defined(HAVE_ED25519_SIGN)
#ifndef WC_NO_CACHE_RESISTANT
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p)
#else
void fe_cmov_table(fe* r, fe* base, signed char b)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register fe* r __asm__ ("r0") = (fe*)r_p;
    register fe* base __asm__ ("r1") = (fe*)base_p;
    register signed char b __asm__ ("r2") = (signed char)b_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        "SXTB	%[b], %[b]\n\t"
        "SBFX	r3, %[b], #7, #1\n\t"
        "EOR	r12, %[b], r3\n\t"
        "SUB	r12, r12, r3\n\t"
        "MOV	r4, #0x1\n\t"
        "MOV	r5, #0x0\n\t"
        "MOV	r6, #0x1\n\t"
        "MOV	r7, #0x0\n\t"
        "MOV	r8, #0x0\n\t"
        "MOV	r9, #0x0\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #31\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base]]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #32]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #64]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "ADD	%[base], %[base], #0x60\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #30\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base]]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #32]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #64]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "ADD	%[base], %[base], #0x60\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #29\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base]]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #32]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #64]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "ADD	%[base], %[base], #0x60\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #28\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base]]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #32]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #64]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "ADD	%[base], %[base], #0x60\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #27\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base]]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #32]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #64]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "ADD	%[base], %[base], #0x60\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #26\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base]]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #32]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #64]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "ADD	%[base], %[base], #0x60\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #25\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base]]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #32]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #64]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "ADD	%[base], %[base], #0x60\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #24\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base]]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #32]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #64]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "SUB	%[base], %[base], #0x2a0\n\t"
        "MVN	r10, #0x12\n\t"
        "MVN	r11, #0x0\n\t"
        "SUBS	r10, r10, r8\n\t"
        "SBCS	r11, r11, r9\n\t"
        "SBC	lr, lr, lr\n\t"
        "ASR	r12, %[b], #31\n\t"
        "EOR	r3, r4, r6\n\t"
        "AND	r3, r3, r12\n\t"
        "EOR	r4, r4, r3\n\t"
        "EOR	r6, r6, r3\n\t"
        "EOR	r3, r5, r7\n\t"
        "AND	r3, r3, r12\n\t"
        "EOR	r5, r5, r3\n\t"
        "EOR	r7, r7, r3\n\t"
        "EOR	r10, r10, r8\n\t"
        "AND	r10, r10, r12\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r11, r11, r12\n\t"
        "EOR	r9, r9, r11\n\t"
        "STRD	r4, r5, [%[r]]\n\t"
        "STRD	r6, r7, [%[r], #32]\n\t"
        "STRD	r8, r9, [%[r], #64]\n\t"
        "SBFX	r3, %[b], #7, #1\n\t"
        "EOR	r12, %[b], r3\n\t"
        "SUB	r12, r12, r3\n\t"
        "MOV	r4, #0x0\n\t"
        "MOV	r5, #0x0\n\t"
        "MOV	r6, #0x0\n\t"
        "MOV	r7, #0x0\n\t"
        "MOV	r8, #0x0\n\t"
        "MOV	r9, #0x0\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #31\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base], #8]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #40]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #72]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "ADD	%[base], %[base], #0x60\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #30\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base], #8]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #40]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #72]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "ADD	%[base], %[base], #0x60\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #29\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base], #8]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #40]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #72]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "ADD	%[base], %[base], #0x60\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #28\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base], #8]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #40]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #72]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "ADD	%[base], %[base], #0x60\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #27\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base], #8]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #40]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #72]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "ADD	%[base], %[base], #0x60\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #26\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base], #8]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #40]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #72]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "ADD	%[base], %[base], #0x60\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #25\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base], #8]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #40]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #72]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "ADD	%[base], %[base], #0x60\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #24\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base], #8]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #40]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #72]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "SUB	%[base], %[base], #0x2a0\n\t"
        "MVN	r10, #0x0\n\t"
        "MVN	r11, #0x0\n\t"
        "RSBS	lr, lr, #0x0\n\t"
        "SBCS	r10, r10, r8\n\t"
        "SBCS	r11, r11, r9\n\t"
        "SBC	lr, lr, lr\n\t"
        "ASR	r12, %[b], #31\n\t"
        "EOR	r3, r4, r6\n\t"
        "AND	r3, r3, r12\n\t"
        "EOR	r4, r4, r3\n\t"
        "EOR	r6, r6, r3\n\t"
        "EOR	r3, r5, r7\n\t"
        "AND	r3, r3, r12\n\t"
        "EOR	r5, r5, r3\n\t"
        "EOR	r7, r7, r3\n\t"
        "EOR	r10, r10, r8\n\t"
        "AND	r10, r10, r12\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r11, r11, r12\n\t"
        "EOR	r9, r9, r11\n\t"
        "STRD	r4, r5, [%[r], #8]\n\t"
        "STRD	r6, r7, [%[r], #40]\n\t"
        "STRD	r8, r9, [%[r], #72]\n\t"
        "SBFX	r3, %[b], #7, #1\n\t"
        "EOR	r12, %[b], r3\n\t"
        "SUB	r12, r12, r3\n\t"
        "MOV	r4, #0x0\n\t"
        "MOV	r5, #0x0\n\t"
        "MOV	r6, #0x0\n\t"
        "MOV	r7, #0x0\n\t"
        "MOV	r8, #0x0\n\t"
        "MOV	r9, #0x0\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #31\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base], #16]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #48]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #80]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "ADD	%[base], %[base], #0x60\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #30\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base], #16]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #48]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #80]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "ADD	%[base], %[base], #0x60\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #29\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base], #16]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #48]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #80]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "ADD	%[base], %[base], #0x60\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #28\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base], #16]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #48]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #80]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "ADD	%[base], %[base], #0x60\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #27\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base], #16]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #48]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #80]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "ADD	%[base], %[base], #0x60\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #26\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base], #16]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #48]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #80]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "ADD	%[base], %[base], #0x60\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #25\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base], #16]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #48]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #80]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "ADD	%[base], %[base], #0x60\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #24\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base], #16]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #48]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #80]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "SUB	%[base], %[base], #0x2a0\n\t"
        "MVN	r10, #0x0\n\t"
        "MVN	r11, #0x0\n\t"
        "RSBS	lr, lr, #0x0\n\t"
        "SBCS	r10, r10, r8\n\t"
        "SBCS	r11, r11, r9\n\t"
        "SBC	lr, lr, lr\n\t"
        "ASR	r12, %[b], #31\n\t"
        "EOR	r3, r4, r6\n\t"
        "AND	r3, r3, r12\n\t"
        "EOR	r4, r4, r3\n\t"
        "EOR	r6, r6, r3\n\t"
        "EOR	r3, r5, r7\n\t"
        "AND	r3, r3, r12\n\t"
        "EOR	r5, r5, r3\n\t"
        "EOR	r7, r7, r3\n\t"
        "EOR	r10, r10, r8\n\t"
        "AND	r10, r10, r12\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r11, r11, r12\n\t"
        "EOR	r9, r9, r11\n\t"
        "STRD	r4, r5, [%[r], #16]\n\t"
        "STRD	r6, r7, [%[r], #48]\n\t"
        "STRD	r8, r9, [%[r], #80]\n\t"
        "SBFX	r3, %[b], #7, #1\n\t"
        "EOR	r12, %[b], r3\n\t"
        "SUB	r12, r12, r3\n\t"
        "MOV	r4, #0x0\n\t"
        "MOV	r5, #0x0\n\t"
        "MOV	r6, #0x0\n\t"
        "MOV	r7, #0x0\n\t"
        "MOV	r8, #0x0\n\t"
        "MOV	r9, #0x0\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #31\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base], #24]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #56]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #88]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "ADD	%[base], %[base], #0x60\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #30\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base], #24]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #56]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #88]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "ADD	%[base], %[base], #0x60\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #29\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base], #24]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #56]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #88]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "ADD	%[base], %[base], #0x60\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #28\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base], #24]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #56]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #88]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "ADD	%[base], %[base], #0x60\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #27\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base], #24]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #56]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #88]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "ADD	%[base], %[base], #0x60\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #26\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base], #24]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #56]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #88]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "ADD	%[base], %[base], #0x60\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #25\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base], #24]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #56]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #88]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "ADD	%[base], %[base], #0x60\n\t"
        "MOV	r3, #0x80000000\n\t"
        "ROR	r3, r3, #24\n\t"
        "ROR	r3, r3, r12\n\t"
        "ASR	r3, r3, #31\n\t"
        "LDRD	r10, r11, [%[base], #24]\n\t"
        "EOR	r10, r10, r4\n\t"
        "EOR	r11, r11, r5\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r4, r4, r10\n\t"
        "EOR	r5, r5, r11\n\t"
        "LDRD	r10, r11, [%[base], #56]\n\t"
        "EOR	r10, r10, r6\n\t"
        "EOR	r11, r11, r7\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r6, r6, r10\n\t"
        "EOR	r7, r7, r11\n\t"
        "LDRD	r10, r11, [%[base], #88]\n\t"
        "EOR	r10, r10, r8\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r9, r9, r11\n\t"
        "SUB	%[base], %[base], #0x2a0\n\t"
        "MVN	r10, #0x0\n\t"
        "MVN	r11, #0x80000000\n\t"
        "RSBS	lr, lr, #0x0\n\t"
        "SBCS	r10, r10, r8\n\t"
        "SBC	r11, r11, r9\n\t"
        "ASR	r12, %[b], #31\n\t"
        "EOR	r3, r4, r6\n\t"
        "AND	r3, r3, r12\n\t"
        "EOR	r4, r4, r3\n\t"
        "EOR	r6, r6, r3\n\t"
        "EOR	r3, r5, r7\n\t"
        "AND	r3, r3, r12\n\t"
        "EOR	r5, r5, r3\n\t"
        "EOR	r7, r7, r3\n\t"
        "EOR	r10, r10, r8\n\t"
        "AND	r10, r10, r12\n\t"
        "EOR	r8, r8, r10\n\t"
        "EOR	r11, r11, r9\n\t"
        "AND	r11, r11, r12\n\t"
        "EOR	r9, r9, r11\n\t"
        "STRD	r4, r5, [%[r], #24]\n\t"
        "STRD	r6, r7, [%[r], #56]\n\t"
        "STRD	r8, r9, [%[r], #88]\n\t"
        : [r] "+r" (r), [base] "+r" (base), [b] "+r" (b)
        :
        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r3", "r10", "r11", "r12", "lr", "cc"
    );
}

#else
/* Select one table entry by a signed index, using masks instead of branches
 * for the zero and negative cases.
 *
 * The table at base is read as consecutive 0x60-byte entries, each treated as
 * three 32-byte values (v0, v1, v2) held as eight 32-bit words, lowest word
 * first. With entry = table[|b| - 1], the 96 bytes written to r are:
 *   b >  0:  v0, v1, v2
 *   b <  0:  v1, v0, (2^255 - 19) - v2
 *   b == 0:  1, 1, 0
 *
 * The entry address is computed from b, so the memory access pattern depends
 * on b; only the zero/negative handling is done with masks.
 *
 * r     Destination; 96 bytes are written.
 * base  Table of 0x60-byte entries. Entry 0 is read (and masked off) when b
 *       is 0.
 * b     Signed selector. Only the low 8 bits are used (SXTB).
 *       NOTE(review): the valid range of b and the table length are not
 *       visible here - confirm against the callers.
 *
 * When WOLFSSL_NO_VAR_ASSIGN_REG is not defined the arguments are pinned to
 * r0, r1 and r2 through local register variables.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p)
#else
void fe_cmov_table(fe* r, fe* base, signed char b)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register fe* r __asm__ ("r0") = (fe*)r_p;
    register fe* base __asm__ ("r1") = (fe*)base_p;
    register signed char b __asm__ ("r2") = (signed char)b_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        /* r3 = sign mask of b: all ones when b is negative, else 0 */
        "SXTB	%[b], %[b]\n\t"
        "SBFX	r3, %[b], #7, #1\n\t"
        /* b = |b|, computed as (b ^ mask) - mask */
        "EOR	%[b], %[b], r3\n\t"
        "SUB	%[b], %[b], r3\n\t"
        /* lr = non-zero mask: CLZ is 32 (bit 5 set) only when b is 0, so
         * after the shifts and MVN lr is 0 for b == 0 and all ones otherwise
         */
        "CLZ	lr, %[b]\n\t"
        "LSL	lr, lr, #26\n\t"
        "ASR	lr, lr, #31\n\t"
        "MVN	lr, lr\n\t"
        /* Entry index: |b| - 1 when b is non-zero, 0 when b is 0 */
        "ADD	%[b], %[b], lr\n\t"
        /* b = byte offset of the entry (index * 0x60); point base at it */
        "MOV	r12, #0x60\n\t"
        "MUL	%[b], %[b], r12\n\t"
        "ADD	%[base], %[base], %[b]\n\t"
        /* First 32-byte value: zeroed when b == 0 */
        "LDM	%[base]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
        "AND	r4, r4, lr\n\t"
        "AND	r5, r5, lr\n\t"
        "AND	r6, r6, lr\n\t"
        "AND	r7, r7, lr\n\t"
        "AND	r8, r8, lr\n\t"
        "AND	r9, r9, lr\n\t"
        "AND	r10, r10, lr\n\t"
        "AND	r11, r11, lr\n\t"
        /* ~lr is -1 only when b == 0: lowest word becomes 1 in that case */
        "MVN	r12, lr\n\t"
        "SUB	r4, r4, r12\n\t"
        /* Store at r + 32 when b is negative, at r + 0 otherwise */
        "MOV	r12, #0x20\n\t"
        "AND	r12, r12, r3\n\t"
        "ADD	%[r], %[r], r12\n\t"
        "STM	%[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
        "SUB	%[r], %[r], r12\n\t"
        /* Second 32-byte value: same masking, value 1 when b == 0 */
        "LDM	%[base]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
        "AND	r4, r4, lr\n\t"
        "AND	r5, r5, lr\n\t"
        "AND	r6, r6, lr\n\t"
        "AND	r7, r7, lr\n\t"
        "AND	r8, r8, lr\n\t"
        "AND	r9, r9, lr\n\t"
        "AND	r10, r10, lr\n\t"
        "AND	r11, r11, lr\n\t"
        "MVN	r12, lr\n\t"
        "SUB	r4, r4, r12\n\t"
        /* Store at r + 0 when b is negative, at r + 32 otherwise (the
         * opposite slot to the first value, i.e. a swap for negative b)
         */
        "MOV	r12, #0x20\n\t"
        "BIC	r12, r12, r3\n\t"
        "ADD	%[r], %[r], r12\n\t"
        "STM	%[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
        "SUB	%[r], %[r], r12\n\t"
        /* Third 32-byte value goes to r + 64 */
        "ADD	%[r], %[r], #0x40\n\t"
        /* Low four words. r8-r11 = low words of (2^255 - 19) - value when r3
         * is all ones: the constant is 0xffffffed followed by words of
         * 0xffffffff (r3). For r3 == 0 the difference is discarded below.
         */
        "LDM	%[base]!, {r4, r5, r6, r7}\n\t"
        "MVN	r12, #0x12\n\t"
        "SUBS	r8, r12, r4\n\t"
        "SBCS	r9, r3, r5\n\t"
        "SBCS	r10, r3, r6\n\t"
        "SBCS	r11, r3, r7\n\t"
        /* Keep the original words when b >= 0, the difference when b < 0 */
        "BIC	r4, r4, r3\n\t"
        "BIC	r5, r5, r3\n\t"
        "BIC	r6, r6, r3\n\t"
        "BIC	r7, r7, r3\n\t"
        "AND	r8, r8, r3\n\t"
        "AND	r9, r9, r3\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "ORR	r4, r4, r8\n\t"
        "ORR	r5, r5, r9\n\t"
        "ORR	r6, r6, r10\n\t"
        "ORR	r7, r7, r11\n\t"
        /* Zero the result when b == 0 */
        "AND	r4, r4, lr\n\t"
        "AND	r5, r5, lr\n\t"
        "AND	r6, r6, lr\n\t"
        "AND	r7, r7, lr\n\t"
        "STM	%[r]!, {r4, r5, r6, r7}\n\t"
        /* High four words. None of the instructions since the last SBCS
         * carries an S suffix, so the borrow from the low half continues
         * here. Top word of the constant is 0x7fffffff.
         */
        "LDM	%[base]!, {r4, r5, r6, r7}\n\t"
        "MVN	r12, #0x80000000\n\t"
        "SBCS	r8, r3, r4\n\t"
        "SBCS	r9, r3, r5\n\t"
        "SBCS	r10, r3, r6\n\t"
        "SBC	r11, r12, r7\n\t"
        "BIC	r4, r4, r3\n\t"
        "BIC	r5, r5, r3\n\t"
        "BIC	r6, r6, r3\n\t"
        "BIC	r7, r7, r3\n\t"
        "AND	r8, r8, r3\n\t"
        "AND	r9, r9, r3\n\t"
        "AND	r10, r10, r3\n\t"
        "AND	r11, r11, r3\n\t"
        "ORR	r4, r4, r8\n\t"
        "ORR	r5, r5, r9\n\t"
        "ORR	r6, r6, r10\n\t"
        "ORR	r7, r7, r11\n\t"
        "AND	r4, r4, lr\n\t"
        "AND	r5, r5, lr\n\t"
        "AND	r6, r6, lr\n\t"
        "AND	r7, r7, lr\n\t"
        "STM	%[r]!, {r4, r5, r6, r7}\n\t"
        /* Remove the entry offset added above; base is still advanced by the
         * 0x60 bytes of LDM write-back
         */
        "SUB	%[base], %[base], %[b]\n\t"
        : [r] "+r" (r), [base] "+r" (base), [b] "+r" (b)
        :
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc"
    );
}

#endif /* WC_NO_CACHE_RESISTANT */
#endif /* HAVE_ED25519_MAKE_KEY || HAVE_ED25519_SIGN */
#endif /* HAVE_ED25519 */
#ifdef WOLFSSL_SP_NO_UMAAL
/* Multiply two 8-word (256-bit) values and fold the 16-word product back to
 * 8 words using the factors 38 and 19 (2^256 = 38 and 2^255 = 19 modulo
 * 2^255 - 19). This variant uses only UMULL/UMLAL/MUL.
 *
 * The function takes no C arguments; the assembly reads registers directly:
 *   r0  address the 8 result words are stored to
 *   r1  address of A (8 words, read at offsets 0..28)
 *   r2  address of B (8 words, read at offsets 0..28)
 * On exit r0 holds the result address again, sp is restored and r1-r12 and lr
 * have been overwritten.
 *
 * Stack use: 0x28 bytes. [sp, #0..#28] hold the low 8 product words P[0..7]
 * as each becomes final; [sp, #36] holds the saved r0.
 *
 * NOTE(review): only "memory", "lr" and "cc" are declared as clobbers although
 * r0-r12 are written. Presumably this routine is only entered by a branch from
 * other assembly in this file that follows the register convention above -
 * confirm before calling it from C.
 */
void fe_mul_op(void);
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_mul_op()
#else
void fe_mul_op()
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
    __asm__ __volatile__ (
        /* Reserve the work area and save the result address; r0 is then
         * kept at zero and used as the constant 0 in the ADC instructions
         */
        "SUB	sp, sp, #0x28\n\t"
        "STR	r0, [sp, #36]\n\t"
        "MOV	r0, #0x0\n\t"
        /* Row A[0]: r12 holds the current word of A for the whole row */
        "LDR	r12, [r1]\n\t"
        /* A[0] * B[0] */
        "LDR	lr, [r2]\n\t"
        "UMULL	r3, r4, r12, lr\n\t"
        /* A[0] * B[2] */
        "LDR	lr, [r2, #8]\n\t"
        "UMULL	r5, r6, r12, lr\n\t"
        /* A[0] * B[4] */
        "LDR	lr, [r2, #16]\n\t"
        "UMULL	r7, r8, r12, lr\n\t"
        /* A[0] * B[6] */
        "LDR	lr, [r2, #24]\n\t"
        "UMULL	r9, r10, r12, lr\n\t"
        /* P[0] is final */
        "STR	r3, [sp]\n\t"
        /* A[0] * B[1] */
        "LDR	lr, [r2, #4]\n\t"
        "MOV	r11, r0\n\t"
        "UMLAL	r4, r11, r12, lr\n\t"
        "ADDS	r5, r5, r11\n\t"
        /* A[0] * B[3] */
        "LDR	lr, [r2, #12]\n\t"
        "ADCS	r6, r6, #0x0\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r6, r11, r12, lr\n\t"
        "ADDS	r7, r7, r11\n\t"
        /* A[0] * B[5] */
        "LDR	lr, [r2, #20]\n\t"
        "ADCS	r8, r8, #0x0\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r8, r11, r12, lr\n\t"
        "ADDS	r9, r9, r11\n\t"
        /* A[0] * B[7] */
        "LDR	lr, [r2, #28]\n\t"
        "ADCS	r10, r10, #0x0\n\t"
        "ADC	r3, r0, #0x0\n\t"
        "UMLAL	r10, r3, r12, lr\n\t"
        /* Rows A[1]..A[7]: each step adds the carry word r11 into the next
         * column (ADDS), captures the carry out in a fresh r11 (ADC with the
         * zero in r0) and accumulates the next product on top (UMLAL). The
         * lowest column of each row is final and is stored to the stack; the
         * register it frees becomes the new top word of the row.
         */
        /* A[1] * B[0] */
        "LDR	r12, [r1, #4]\n\t"
        "LDR	lr, [r2]\n\t"
        "MOV	r11, #0x0\n\t"
        "UMLAL	r4, r11, r12, lr\n\t"
        "STR	r4, [sp, #4]\n\t"
        "ADDS	r5, r5, r11\n\t"
        /* A[1] * B[1] */
        "LDR	lr, [r2, #4]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r5, r11, r12, lr\n\t"
        "ADDS	r6, r6, r11\n\t"
        /* A[1] * B[2] */
        "LDR	lr, [r2, #8]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r6, r11, r12, lr\n\t"
        "ADDS	r7, r7, r11\n\t"
        /* A[1] * B[3] */
        "LDR	lr, [r2, #12]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r7, r11, r12, lr\n\t"
        "ADDS	r8, r8, r11\n\t"
        /* A[1] * B[4] */
        "LDR	lr, [r2, #16]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r8, r11, r12, lr\n\t"
        "ADDS	r9, r9, r11\n\t"
        /* A[1] * B[5] */
        "LDR	lr, [r2, #20]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r9, r11, r12, lr\n\t"
        "ADDS	r10, r10, r11\n\t"
        /* A[1] * B[6] */
        "LDR	lr, [r2, #24]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r10, r11, r12, lr\n\t"
        "ADDS	r3, r3, r11\n\t"
        /* A[1] * B[7] */
        "LDR	lr, [r2, #28]\n\t"
        "ADC	r4, r0, #0x0\n\t"
        "UMLAL	r3, r4, r12, lr\n\t"
        /* A[2] * B[0] */
        "LDR	r12, [r1, #8]\n\t"
        "LDR	lr, [r2]\n\t"
        "MOV	r11, #0x0\n\t"
        "UMLAL	r5, r11, r12, lr\n\t"
        "STR	r5, [sp, #8]\n\t"
        "ADDS	r6, r6, r11\n\t"
        /* A[2] * B[1] */
        "LDR	lr, [r2, #4]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r6, r11, r12, lr\n\t"
        "ADDS	r7, r7, r11\n\t"
        /* A[2] * B[2] */
        "LDR	lr, [r2, #8]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r7, r11, r12, lr\n\t"
        "ADDS	r8, r8, r11\n\t"
        /* A[2] * B[3] */
        "LDR	lr, [r2, #12]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r8, r11, r12, lr\n\t"
        "ADDS	r9, r9, r11\n\t"
        /* A[2] * B[4] */
        "LDR	lr, [r2, #16]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r9, r11, r12, lr\n\t"
        "ADDS	r10, r10, r11\n\t"
        /* A[2] * B[5] */
        "LDR	lr, [r2, #20]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r10, r11, r12, lr\n\t"
        "ADDS	r3, r3, r11\n\t"
        /* A[2] * B[6] */
        "LDR	lr, [r2, #24]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r3, r11, r12, lr\n\t"
        "ADDS	r4, r4, r11\n\t"
        /* A[2] * B[7] */
        "LDR	lr, [r2, #28]\n\t"
        "ADC	r5, r0, #0x0\n\t"
        "UMLAL	r4, r5, r12, lr\n\t"
        /* A[3] * B[0] */
        "LDR	r12, [r1, #12]\n\t"
        "LDR	lr, [r2]\n\t"
        "MOV	r11, #0x0\n\t"
        "UMLAL	r6, r11, r12, lr\n\t"
        "STR	r6, [sp, #12]\n\t"
        "ADDS	r7, r7, r11\n\t"
        /* A[3] * B[1] */
        "LDR	lr, [r2, #4]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r7, r11, r12, lr\n\t"
        "ADDS	r8, r8, r11\n\t"
        /* A[3] * B[2] */
        "LDR	lr, [r2, #8]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r8, r11, r12, lr\n\t"
        "ADDS	r9, r9, r11\n\t"
        /* A[3] * B[3] */
        "LDR	lr, [r2, #12]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r9, r11, r12, lr\n\t"
        "ADDS	r10, r10, r11\n\t"
        /* A[3] * B[4] */
        "LDR	lr, [r2, #16]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r10, r11, r12, lr\n\t"
        "ADDS	r3, r3, r11\n\t"
        /* A[3] * B[5] */
        "LDR	lr, [r2, #20]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r3, r11, r12, lr\n\t"
        "ADDS	r4, r4, r11\n\t"
        /* A[3] * B[6] */
        "LDR	lr, [r2, #24]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r4, r11, r12, lr\n\t"
        "ADDS	r5, r5, r11\n\t"
        /* A[3] * B[7] */
        "LDR	lr, [r2, #28]\n\t"
        "ADC	r6, r0, #0x0\n\t"
        "UMLAL	r5, r6, r12, lr\n\t"
        /* A[4] * B[0] */
        "LDR	r12, [r1, #16]\n\t"
        "LDR	lr, [r2]\n\t"
        "MOV	r11, #0x0\n\t"
        "UMLAL	r7, r11, r12, lr\n\t"
        "STR	r7, [sp, #16]\n\t"
        "ADDS	r8, r8, r11\n\t"
        /* A[4] * B[1] */
        "LDR	lr, [r2, #4]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r8, r11, r12, lr\n\t"
        "ADDS	r9, r9, r11\n\t"
        /* A[4] * B[2] */
        "LDR	lr, [r2, #8]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r9, r11, r12, lr\n\t"
        "ADDS	r10, r10, r11\n\t"
        /* A[4] * B[3] */
        "LDR	lr, [r2, #12]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r10, r11, r12, lr\n\t"
        "ADDS	r3, r3, r11\n\t"
        /* A[4] * B[4] */
        "LDR	lr, [r2, #16]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r3, r11, r12, lr\n\t"
        "ADDS	r4, r4, r11\n\t"
        /* A[4] * B[5] */
        "LDR	lr, [r2, #20]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r4, r11, r12, lr\n\t"
        "ADDS	r5, r5, r11\n\t"
        /* A[4] * B[6] */
        "LDR	lr, [r2, #24]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r5, r11, r12, lr\n\t"
        "ADDS	r6, r6, r11\n\t"
        /* A[4] * B[7] */
        "LDR	lr, [r2, #28]\n\t"
        "ADC	r7, r0, #0x0\n\t"
        "UMLAL	r6, r7, r12, lr\n\t"
        /* A[5] * B[0] */
        "LDR	r12, [r1, #20]\n\t"
        "LDR	lr, [r2]\n\t"
        "MOV	r11, #0x0\n\t"
        "UMLAL	r8, r11, r12, lr\n\t"
        "STR	r8, [sp, #20]\n\t"
        "ADDS	r9, r9, r11\n\t"
        /* A[5] * B[1] */
        "LDR	lr, [r2, #4]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r9, r11, r12, lr\n\t"
        "ADDS	r10, r10, r11\n\t"
        /* A[5] * B[2] */
        "LDR	lr, [r2, #8]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r10, r11, r12, lr\n\t"
        "ADDS	r3, r3, r11\n\t"
        /* A[5] * B[3] */
        "LDR	lr, [r2, #12]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r3, r11, r12, lr\n\t"
        "ADDS	r4, r4, r11\n\t"
        /* A[5] * B[4] */
        "LDR	lr, [r2, #16]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r4, r11, r12, lr\n\t"
        "ADDS	r5, r5, r11\n\t"
        /* A[5] * B[5] */
        "LDR	lr, [r2, #20]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r5, r11, r12, lr\n\t"
        "ADDS	r6, r6, r11\n\t"
        /* A[5] * B[6] */
        "LDR	lr, [r2, #24]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r6, r11, r12, lr\n\t"
        "ADDS	r7, r7, r11\n\t"
        /* A[5] * B[7] */
        "LDR	lr, [r2, #28]\n\t"
        "ADC	r8, r0, #0x0\n\t"
        "UMLAL	r7, r8, r12, lr\n\t"
        /* A[6] * B[0] */
        "LDR	r12, [r1, #24]\n\t"
        "LDR	lr, [r2]\n\t"
        "MOV	r11, #0x0\n\t"
        "UMLAL	r9, r11, r12, lr\n\t"
        "STR	r9, [sp, #24]\n\t"
        "ADDS	r10, r10, r11\n\t"
        /* A[6] * B[1] */
        "LDR	lr, [r2, #4]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r10, r11, r12, lr\n\t"
        "ADDS	r3, r3, r11\n\t"
        /* A[6] * B[2] */
        "LDR	lr, [r2, #8]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r3, r11, r12, lr\n\t"
        "ADDS	r4, r4, r11\n\t"
        /* A[6] * B[3] */
        "LDR	lr, [r2, #12]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r4, r11, r12, lr\n\t"
        "ADDS	r5, r5, r11\n\t"
        /* A[6] * B[4] */
        "LDR	lr, [r2, #16]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r5, r11, r12, lr\n\t"
        "ADDS	r6, r6, r11\n\t"
        /* A[6] * B[5] */
        "LDR	lr, [r2, #20]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r6, r11, r12, lr\n\t"
        "ADDS	r7, r7, r11\n\t"
        /* A[6] * B[6] */
        "LDR	lr, [r2, #24]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r7, r11, r12, lr\n\t"
        "ADDS	r8, r8, r11\n\t"
        /* A[6] * B[7] */
        "LDR	lr, [r2, #28]\n\t"
        "ADC	r9, r0, #0x0\n\t"
        "UMLAL	r8, r9, r12, lr\n\t"
        /* A[7] * B[0] */
        "LDR	r12, [r1, #28]\n\t"
        "LDR	lr, [r2]\n\t"
        "MOV	r11, #0x0\n\t"
        "UMLAL	r10, r11, r12, lr\n\t"
        "STR	r10, [sp, #28]\n\t"
        "ADDS	r3, r3, r11\n\t"
        /* A[7] * B[1] */
        "LDR	lr, [r2, #4]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r3, r11, r12, lr\n\t"
        "ADDS	r4, r4, r11\n\t"
        /* A[7] * B[2] */
        "LDR	lr, [r2, #8]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r4, r11, r12, lr\n\t"
        "ADDS	r5, r5, r11\n\t"
        /* A[7] * B[3] */
        "LDR	lr, [r2, #12]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r5, r11, r12, lr\n\t"
        "ADDS	r6, r6, r11\n\t"
        /* A[7] * B[4] */
        "LDR	lr, [r2, #16]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r6, r11, r12, lr\n\t"
        "ADDS	r7, r7, r11\n\t"
        /* A[7] * B[5] */
        "LDR	lr, [r2, #20]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r7, r11, r12, lr\n\t"
        "ADDS	r8, r8, r11\n\t"
        /* A[7] * B[6] */
        "LDR	lr, [r2, #24]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r8, r11, r12, lr\n\t"
        "ADDS	r9, r9, r11\n\t"
        /* A[7] * B[7] */
        "LDR	lr, [r2, #28]\n\t"
        "ADC	r10, r0, #0x0\n\t"
        "UMLAL	r9, r10, r12, lr\n\t"
        /* At this point P[0..7] are on the stack and P[8..15] are in
         * r3, r4, r5, r6, r7, r8, r9, r10
         */
        /* Reduce */
        /* r11:r10 = P[7] + 38 * P[15]; lr walks the stacked low words */
        "LDR	r2, [sp, #28]\n\t"
        "MOV	lr, sp\n\t"
        "MOV	r12, #0x26\n\t"
        "UMULL	r10, r11, r10, r12\n\t"
        "ADDS	r10, r10, r2\n\t"
        "ADC	r11, r11, #0x0\n\t"
        /* r11 = 19 * (r11:r10 >> 31): everything from bit 255 of the result
         * upwards is folded into the bottom word; bit 31 of r10 itself is
         * cleared by the BFC further down
         */
        "MOV	r12, #0x13\n\t"
        "LSL	r11, r11, #1\n\t"
        "ORR	r11, r11, r10, LSR #31\n\t"
        "MUL	r11, r11, r12\n\t"
        /* For i = 0..6: result[i] = P[i] + carry + 38 * P[8 + i], with the
         * carry out of each column kept in r11
         */
        "LDM	lr!, {r1, r2}\n\t"
        "MOV	r12, #0x26\n\t"
        "ADDS	r1, r1, r11\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r1, r11, r3, r12\n\t"
        "ADDS	r2, r2, r11\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r2, r11, r4, r12\n\t"
        "LDM	lr!, {r3, r4}\n\t"
        "ADDS	r3, r3, r11\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r3, r11, r5, r12\n\t"
        "ADDS	r4, r4, r11\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r4, r11, r6, r12\n\t"
        "LDM	lr!, {r5, r6}\n\t"
        "ADDS	r5, r5, r11\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r5, r11, r7, r12\n\t"
        "ADDS	r6, r6, r11\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r6, r11, r8, r12\n\t"
        /* The r8 loaded here is not used; it is replaced just below */
        "LDM	lr!, {r7, r8}\n\t"
        "ADDS	r7, r7, r11\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r7, r11, r9, r12\n\t"
        /* Top word: low 31 bits of P[7] + 38 * P[15] plus the final carry */
        "BFC	r10, #31, #1\n\t"
        "ADDS	r8, r10, r11\n\t"
        /* Store */
        /* Reload the result address saved on entry, write the 8 words and
         * release the work area
         */
        "LDR	r0, [sp, #36]\n\t"
        "STM	r0, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t"
        "ADD	sp, sp, #0x28\n\t"
        :
        :
        : "memory", "lr", "cc"
    );
}

#else
/* Multiply two 8-word values and reduce the 16-word product back to 8 words
 * (variant that uses the UMAAL instruction).
 *
 * Register interface, as used by the instructions below (this is not a
 * C-level signature):
 *   r0 - address the 8 result words are stored to
 *   r1 - address of operand A (8 words read)
 *   r2 - address of operand B (8 words read)
 *
 * r0-r12 and lr are all overwritten. The asm statement lists only "memory",
 * "lr" and "cc" as clobbers, so code that reaches this routine with BL has to
 * treat the remaining registers as destroyed itself.
 *
 * Stack frame (0x2c bytes):
 *   sp+0 .. sp+28 : product words 0-7, stored as each one becomes final
 *   sp+32         : temporary spill of one partial-sum word
 *   sp+36, sp+40  : entry values of r0 and r1
 */
void fe_mul_op(void);
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_mul_op()
#else
void fe_mul_op()
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
    __asm__ __volatile__ (
        "SUB	sp, sp, #0x2c\n\t"
        /* Save the result pointer (r0) and the A pointer (r1). */
        "STRD	r0, r1, [sp, #36]\n\t"
        /* lr walks through B; r0-r3 hold A[0..3]. */
        "MOV	lr, r2\n\t"
        "LDM	r1, {r0, r1, r2, r3}\n\t"
        /* A[0..3] * B[0..3]: r4-r6 = B[0..2], B[3] is loaded into r4 later. */
        "LDM	lr!, {r4, r5, r6}\n\t"
        "UMULL	r10, r11, r0, r4\n\t"
        "UMULL	r12, r7, r1, r4\n\t"
        "UMAAL	r11, r12, r0, r5\n\t"
        "UMULL	r8, r9, r2, r4\n\t"
        "UMAAL	r12, r8, r1, r5\n\t"
        "UMAAL	r12, r7, r0, r6\n\t"
        "UMAAL	r8, r9, r3, r4\n\t"
        /* Product words 0-2 are complete. */
        "STM	sp, {r10, r11, r12}\n\t"
        "UMAAL	r7, r8, r2, r5\n\t"
        /* r4 = B[3] */
        "LDM	lr!, {r4}\n\t"
        "UMULL	r10, r11, r1, r6\n\t"
        "UMAAL	r8, r9, r2, r6\n\t"
        "UMAAL	r7, r10, r0, r4\n\t"
        "UMAAL	r8, r11, r3, r5\n\t"
        /* Product word 3 is complete. */
        "STR	r7, [sp, #12]\n\t"
        "UMAAL	r8, r10, r1, r4\n\t"
        "UMAAL	r9, r11, r3, r6\n\t"
        "UMAAL	r9, r10, r2, r4\n\t"
        "UMAAL	r10, r11, r3, r4\n\t"
        /* A[0..3] * B[4..7]: r4-r7 = B[4..7] (lr is not advanced here). */
        "LDM	lr, {r4, r5, r6, r7}\n\t"
        "MOV	r12, #0x0\n\t"
        "UMLAL	r8, r12, r0, r4\n\t"
        "UMAAL	r9, r12, r1, r4\n\t"
        "UMAAL	r10, r12, r2, r4\n\t"
        "UMAAL	r11, r12, r3, r4\n\t"
        "MOV	r4, #0x0\n\t"
        "UMLAL	r9, r4, r0, r5\n\t"
        "UMAAL	r10, r4, r1, r5\n\t"
        "UMAAL	r11, r4, r2, r5\n\t"
        "UMAAL	r12, r4, r3, r5\n\t"
        "MOV	r5, #0x0\n\t"
        "UMLAL	r10, r5, r0, r6\n\t"
        "UMAAL	r11, r5, r1, r6\n\t"
        "UMAAL	r12, r5, r2, r6\n\t"
        "UMAAL	r4, r5, r3, r6\n\t"
        "MOV	r6, #0x0\n\t"
        "UMLAL	r11, r6, r0, r7\n\t"
        /* A[0] is no longer needed: reload the saved A pointer into r0 and
         * step it to A[4]. The pointer arithmetic is interleaved with the
         * remaining multiplies of this group. */
        "LDR	r0, [sp, #40]\n\t"
        "UMAAL	r12, r6, r1, r7\n\t"
        "ADD	r0, r0, #0x10\n\t"
        "UMAAL	r4, r6, r2, r7\n\t"
        /* Rewind lr from B[4] back to B[0]. */
        "SUB	lr, lr, #0x10\n\t"
        "UMAAL	r5, r6, r3, r7\n\t"
        /* r0-r3 = A[4..7] */
        "LDM	r0, {r0, r1, r2, r3}\n\t"
        /* Spill r6 so the register can hold the B words below. */
        "STR	r6, [sp, #32]\n\t"
        /* A[4..7] * B[0..3], one B word at a time in r6. */
        "LDM	lr!, {r6}\n\t"
        "MOV	r7, #0x0\n\t"
        "UMLAL	r8, r7, r0, r6\n\t"
        "UMAAL	r9, r7, r1, r6\n\t"
        /* Product word 4 is complete. */
        "STR	r8, [sp, #16]\n\t"
        "UMAAL	r10, r7, r2, r6\n\t"
        "UMAAL	r11, r7, r3, r6\n\t"
        "LDM	lr!, {r6}\n\t"
        "MOV	r8, #0x0\n\t"
        "UMLAL	r9, r8, r0, r6\n\t"
        "UMAAL	r10, r8, r1, r6\n\t"
        /* Product word 5 is complete. */
        "STR	r9, [sp, #20]\n\t"
        "UMAAL	r11, r8, r2, r6\n\t"
        "UMAAL	r12, r8, r3, r6\n\t"
        "LDM	lr!, {r6}\n\t"
        "MOV	r9, #0x0\n\t"
        "UMLAL	r10, r9, r0, r6\n\t"
        "UMAAL	r11, r9, r1, r6\n\t"
        /* Product word 6 is complete. */
        "STR	r10, [sp, #24]\n\t"
        "UMAAL	r12, r9, r2, r6\n\t"
        "UMAAL	r4, r9, r3, r6\n\t"
        "LDM	lr!, {r6}\n\t"
        "MOV	r10, #0x0\n\t"
        "UMLAL	r11, r10, r0, r6\n\t"
        "UMAAL	r12, r10, r1, r6\n\t"
        /* Product word 7 is complete. */
        "STR	r11, [sp, #28]\n\t"
        "UMAAL	r4, r10, r2, r6\n\t"
        "UMAAL	r5, r10, r3, r6\n\t"
        /* A[4..7] * B[4..7]: r11 = B[4], then B[5]. */
        "LDM	lr!, {r11}\n\t"
        "UMAAL	r12, r7, r0, r11\n\t"
        "UMAAL	r4, r7, r1, r11\n\t"
        /* Bring back the partial sum spilled to sp+32. */
        "LDR	r6, [sp, #32]\n\t"
        "UMAAL	r5, r7, r2, r11\n\t"
        "UMAAL	r6, r7, r3, r11\n\t"
        "LDM	lr!, {r11}\n\t"
        "UMAAL	r4, r8, r0, r11\n\t"
        "UMAAL	r5, r8, r1, r11\n\t"
        "UMAAL	r6, r8, r2, r11\n\t"
        "UMAAL	r7, r8, r3, r11\n\t"
        /* r11 = B[6], lr = B[7]; the B pointer itself is overwritten. */
        "LDM	lr, {r11, lr}\n\t"
        "UMAAL	r5, r9, r0, r11\n\t"
        "UMAAL	r6, r10, r0, lr\n\t"
        "UMAAL	r6, r9, r1, r11\n\t"
        "UMAAL	r7, r10, r1, lr\n\t"
        "UMAAL	r7, r9, r2, r11\n\t"
        "UMAAL	r8, r10, r2, lr\n\t"
        "UMAAL	r8, r9, r3, r11\n\t"
        "UMAAL	r9, r10, r3, lr\n\t"
        /* Product words 8-15 are now in r12, r4, r5, r6, r7, r8, r9, r10. */
        /* Reduce */
        /* r0 = product word 7. With lr = 0x25 the UMAAL computes
         * r10 * 0x25 + r10 + r0, i.e. word 15 * 0x26 + word 7, leaving the
         * low half in r10 and the high half in r0. */
        "LDR	r0, [sp, #28]\n\t"
        "MOV	lr, #0x25\n\t"
        "UMAAL	r10, r0, r10, lr\n\t"
        /* r0 = everything from bit 31 of r10 upward; r11 = that value * 0x13,
         * which is used as the carry-in for word 0 below. */
        "MOV	lr, #0x13\n\t"
        "LSL	r0, r0, #1\n\t"
        "ORR	r0, r0, r10, LSR #31\n\t"
        "MUL	r11, r0, lr\n\t"
        /* Pop product words 0-2 and add 0x26 * words 8-10, with r11 carrying
         * between words. */
        "POP	{r0, r1, r2}\n\t"
        "MOV	lr, #0x26\n\t"
        "UMAAL	r0, r11, r12, lr\n\t"
        "UMAAL	r1, r11, r4, lr\n\t"
        "UMAAL	r2, r11, r5, lr\n\t"
        /* Words 3-5 plus 0x26 * words 11-13. */
        "POP	{r3, r4, r5}\n\t"
        "UMAAL	r3, r11, r6, lr\n\t"
        "UMAAL	r4, r11, r7, lr\n\t"
        "UMAAL	r5, r11, r8, lr\n\t"
        /* Word 6 plus 0x26 * word 14; the top word has its bit 31 cleared
         * before the final carry is added. */
        "POP	{r6}\n\t"
        "BFC	r10, #31, #1\n\t"
        "UMAAL	r6, r11, r9, lr\n\t"
        "ADD	r7, r10, r11\n\t"
        /* After popping 7 words the saved result pointer sits at sp+8. */
        "LDR	lr, [sp, #8]\n\t"
        /* Store */
        "STM	lr, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t"
        /* Release the rest of the frame (7 words popped + 0x10 = 0x2c). */
        "ADD	sp, sp, #0x10\n\t"
        :
        :
        : "memory", "lr", "cc"
    );
}

#endif /* WOLFSSL_SP_NO_UMAAL */
/* Multiply two field elements by calling fe_mul_op.
 *
 * r  Receives the 8-word result.
 * a  First operand.
 * b  Second operand.
 *
 * fe_mul_op takes its arguments in r0 (result), r1 and r2 (operands) and
 * overwrites r0-r12 and lr; this wrapper supplies those registers and lists
 * everything that is not an operand as clobbered.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_mul(fe r_p, const fe a_p, const fe b_p)
#else
void fe_mul(fe r, const fe a, const fe b)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    /* Pin the three pointers to the registers fe_mul_op reads. */
    register sword32* r __asm__ ("r0") = (sword32*)r_p;
    register const sword32* a __asm__ ("r1") = (const sword32*)a_p;
    register const sword32* b __asm__ ("r2") = (const sword32*)b_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    /* NOTE(review): when WOLFSSL_NO_VAR_ASSIGN_REG is defined nothing in this
     * function forces r, a and b into r0-r2; presumably this relies on the
     * parameters still being in their argument registers - confirm. */
    __asm__ __volatile__ (
        "BL	fe_mul_op\n\t"
        /* Operands are read-write because fe_mul_op overwrites r0-r2. */
        : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc"
    );
}

#ifdef WOLFSSL_SP_NO_UMAAL
/* Square an 8-word value and reduce the 16-word result back to 8 words
 * (variant that does not use the UMAAL instruction).
 *
 * Register interface, as used by the instructions below (this is not a
 * C-level signature):
 *   r0 - address the 8 result words are stored to
 *   r1 - address of the operand A (8 words read)
 *
 * r0-r12 and lr are all overwritten. The asm statement lists only "memory",
 * "lr" and "cc" as clobbers, so code that reaches this routine with BL has to
 * treat the remaining registers as destroyed itself.
 *
 * Stack frame (0x44 bytes):
 *   sp+0 .. sp+60 : the 16 words of the square as it is built up
 *   sp+64         : entry value of r0
 *
 * Method: sum the cross products A[i] * A[j] for i < j, double that sum, add
 * the squares A[i] * A[i], then reduce.
 */
void fe_sq_op(void);
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_sq_op()
#else
void fe_sq_op()
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
    __asm__ __volatile__ (
        "SUB	sp, sp, #0x44\n\t"
        /* Save the result pointer; r0 is reused as a constant zero. */
        "STR	r0, [sp, #64]\n\t"
        /* Square */
        "MOV	r0, #0x0\n\t"
        "LDR	r12, [r1]\n\t"
        /* A[0] * A[1] */
        "LDR	lr, [r1, #4]\n\t"
        "UMULL	r4, r5, r12, lr\n\t"
        /* A[0] * A[3] */
        "LDR	lr, [r1, #12]\n\t"
        "UMULL	r6, r7, r12, lr\n\t"
        /* A[0] * A[5] */
        "LDR	lr, [r1, #20]\n\t"
        "UMULL	r8, r9, r12, lr\n\t"
        /* A[0] * A[7] */
        "LDR	lr, [r1, #28]\n\t"
        "UMULL	r10, r3, r12, lr\n\t"
        /* A[0] * A[2] */
        "LDR	lr, [r1, #8]\n\t"
        "MOV	r11, #0x0\n\t"
        "UMLAL	r5, r11, r12, lr\n\t"
        "ADDS	r6, r6, r11\n\t"
        /* A[0] * A[4] */
        "LDR	lr, [r1, #16]\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r7, r11, r12, lr\n\t"
        "ADDS	r8, r8, r11\n\t"
        /* A[0] * A[6] */
        "LDR	lr, [r1, #24]\n\t"
        "ADCS	r9, r9, #0x0\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r9, r11, r12, lr\n\t"
        "ADDS	r10, r10, r11\n\t"
        "ADCS	r3, r3, #0x0\n\t"
        /* Cross-product words 1 and 2 are complete. */
        "STR	r4, [sp, #4]\n\t"
        "STR	r5, [sp, #8]\n\t"
        /* A[1] * A[2] */
        "LDR	r12, [r1, #4]\n\t"
        "LDR	lr, [r1, #8]\n\t"
        "MOV	r11, #0x0\n\t"
        "UMLAL	r6, r11, r12, lr\n\t"
        /* Cross-product word 3 is complete. */
        "STR	r6, [sp, #12]\n\t"
        "ADDS	r7, r7, r11\n\t"
        /* A[1] * A[3] */
        "LDR	lr, [r1, #12]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r7, r11, r12, lr\n\t"
        /* Cross-product word 4 is complete. */
        "STR	r7, [sp, #16]\n\t"
        "ADDS	r8, r8, r11\n\t"
        /* A[1] * A[4] */
        "LDR	lr, [r1, #16]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r8, r11, r12, lr\n\t"
        "ADDS	r9, r9, r11\n\t"
        /* A[1] * A[5] */
        "LDR	lr, [r1, #20]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r9, r11, r12, lr\n\t"
        "ADDS	r10, r10, r11\n\t"
        /* A[1] * A[6] */
        "LDR	lr, [r1, #24]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r10, r11, r12, lr\n\t"
        "ADDS	r3, r3, r11\n\t"
        /* A[1] * A[7] */
        "LDR	lr, [r1, #28]\n\t"
        "ADC	r4, r0, #0x0\n\t"
        "UMLAL	r3, r4, r12, lr\n\t"
        /* A[2] * A[3] */
        "LDR	r12, [r1, #8]\n\t"
        "LDR	lr, [r1, #12]\n\t"
        "MOV	r11, #0x0\n\t"
        "UMLAL	r8, r11, r12, lr\n\t"
        /* Cross-product word 5 is complete. */
        "STR	r8, [sp, #20]\n\t"
        "ADDS	r9, r9, r11\n\t"
        /* A[2] * A[4] */
        "LDR	lr, [r1, #16]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r9, r11, r12, lr\n\t"
        /* Cross-product word 6 is complete. */
        "STR	r9, [sp, #24]\n\t"
        "ADDS	r10, r10, r11\n\t"
        /* A[2] * A[5] */
        "LDR	lr, [r1, #20]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r10, r11, r12, lr\n\t"
        "ADDS	r3, r3, r11\n\t"
        /* A[2] * A[6] */
        "LDR	lr, [r1, #24]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r3, r11, r12, lr\n\t"
        "ADDS	r4, r4, r11\n\t"
        /* A[2] * A[7] */
        "LDR	lr, [r1, #28]\n\t"
        "ADC	r5, r0, #0x0\n\t"
        "UMLAL	r4, r5, r12, lr\n\t"
        /* A[3] * A[4] */
        "LDR	r12, [r1, #12]\n\t"
        "LDR	lr, [r1, #16]\n\t"
        "MOV	r11, #0x0\n\t"
        "UMLAL	r10, r11, r12, lr\n\t"
        /* Cross-product word 7 is complete. */
        "STR	r10, [sp, #28]\n\t"
        "ADDS	r3, r3, r11\n\t"
        /* A[3] * A[5] */
        "LDR	lr, [r1, #20]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r3, r11, r12, lr\n\t"
        "ADDS	r4, r4, r11\n\t"
        /* A[3] * A[6] */
        "LDR	lr, [r1, #24]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r4, r11, r12, lr\n\t"
        "ADDS	r5, r5, r11\n\t"
        /* A[3] * A[7] */
        "LDR	lr, [r1, #28]\n\t"
        "ADC	r6, r0, #0x0\n\t"
        "UMLAL	r5, r6, r12, lr\n\t"
        /* A[4] * A[5] */
        "LDR	r12, [r1, #16]\n\t"
        "LDR	lr, [r1, #20]\n\t"
        "MOV	r11, #0x0\n\t"
        "UMLAL	r4, r11, r12, lr\n\t"
        "ADDS	r5, r5, r11\n\t"
        /* A[4] * A[6] */
        "LDR	lr, [r1, #24]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r5, r11, r12, lr\n\t"
        "ADDS	r6, r6, r11\n\t"
        /* A[4] * A[7] */
        "LDR	lr, [r1, #28]\n\t"
        "ADC	r7, r0, #0x0\n\t"
        "UMLAL	r6, r7, r12, lr\n\t"
        /* A[5] * A[6] */
        "LDR	r12, [r1, #20]\n\t"
        "LDR	lr, [r1, #24]\n\t"
        "MOV	r11, #0x0\n\t"
        "UMLAL	r6, r11, r12, lr\n\t"
        "ADDS	r7, r7, r11\n\t"
        /* A[5] * A[7] */
        "LDR	lr, [r1, #28]\n\t"
        "ADC	r8, r0, #0x0\n\t"
        "UMLAL	r7, r8, r12, lr\n\t"
        /* A[6] * A[7] */
        "LDR	r12, [r1, #24]\n\t"
        "LDR	lr, [r1, #28]\n\t"
        "MOV	r9, #0x0\n\t"
        "UMLAL	r8, r9, r12, lr\n\t"
        /* Store cross-product words 8-14 (r3-r9) at sp+0x20. */
        "ADD	lr, sp, #0x20\n\t"
        "STM	lr, {r3, r4, r5, r6, r7, r8, r9}\n\t"
        /* Double the cross-product sum: words 1-7 first ... */
        "ADD	lr, sp, #0x4\n\t"
        "LDM	lr, {r4, r5, r6, r7, r8, r9, r10}\n\t"
        "ADDS	r4, r4, r4\n\t"
        "ADCS	r5, r5, r5\n\t"
        "ADCS	r6, r6, r6\n\t"
        "ADCS	r7, r7, r7\n\t"
        "ADCS	r8, r8, r8\n\t"
        "ADCS	r9, r9, r9\n\t"
        "ADCS	r10, r10, r10\n\t"
        "STM	lr!, {r4, r5, r6, r7, r8, r9, r10}\n\t"
        /* ... then words 8-14, continuing the same carry chain; the final
         * carry out becomes word 15 (r10). */
        "LDM	lr, {r3, r4, r5, r6, r7, r8, r9}\n\t"
        "ADCS	r3, r3, r3\n\t"
        "ADCS	r4, r4, r4\n\t"
        "ADCS	r5, r5, r5\n\t"
        "ADCS	r6, r6, r6\n\t"
        "ADCS	r7, r7, r7\n\t"
        "ADCS	r8, r8, r8\n\t"
        "ADCS	r9, r9, r9\n\t"
        "ADC	r10, r0, #0x0\n\t"
        "STM	lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t"
        /* Reload doubled words 1-7 and add the diagonal terms. */
        "ADD	lr, sp, #0x4\n\t"
        "LDM	lr, {r4, r5, r6, r7, r8, r9, r10}\n\t"
        "MOV	lr, sp\n\t"
        /* A[0] * A[0] */
        "LDR	r12, [r1]\n\t"
        "UMULL	r3, r11, r12, r12\n\t"
        "ADDS	r4, r4, r11\n\t"
        /* A[1] * A[1] */
        "LDR	r12, [r1, #4]\n\t"
        "ADCS	r5, r5, #0x0\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r5, r11, r12, r12\n\t"
        "ADDS	r6, r6, r11\n\t"
        /* A[2] * A[2] */
        "LDR	r12, [r1, #8]\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r7, r11, r12, r12\n\t"
        "ADDS	r8, r8, r11\n\t"
        /* A[3] * A[3] */
        "LDR	r12, [r1, #12]\n\t"
        "ADCS	r9, r9, #0x0\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r9, r11, r12, r12\n\t"
        "ADDS	r10, r10, r11\n\t"
        /* Words 0-7 of the square are final: store them at sp+0 and load the
         * doubled words 8-15. The carry from the ADDS above stays live across
         * the STM/LDM/LDR and is consumed by the next ADCS. */
        "STM	lr!, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t"
        "LDM	lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t"
        /* A[4] * A[4] */
        "LDR	r12, [r1, #16]\n\t"
        "ADCS	r3, r3, #0x0\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r3, r11, r12, r12\n\t"
        "ADDS	r4, r4, r11\n\t"
        /* A[5] * A[5] */
        "LDR	r12, [r1, #20]\n\t"
        "ADCS	r5, r5, #0x0\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r5, r11, r12, r12\n\t"
        "ADDS	r6, r6, r11\n\t"
        /* A[6] * A[6] */
        "LDR	r12, [r1, #24]\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r7, r11, r12, r12\n\t"
        "ADDS	r8, r8, r11\n\t"
        /* A[7] * A[7] */
        "LDR	r12, [r1, #28]\n\t"
        "ADCS	r9, r9, #0x0\n\t"
        "ADC	r10, r10, #0x0\n\t"
        "UMLAL	r9, r10, r12, r12\n\t"
        /* Words 8-15 of the square are now in r3-r10. */
        /* Reduce */
        /* r11:r10 = word 15 * 0x26 + word 7. */
        "LDR	r2, [sp, #28]\n\t"
        "MOV	lr, sp\n\t"
        "MOV	r12, #0x26\n\t"
        "UMULL	r10, r11, r10, r12\n\t"
        "ADDS	r10, r10, r2\n\t"
        "ADC	r11, r11, #0x0\n\t"
        /* r11 = everything from bit 31 of r10 upward, times 0x13; it is added
         * into word 0 below. */
        "MOV	r12, #0x13\n\t"
        "LSL	r11, r11, #1\n\t"
        "ORR	r11, r11, r10, LSR #31\n\t"
        "MUL	r11, r11, r12\n\t"
        /* For each low word i (reloaded from the stack two at a time):
         * word i + carry + 0x26 * word (i + 8), with r11 as the carry. */
        "LDM	lr!, {r1, r2}\n\t"
        "MOV	r12, #0x26\n\t"
        "ADDS	r1, r1, r11\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r1, r11, r3, r12\n\t"
        "ADDS	r2, r2, r11\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r2, r11, r4, r12\n\t"
        "LDM	lr!, {r3, r4}\n\t"
        "ADDS	r3, r3, r11\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r3, r11, r5, r12\n\t"
        "ADDS	r4, r4, r11\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r4, r11, r6, r12\n\t"
        "LDM	lr!, {r5, r6}\n\t"
        "ADDS	r5, r5, r11\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r5, r11, r7, r12\n\t"
        "ADDS	r6, r6, r11\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r6, r11, r8, r12\n\t"
        /* r8 loaded here is overwritten below; only r7 (word 6) is used. */
        "LDM	lr!, {r7, r8}\n\t"
        "ADDS	r7, r7, r11\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r7, r11, r9, r12\n\t"
        /* Top word: r10 with bit 31 cleared, plus the last carry. */
        "BFC	r10, #31, #1\n\t"
        "ADDS	r8, r10, r11\n\t"
        /* Store */
        "LDR	r0, [sp, #64]\n\t"
        "STM	r0, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t"
        "ADD	sp, sp, #0x44\n\t"
        :
        :
        : "memory", "lr", "cc"
    );
}

#else
/* Square an 8-word value and reduce the 16-word result back to 8 words
 * (variant that uses the UMAAL instruction).
 *
 * Register interface, as used by the instructions below (this is not a
 * C-level signature):
 *   r0 - address the 8 result words are stored to
 *   r1 - address of the operand A (8 words read)
 *
 * r0-r12 and lr are all overwritten. The asm statement lists only "memory",
 * "lr" and "cc" as clobbers, so code that reaches this routine with BL has to
 * treat the remaining registers as destroyed itself.
 *
 * Stack frame (0x20 bytes):
 *   sp+0 .. sp+24 : result words R[0..6] of the square
 *   sp+28         : entry value of r0
 *
 * Each result word is built as: sum of the cross products for that column,
 * doubled with ADDS/ADCS, then the diagonal square (or zero, via lr = 0)
 * added with UMAAL. The carry flag set by one doubling step is consumed by
 * the ADCS of the next; the instructions in between are all non-flag-setting
 * forms, so that chain must not be disturbed.
 */
void fe_sq_op(void);
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_sq_op()
#else
void fe_sq_op()
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
    __asm__ __volatile__ (
        "SUB	sp, sp, #0x20\n\t"
        /* Save the result pointer, then load A[0..7] into r0-r7. */
        "STR	r0, [sp, #28]\n\t"
        "LDM	r1, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t"
        /* Square */
        "UMULL	r9, r10, r0, r0\n\t"
        "UMULL	r11, r12, r0, r1\n\t"
        "ADDS	r11, r11, r11\n\t"
        /* lr is the constant zero from here on; UMAAL with lr, lr adds the
         * two accumulator registers without a product term. */
        "MOV	lr, #0x0\n\t"
        "UMAAL	r10, r11, lr, lr\n\t"
        /* R[0], R[1] */
        "STM	sp, {r9, r10}\n\t"
        "MOV	r8, lr\n\t"
        "UMAAL	r8, r12, r0, r2\n\t"
        "ADCS	r8, r8, r8\n\t"
        "UMAAL	r8, r11, r1, r1\n\t"
        "UMULL	r9, r10, r0, r3\n\t"
        "UMAAL	r9, r12, r1, r2\n\t"
        "ADCS	r9, r9, r9\n\t"
        "UMAAL	r9, r11, lr, lr\n\t"
        /* R[2], R[3] */
        "STRD	r8, r9, [sp, #8]\n\t"
        "MOV	r9, lr\n\t"
        "UMAAL	r9, r10, r0, r4\n\t"
        "UMAAL	r9, r12, r1, r3\n\t"
        "ADCS	r9, r9, r9\n\t"
        "UMAAL	r9, r11, r2, r2\n\t"
        /* R[4] */
        "STR	r9, [sp, #16]\n\t"
        "UMULL	r9, r8, r0, r5\n\t"
        "UMAAL	r9, r12, r1, r4\n\t"
        "UMAAL	r9, r10, r2, r3\n\t"
        "ADCS	r9, r9, r9\n\t"
        "UMAAL	r9, r11, lr, lr\n\t"
        /* R[5] */
        "STR	r9, [sp, #20]\n\t"
        "MOV	r9, lr\n\t"
        "UMAAL	r9, r8, r0, r6\n\t"
        "UMAAL	r9, r12, r1, r5\n\t"
        "UMAAL	r9, r10, r2, r4\n\t"
        "ADCS	r9, r9, r9\n\t"
        "UMAAL	r9, r11, r3, r3\n\t"
        /* R[6] */
        "STR	r9, [sp, #24]\n\t"
        /* Last use of A[0]; r0 is reused for R[7]. */
        "UMULL	r0, r9, r0, r7\n\t"
        "UMAAL	r0, r8, r1, r6\n\t"
        "UMAAL	r0, r12, r2, r5\n\t"
        "UMAAL	r0, r10, r3, r4\n\t"
        "ADCS	r0, r0, r0\n\t"
        "UMAAL	r0, r11, lr, lr\n\t"
        /* R[7] = r0 */
        "UMAAL	r9, r8, r1, r7\n\t"
        "UMAAL	r9, r10, r2, r6\n\t"
        "UMAAL	r12, r9, r3, r5\n\t"
        "ADCS	r12, r12, r12\n\t"
        "UMAAL	r12, r11, r4, r4\n\t"
        /* R[8] = r12 */
        "UMAAL	r9, r8, r2, r7\n\t"
        "UMAAL	r10, r9, r3, r6\n\t"
        /* A[2] is no longer needed; r2 becomes a zeroed accumulator. */
        "MOV	r2, lr\n\t"
        "UMAAL	r10, r2, r4, r5\n\t"
        "ADCS	r10, r10, r10\n\t"
        "UMAAL	r11, r10, lr, lr\n\t"
        /* R[9] = r11 */
        "UMAAL	r2, r8, r3, r7\n\t"
        "UMAAL	r2, r9, r4, r6\n\t"
        "ADCS	r3, r2, r2\n\t"
        "UMAAL	r10, r3, r5, r5\n\t"
        /* R[10] = r10 */
        "MOV	r1, lr\n\t"
        "UMAAL	r1, r8, r4, r7\n\t"
        "UMAAL	r1, r9, r5, r6\n\t"
        "ADCS	r4, r1, r1\n\t"
        "UMAAL	r3, r4, lr, lr\n\t"
        /* R[11] = r3 */
        "UMAAL	r8, r9, r5, r7\n\t"
        "ADCS	r8, r8, r8\n\t"
        "UMAAL	r4, r8, r6, r6\n\t"
        /* R[12] = r4 */
        "MOV	r5, lr\n\t"
        "UMAAL	r5, r9, r6, r7\n\t"
        "ADCS	r5, r5, r5\n\t"
        "UMAAL	r8, r5, lr, lr\n\t"
        /* R[13] = r8 */
        "ADCS	r9, r9, r9\n\t"
        "UMAAL	r9, r5, r7, r7\n\t"
        "ADCS	r7, r5, lr\n\t"
        /* R[14] = r9 */
        /* R[15] = r7 */
        /* Reduce */
        /* With r6 = 0x25 the UMAAL computes r7 * 0x25 + r7 + r0, i.e.
         * R[15] * 0x26 + R[7]: low half in r7, high half in r0. */
        "MOV	r6, #0x25\n\t"
        "UMAAL	r7, r0, r7, r6\n\t"
        /* lr = (everything from bit 31 of r7 upward) * 0x13; it is the
         * carry-in for word 0 below. */
        "MOV	r6, #0x13\n\t"
        "LSL	r0, r0, #1\n\t"
        "ORR	r0, r0, r7, LSR #31\n\t"
        "MUL	lr, r0, r6\n\t"
        /* Pop R[0], R[1] and add 0x26 * R[8], R[9], lr carrying along. */
        "POP	{r0, r1}\n\t"
        "MOV	r6, #0x26\n\t"
        "UMAAL	r0, lr, r12, r6\n\t"
        "UMAAL	r1, lr, r11, r6\n\t"
        /* Move R[11], R[12] out of r3, r4 before the POP overwrites them. */
        "MOV	r12, r3\n\t"
        "MOV	r11, r4\n\t"
        "POP	{r2, r3, r4}\n\t"
        "UMAAL	r2, lr, r10, r6\n\t"
        "UMAAL	r3, lr, r12, r6\n\t"
        "UMAAL	r4, lr, r11, r6\n\t"
        /* Keep the 0x26 multiplier in r12; r6 receives R[6] next. */
        "MOV	r12, r6\n\t"
        "POP	{r5, r6}\n\t"
        "UMAAL	r5, lr, r8, r12\n\t"
        /* Top word: clear bit 31, then add the final carry. */
        "BFC	r7, #31, #1\n\t"
        "UMAAL	r6, lr, r9, r12\n\t"
        "ADD	r7, r7, lr\n\t"
        /* The last frame word is the saved result pointer; popping it also
         * restores sp to its entry value. */
        "POP	{lr}\n\t"
        /* Store */
        "STM	lr, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t"
        :
        :
        : "memory", "lr", "cc"
    );
}

#endif /* WOLFSSL_SP_NO_UMAAL */
/* Square a field element by calling fe_sq_op.
 *
 * r  Receives the 8-word result.
 * a  Operand to square.
 *
 * fe_sq_op takes its arguments in r0 (result) and r1 (operand) and
 * overwrites r0-r12 and lr; this wrapper supplies those registers and lists
 * everything that is not an operand as clobbered.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_sq(fe r_p, const fe a_p)
#else
void fe_sq(fe r, const fe a)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    /* Pin the two pointers to the registers fe_sq_op reads. */
    register sword32* r __asm__ ("r0") = (sword32*)r_p;
    register const sword32* a __asm__ ("r1") = (const sword32*)a_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    /* NOTE(review): when WOLFSSL_NO_VAR_ASSIGN_REG is defined nothing in this
     * function forces r and a into r0 and r1; presumably this relies on the
     * parameters still being in their argument registers - confirm. */
    __asm__ __volatile__ (
        "BL	fe_sq_op\n\t"
        /* Operands are read-write because fe_sq_op overwrites r0 and r1. */
        : [r] "+r" (r), [a] "+r" (a)
        :
        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc"
    );
}

#ifdef HAVE_CURVE25519
#ifdef WOLFSSL_SP_NO_UMAAL
/* Multiply a field element by the constant 121666 (0x1db42) and fold the
 * overflow back into the low word (variant that does not use UMAAL).
 *
 * r  Receives the 8-word result.
 * a  Operand; 8 words are read. It is not written by this routine.
 *
 * Unlike fe_mul_op and fe_sq_op this is an ordinary C function: every
 * scratch register it uses (r2-r12) is listed as clobbered.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_mul121666(fe r_p, fe a_p)
#else
void fe_mul121666(fe r, fe a)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register sword32* r __asm__ ("r0") = (sword32*)r_p;
    register sword32* a __asm__ ("r1") = (sword32*)a_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        /* Multiply by 121666 */
        "LDM	%[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
        /* r12 = 0x0001db42 = 121666 */
        "MOV	r12, #0xdb42\n\t"
        "MOVT	r12, #0x1\n\t"
        /* Word-by-word multiply; r10 and r11 alternate as the register that
         * carries the high half of one product into the next word. */
        "UMULL	r2, r10, r2, r12\n\t"
        "UMULL	r3, r11, r3, r12\n\t"
        "ADDS	r3, r3, r10\n\t"
        "ADC	r11, r11, #0x0\n\t"
        "UMULL	r4, r10, r4, r12\n\t"
        "ADDS	r4, r4, r11\n\t"
        "ADC	r10, r10, #0x0\n\t"
        "UMULL	r5, r11, r5, r12\n\t"
        "ADDS	r5, r5, r10\n\t"
        "ADC	r11, r11, #0x0\n\t"
        "UMULL	r6, r10, r6, r12\n\t"
        "ADDS	r6, r6, r11\n\t"
        "ADC	r10, r10, #0x0\n\t"
        "UMULL	r7, r11, r7, r12\n\t"
        "ADDS	r7, r7, r10\n\t"
        "ADC	r11, r11, #0x0\n\t"
        "UMULL	r8, r10, r8, r12\n\t"
        "ADDS	r8, r8, r11\n\t"
        "ADC	r10, r10, #0x0\n\t"
        "UMULL	r9, r11, r9, r12\n\t"
        "ADDS	r9, r9, r10\n\t"
        /* This MOV sits between the ADDS and the ADC that consumes its carry;
         * it is the non-flag-setting form, so the carry survives. */
        "MOV	r12, #0x13\n\t"
        "ADC	r11, r11, #0x0\n\t"
        /* r11 = overflow word shifted up by one with bit 31 of the top word
         * shifted in, i.e. everything from bit 255 upward; multiply by 0x13
         * and add it back in at word 0. */
        "LSL	r11, r11, #1\n\t"
        "ORR	r11, r11, r9, LSR #31\n\t"
        "MUL	r11, r11, r12\n\t"
        "ADDS	r2, r2, r11\n\t"
        "ADCS	r3, r3, #0x0\n\t"
        "ADCS	r4, r4, #0x0\n\t"
        "ADCS	r5, r5, #0x0\n\t"
        "ADCS	r6, r6, #0x0\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        /* Clear bit 31 of the top word (already folded in above) before the
         * carry chain reaches it; BFC leaves the carry flag alone. */
        "BFC	r9, #31, #1\n\t"
        "ADCS	r8, r8, #0x0\n\t"
        "ADC	r9, r9, #0x0\n\t"
        "STM	%[r], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
        : [r] "+r" (r), [a] "+r" (a)
        :
        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc"
    );
}

#else
/* Multiply a field element by the constant 121666 (0x1db42) and fold the
 * overflow back into the low word (variant that uses UMAAL).
 *
 * r  Receives the 8-word result.
 * a  Operand; 8 words are read. It is not written by this routine.
 *
 * Unlike fe_mul_op and fe_sq_op this is an ordinary C function: every
 * scratch register it uses (r2-r12) is listed as clobbered.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_mul121666(fe r_p, fe a_p)
#else
void fe_mul121666(fe r, fe a)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register sword32* r __asm__ ("r0") = (sword32*)r_p;
    register sword32* a __asm__ ("r1") = (sword32*)a_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        /* Multiply by 121666 */
        "LDM	%[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
        /* r11 = 0x0001db42 = 121666 */
        "MOV	r11, #0xdb42\n\t"
        "MOVT	r11, #0x1\n\t"
        "UMULL	r2, r12, r2, r11\n\t"
        /* r10 = 121665. UMAAL adds both accumulator registers to the product,
         * and here one of them is the multiplicand itself, so each step below
         * yields word * 121665 + word + carry = word * 121666 + carry, with
         * r12 as the running carry. */
        "SUB	r10, r11, #0x1\n\t"
        "UMAAL	r3, r12, r3, r10\n\t"
        "UMAAL	r4, r12, r4, r10\n\t"
        "UMAAL	r5, r12, r5, r10\n\t"
        "UMAAL	r6, r12, r6, r10\n\t"
        "UMAAL	r7, r12, r7, r10\n\t"
        "UMAAL	r8, r12, r8, r10\n\t"
        "MOV	r11, #0x13\n\t"
        "UMAAL	r9, r12, r9, r10\n\t"
        /* r12 = overflow word shifted up by one with bit 31 of the top word
         * shifted in, i.e. everything from bit 255 upward; multiply by 0x13
         * and add it back in at word 0. */
        "LSL	r12, r12, #1\n\t"
        "ORR	r12, r12, r9, LSR #31\n\t"
        "MUL	r12, r12, r11\n\t"
        "ADDS	r2, r2, r12\n\t"
        "ADCS	r3, r3, #0x0\n\t"
        "ADCS	r4, r4, #0x0\n\t"
        "ADCS	r5, r5, #0x0\n\t"
        "ADCS	r6, r6, #0x0\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        /* Clear bit 31 of the top word (already folded in above) before the
         * carry chain reaches it; BFC leaves the carry flag alone. */
        "BFC	r9, #31, #1\n\t"
        "ADCS	r8, r8, #0x0\n\t"
        "ADC	r9, r9, #0x0\n\t"
        "STM	%[r], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
        : [r] "+r" (r), [a] "+r" (a)
        :
        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc"
    );
}

#endif /* WOLFSSL_SP_NO_UMAAL */
#ifndef WC_NO_CACHE_RESISTANT
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
#else
int curve25519(byte* r, const byte* n, const byte* a)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register byte* r __asm__ ("r0") = (byte*)r_p;
    register const byte* n __asm__ ("r1") = (const byte*)n_p;
    register const byte* a __asm__ ("r2") = (const byte*)a_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        "SUB	sp, sp, #0xbc\n\t"
        "STR	%[r], [sp, #160]\n\t"
        "STR	%[n], [sp, #164]\n\t"
        "STR	%[a], [sp, #168]\n\t"
        "MOV	%[n], #0x0\n\t"
        "STR	%[n], [sp, #172]\n\t"
        "MOV	r4, #0x1\n\t"
        "MOV	r5, #0x0\n\t"
        "MOV	r6, #0x0\n\t"
        "MOV	r7, #0x0\n\t"
        "MOV	r8, #0x0\n\t"
        "MOV	r9, #0x0\n\t"
        "MOV	r10, #0x0\n\t"
        "MOV	r11, #0x0\n\t"
        "STM	%[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
        "ADD	r3, sp, #0x20\n\t"
        "STM	r3, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
        "MOV	r4, #0x0\n\t"
        "MOV	r3, sp\n\t"
        "STM	r3, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
        "ADD	r3, sp, #0x40\n\t"
        /* Copy */
        "LDM	r2, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
        "STM	r3, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
        "MOV	%[n], #0x1e\n\t"
        "STR	%[n], [sp, #180]\n\t"
        "MOV	%[a], #0x1c\n\t"
        "STR	%[a], [sp, #176]\n\t"
        "\n"
    "L_curve25519_words%=:\n\t"
        "\n"
    "L_curve25519_bits%=:\n\t"
        "LDR	%[n], [sp, #164]\n\t"
        "LDR	%[a], [%[n], r2]\n\t"
        "LDR	%[n], [sp, #180]\n\t"
        "LSR	%[a], %[a], %[n]\n\t"
        "AND	%[a], %[a], #0x1\n\t"
        "STR	%[a], [sp, #184]\n\t"
        "LDR	%[n], [sp, #172]\n\t"
        "EOR	%[n], %[n], %[a]\n\t"
        "STR	%[n], [sp, #172]\n\t"
        "LDR	%[r], [sp, #160]\n\t"
        /* Conditional Swap */
        "RSB	%[n], %[n], #0x0\n\t"
        "MOV	r3, r0\n\t"
        "ADD	r12, sp, #0x40\n\t"
        "LDM	r3, {r4, r5}\n\t"
        "LDM	r12, {r6, r7}\n\t"
        "EOR	r8, r4, r6\n\t"
        "EOR	r9, r5, r7\n\t"
        "AND	r8, r8, %[n]\n\t"
        "AND	r9, r9, %[n]\n\t"
        "EOR	r4, r4, r8\n\t"
        "EOR	r5, r5, r9\n\t"
        "EOR	r6, r6, r8\n\t"
        "EOR	r7, r7, r9\n\t"
        "STM	r3!, {r4, r5}\n\t"
        "STM	r12!, {r6, r7}\n\t"
        "LDM	r3, {r4, r5}\n\t"
        "LDM	r12, {r6, r7}\n\t"
        "EOR	r8, r4, r6\n\t"
        "EOR	r9, r5, r7\n\t"
        "AND	r8, r8, %[n]\n\t"
        "AND	r9, r9, %[n]\n\t"
        "EOR	r4, r4, r8\n\t"
        "EOR	r5, r5, r9\n\t"
        "EOR	r6, r6, r8\n\t"
        "EOR	r7, r7, r9\n\t"
        "STM	r3!, {r4, r5}\n\t"
        "STM	r12!, {r6, r7}\n\t"
        "LDM	r3, {r4, r5}\n\t"
        "LDM	r12, {r6, r7}\n\t"
        "EOR	r8, r4, r6\n\t"
        "EOR	r9, r5, r7\n\t"
        "AND	r8, r8, %[n]\n\t"
        "AND	r9, r9, %[n]\n\t"
        "EOR	r4, r4, r8\n\t"
        "EOR	r5, r5, r9\n\t"
        "EOR	r6, r6, r8\n\t"
        "EOR	r7, r7, r9\n\t"
        "STM	r3!, {r4, r5}\n\t"
        "STM	r12!, {r6, r7}\n\t"
        "LDM	r3, {r4, r5}\n\t"
        "LDM	r12, {r6, r7}\n\t"
        "EOR	r8, r4, r6\n\t"
        "EOR	r9, r5, r7\n\t"
        "AND	r8, r8, %[n]\n\t"
        "AND	r9, r9, %[n]\n\t"
        "EOR	r4, r4, r8\n\t"
        "EOR	r5, r5, r9\n\t"
        "EOR	r6, r6, r8\n\t"
        "EOR	r7, r7, r9\n\t"
        "STM	r3!, {r4, r5}\n\t"
        "STM	r12!, {r6, r7}\n\t"
        "LDR	%[n], [sp, #172]\n\t"
        /* Conditional Swap */
        "RSB	%[n], %[n], #0x0\n\t"
        "MOV	r3, sp\n\t"
        "ADD	r12, sp, #0x20\n\t"
        "LDM	r3, {r4, r5}\n\t"
        "LDM	r12, {r6, r7}\n\t"
        "EOR	r8, r4, r6\n\t"
        "EOR	r9, r5, r7\n\t"
        "AND	r8, r8, %[n]\n\t"
        "AND	r9, r9, %[n]\n\t"
        "EOR	r4, r4, r8\n\t"
        "EOR	r5, r5, r9\n\t"
        "EOR	r6, r6, r8\n\t"
        "EOR	r7, r7, r9\n\t"
        "STM	r3!, {r4, r5}\n\t"
        "STM	r12!, {r6, r7}\n\t"
        "LDM	r3, {r4, r5}\n\t"
        "LDM	r12, {r6, r7}\n\t"
        "EOR	r8, r4, r6\n\t"
        "EOR	r9, r5, r7\n\t"
        "AND	r8, r8, %[n]\n\t"
        "AND	r9, r9, %[n]\n\t"
        "EOR	r4, r4, r8\n\t"
        "EOR	r5, r5, r9\n\t"
        "EOR	r6, r6, r8\n\t"
        "EOR	r7, r7, r9\n\t"
        "STM	r3!, {r4, r5}\n\t"
        "STM	r12!, {r6, r7}\n\t"
        "LDM	r3, {r4, r5}\n\t"
        "LDM	r12, {r6, r7}\n\t"
        "EOR	r8, r4, r6\n\t"
        "EOR	r9, r5, r7\n\t"
        "AND	r8, r8, %[n]\n\t"
        "AND	r9, r9, %[n]\n\t"
        "EOR	r4, r4, r8\n\t"
        "EOR	r5, r5, r9\n\t"
        "EOR	r6, r6, r8\n\t"
        "EOR	r7, r7, r9\n\t"
        "STM	r3!, {r4, r5}\n\t"
        "STM	r12!, {r6, r7}\n\t"
        "LDM	r3, {r4, r5}\n\t"
        "LDM	r12, {r6, r7}\n\t"
        "EOR	r8, r4, r6\n\t"
        "EOR	r9, r5, r7\n\t"
        "AND	r8, r8, %[n]\n\t"
        "AND	r9, r9, %[n]\n\t"
        "EOR	r4, r4, r8\n\t"
        "EOR	r5, r5, r9\n\t"
        "EOR	r6, r6, r8\n\t"
        "EOR	r7, r7, r9\n\t"
        "STM	r3!, {r4, r5}\n\t"
        "STM	r12!, {r6, r7}\n\t"
        "LDR	%[n], [sp, #184]\n\t"
        "STR	%[n], [sp, #172]\n\t"
        "MOV	r3, sp\n\t"
        "LDR	r2, [sp, #160]\n\t"
        "ADD	r1, sp, #0x80\n\t"
        "LDR	r0, [sp, #160]\n\t"
        "BL	fe_add_sub_op\n\t"
        "ADD	r3, sp, #0x20\n\t"
        "ADD	r2, sp, #0x40\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "MOV	r0, sp\n\t"
        "BL	fe_add_sub_op\n\t"
        "LDR	r2, [sp, #160]\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "BL	fe_mul_op\n\t"
        "ADD	r2, sp, #0x80\n\t"
        "MOV	r1, sp\n\t"
        "MOV	r0, sp\n\t"
        "BL	fe_mul_op\n\t"
        "ADD	r1, sp, #0x80\n\t"
        "ADD	r0, sp, #0x80\n\t"
        "BL	fe_sq_op\n\t"
        "LDR	r1, [sp, #160]\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "BL	fe_sq_op\n\t"
        "MOV	r3, sp\n\t"
        "ADD	r2, sp, #0x20\n\t"
        "MOV	r1, sp\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "BL	fe_add_sub_op\n\t"
        "ADD	r2, sp, #0x80\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "LDR	r0, [sp, #160]\n\t"
        "BL	fe_mul_op\n\t"
        "ADD	r2, sp, #0x80\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "BL	fe_sub_op\n\t"
        "MOV	r1, sp\n\t"
        "MOV	r0, sp\n\t"
        "BL	fe_sq_op\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "BL	fe_mul121666\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "BL	fe_sq_op\n\t"
        "ADD	r2, sp, #0x20\n\t"
        "ADD	r1, sp, #0x80\n\t"
        "ADD	r0, sp, #0x80\n\t"
        "BL	fe_add_op\n\t"
        "MOV	r2, sp\n\t"
        "LDR	r1, [sp, #168]\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "BL	fe_mul_op\n\t"
        "ADD	r2, sp, #0x80\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "MOV	r0, sp\n\t"
        "BL	fe_mul_op\n\t"
        "LDR	%[a], [sp, #176]\n\t"
        "LDR	%[n], [sp, #180]\n\t"
        "SUBS	%[n], %[n], #0x1\n\t"
        "STR	%[n], [sp, #180]\n\t"
#ifdef __GNUC__
        "BGE	L_curve25519_bits%=\n\t"
#else
        "BGE.W	L_curve25519_bits%=\n\t"
#endif
        "MOV	%[n], #0x1f\n\t"
        "STR	%[n], [sp, #180]\n\t"
        "SUBS	%[a], %[a], #0x4\n\t"
        "STR	%[a], [sp, #176]\n\t"
#ifdef __GNUC__
        "BGE	L_curve25519_words%=\n\t"
#else
        "BGE.W	L_curve25519_words%=\n\t"
#endif
        /* Invert */
        "ADD	r1, sp, #0x0\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "BL	fe_sq_op\n\t"
        "ADD	r1, sp, #0x20\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "BL	fe_sq_op\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "BL	fe_sq_op\n\t"
        "ADD	r2, sp, #0x40\n\t"
        "ADD	r1, sp, #0x0\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "BL	fe_mul_op\n\t"
        "ADD	r2, sp, #0x40\n\t"
        "ADD	r1, sp, #0x20\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "BL	fe_mul_op\n\t"
        "ADD	r1, sp, #0x20\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "BL	fe_sq_op\n\t"
        "ADD	r2, sp, #0x60\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "BL	fe_mul_op\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "BL	fe_sq_op\n\t"
        "MOV	r12, #0x4\n\t"
        "\n"
    "L_curve25519_inv_1%=:\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_curve25519_inv_1%=\n\t"
#else
        "BNE.N	L_curve25519_inv_1%=\n\t"
#endif
        "ADD	r2, sp, #0x40\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "BL	fe_mul_op\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "BL	fe_sq_op\n\t"
        "MOV	r12, #0x9\n\t"
        "\n"
    "L_curve25519_inv_2%=:\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_curve25519_inv_2%=\n\t"
#else
        "BNE.N	L_curve25519_inv_2%=\n\t"
#endif
        "ADD	r2, sp, #0x40\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "BL	fe_mul_op\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x80\n\t"
        "BL	fe_sq_op\n\t"
        "MOV	r12, #0x13\n\t"
        "\n"
    "L_curve25519_inv_3%=:\n\t"
        "ADD	r1, sp, #0x80\n\t"
        "ADD	r0, sp, #0x80\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_curve25519_inv_3%=\n\t"
#else
        "BNE.N	L_curve25519_inv_3%=\n\t"
#endif
        "ADD	r2, sp, #0x60\n\t"
        "ADD	r1, sp, #0x80\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "BL	fe_mul_op\n\t"
        "MOV	r12, #0xa\n\t"
        "\n"
    "L_curve25519_inv_4%=:\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_curve25519_inv_4%=\n\t"
#else
        "BNE.N	L_curve25519_inv_4%=\n\t"
#endif
        "ADD	r2, sp, #0x40\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "BL	fe_mul_op\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "BL	fe_sq_op\n\t"
        "MOV	r12, #0x31\n\t"
        "\n"
    "L_curve25519_inv_5%=:\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_curve25519_inv_5%=\n\t"
#else
        "BNE.N	L_curve25519_inv_5%=\n\t"
#endif
        "ADD	r2, sp, #0x40\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "BL	fe_mul_op\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x80\n\t"
        "BL	fe_sq_op\n\t"
        "MOV	r12, #0x63\n\t"
        "\n"
    "L_curve25519_inv_6%=:\n\t"
        "ADD	r1, sp, #0x80\n\t"
        "ADD	r0, sp, #0x80\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_curve25519_inv_6%=\n\t"
#else
        "BNE.N	L_curve25519_inv_6%=\n\t"
#endif
        "ADD	r2, sp, #0x60\n\t"
        "ADD	r1, sp, #0x80\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "BL	fe_mul_op\n\t"
        "MOV	r12, #0x32\n\t"
        "\n"
    "L_curve25519_inv_7%=:\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_curve25519_inv_7%=\n\t"
#else
        "BNE.N	L_curve25519_inv_7%=\n\t"
#endif
        "ADD	r2, sp, #0x40\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "BL	fe_mul_op\n\t"
        "MOV	r12, #0x5\n\t"
        "\n"
    "L_curve25519_inv_8%=:\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_curve25519_inv_8%=\n\t"
#else
        "BNE.N	L_curve25519_inv_8%=\n\t"
#endif
        "ADD	r2, sp, #0x20\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x0\n\t"
        "BL	fe_mul_op\n\t"
        "MOV	r2, sp\n\t"
        "LDR	r1, [sp, #160]\n\t"
        "LDR	r0, [sp, #160]\n\t"
        "BL	fe_mul_op\n\t"
        "MOV	r0, #0x0\n\t"
        "ADD	sp, sp, #0xbc\n\t"
        : [r] "+r" (r), [n] "+r" (n), [a] "+r" (a)
        :
        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "lr", "cc"
    );
    return (uint32_t)(size_t)r;
}

#else
/* Curve25519 scalar multiplication, written as one inline-assembly block.
 *
 * This definition is the #else alternative of a conditional that is closed
 * by the "#endif" labelled WC_NO_CACHE_RESISTANT directly after it. Instead
 * of swapping field-element data, each ladder iteration conditionally swaps
 * four *pointers* kept on the stack.
 *
 * r  Output buffer. 8 words (32 bytes) are written to it.
 * n  Scalar. Read as 32-bit words n[bit >> 5] for bit = 254 down to 0;
 *    bit 255 is never examined here.
 * a  Input field element. 8 words (32 bytes) are read from it.
 *
 * The asm finishes with r0 = 0, so when r is bound to r0 (the
 * !WOLFSSL_NO_VAR_ASSIGN_REG case) the function returns 0.
 *
 * Stack frame (0xc0 bytes):
 *   sp+0x00, +0x20, +0x40, +0x60, +0x80   five 32-byte work areas
 *   [sp+160]  saved n pointer
 *   [sp+164]  scalar word as shifted in the previous iteration (bit 31 is
 *             the previously processed bit); starts at 0
 *   [sp+168]  current bit index
 *   [sp+172]  saved a pointer
 *   [sp+176]  pointer slot, initially r
 *   [sp+180]  pointer slot, initially sp+0x40
 *   [sp+184]  pointer slot, initially sp+0x00
 *   [sp+188]  pointer slot, initially sp+0x20
 *
 * The fe_*_op helpers and fe_mul121666 are defined elsewhere in this file and
 * are entered with BL with their arguments in r0-r3. NOTE(review): r0 looks
 * like the destination and r1/r2 the sources (fe_add_sub_op appears to take
 * two destinations in r0/r1 and two sources in r2/r3) - confirm against the
 * helper definitions.
 *
 * Several instructions name r1/r2 literally instead of %[n]/%[a]; they rely
 * on the register bindings declared below.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
#else
int curve25519(byte* r, const byte* n, const byte* a)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    /* Pin the arguments to r0-r2, the registers the asm text assumes. */
    register byte* r __asm__ ("r0") = (byte*)r_p;
    register const byte* n __asm__ ("r1") = (const byte*)n_p;
    register const byte* a __asm__ ("r2") = (const byte*)a_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        /* Reserve the frame and save the three arguments. */
        "SUB	sp, sp, #0xc0\n\t"
        "STR	%[r], [sp, #176]\n\t"
        "STR	%[n], [sp, #160]\n\t"
        "STR	%[a], [sp, #172]\n\t"
        /* Fill the remaining three pointer slots. */
        "ADD	r5, sp, #0x40\n\t"
        "ADD	r4, sp, #0x20\n\t"
        "STR	sp, [sp, #184]\n\t"
        "STR	r5, [sp, #180]\n\t"
        "STR	r4, [sp, #188]\n\t"
        /* Previous-bit state starts at zero. */
        "MOV	%[n], #0x0\n\t"
        "STR	%[n], [sp, #164]\n\t"
        /* r4..r11 = {1, 0, 0, 0, 0, 0, 0, 0}: written to r and to sp+0x20. */
        "MOV	r4, #0x1\n\t"
        "MOV	r5, #0x0\n\t"
        "MOV	r6, #0x0\n\t"
        "MOV	r7, #0x0\n\t"
        "MOV	r8, #0x0\n\t"
        "MOV	r9, #0x0\n\t"
        "MOV	r10, #0x0\n\t"
        "MOV	r11, #0x0\n\t"
        "STM	%[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
        "ADD	r3, sp, #0x20\n\t"
        "STM	r3, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
        /* With r4 cleared the same registers hold all zeros: written to sp+0. */
        "MOV	r4, #0x0\n\t"
        "MOV	r3, sp\n\t"
        "STM	r3, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
        "ADD	r3, sp, #0x40\n\t"
        /* Copy */
        /* 8 words from a (still in r2) to sp+0x40. */
        "LDM	r2, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
        "STM	r3, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
        /* Bit index starts at 254 (0xfe). */
        "MOV	%[a], #0xfe\n\t"
        "\n"
    "L_curve25519_bits%=:\n\t"
        /* Save the bit index, then fetch scalar word n[index >> 5] and shift
         * it left by 31 - (index & 31) so the selected bit sits in bit 31. */
        "STR	%[a], [sp, #168]\n\t"
        "LDR	%[n], [sp, #160]\n\t"
        "AND	r4, %[a], #0x1f\n\t"
        "LSR	%[a], %[a], #5\n\t"
        "LDR	%[a], [%[n], r2, LSL #2]\n\t"
        "RSB	r4, r4, #0x1f\n\t"
        "LSL	%[a], %[a], r4\n\t"
        /* XOR with the previous iteration's shifted word and sign-extend
         * bit 31: the mask is all ones when this bit differs from the
         * previous one, otherwise zero. The shifted word is saved for the
         * next iteration. */
        "LDR	%[n], [sp, #164]\n\t"
        "EOR	%[n], %[n], %[a]\n\t"
        "ASR	%[n], %[n], #31\n\t"
        "STR	%[a], [sp, #164]\n\t"
        /* Conditional Swap */
        /* Masked XOR swap of the pointer pairs ([176],[180]) and
         * ([184],[188]); no branch depends on the mask. */
        "ADD	r11, sp, #0xb0\n\t"
        "LDM	r11, {r4, r5, r6, r7}\n\t"
        "EOR	r8, r4, r5\n\t"
        "EOR	r9, r6, r7\n\t"
        "AND	r8, r8, %[n]\n\t"
        "AND	r9, r9, %[n]\n\t"
        "EOR	r4, r4, r8\n\t"
        "EOR	r5, r5, r8\n\t"
        "EOR	r6, r6, r9\n\t"
        "EOR	r7, r7, r9\n\t"
        "STM	r11, {r4, r5, r6, r7}\n\t"
        /* Ladder step */
        /* Every helper argument below is either one of the four pointer
         * slots ([176]..[188]), the saved a pointer ([172]) or one of the
         * scratch areas sp+0x60 / sp+0x80. */
        "LDR	r3, [sp, #184]\n\t"
        "LDR	r2, [sp, #176]\n\t"
        "ADD	r1, sp, #0x80\n\t"
        "LDR	r0, [sp, #176]\n\t"
        "BL	fe_add_sub_op\n\t"
        "LDR	r3, [sp, #188]\n\t"
        "LDR	r2, [sp, #180]\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "LDR	r0, [sp, #184]\n\t"
        "BL	fe_add_sub_op\n\t"
        "LDR	r2, [sp, #176]\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "LDR	r0, [sp, #188]\n\t"
        "BL	fe_mul_op\n\t"
        "ADD	r2, sp, #0x80\n\t"
        "LDR	r1, [sp, #184]\n\t"
        "LDR	r0, [sp, #184]\n\t"
        "BL	fe_mul_op\n\t"
        "ADD	r1, sp, #0x80\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "BL	fe_sq_op\n\t"
        "LDR	r1, [sp, #176]\n\t"
        "ADD	r0, sp, #0x80\n\t"
        "BL	fe_sq_op\n\t"
        "LDR	r3, [sp, #184]\n\t"
        "LDR	r2, [sp, #188]\n\t"
        "LDR	r1, [sp, #184]\n\t"
        "LDR	r0, [sp, #180]\n\t"
        "BL	fe_add_sub_op\n\t"
        "ADD	r2, sp, #0x60\n\t"
        "ADD	r1, sp, #0x80\n\t"
        "LDR	r0, [sp, #176]\n\t"
        "BL	fe_mul_op\n\t"
        "ADD	r2, sp, #0x60\n\t"
        "ADD	r1, sp, #0x80\n\t"
        "ADD	r0, sp, #0x80\n\t"
        "BL	fe_sub_op\n\t"
        "LDR	r1, [sp, #184]\n\t"
        "LDR	r0, [sp, #184]\n\t"
        "BL	fe_sq_op\n\t"
        "ADD	r1, sp, #0x80\n\t"
        "LDR	r0, [sp, #188]\n\t"
        "BL	fe_mul121666\n\t"
        "LDR	r1, [sp, #180]\n\t"
        "LDR	r0, [sp, #180]\n\t"
        "BL	fe_sq_op\n\t"
        "LDR	r2, [sp, #188]\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "BL	fe_add_op\n\t"
        "LDR	r2, [sp, #184]\n\t"
        "LDR	r1, [sp, #172]\n\t"
        "LDR	r0, [sp, #188]\n\t"
        "BL	fe_mul_op\n\t"
        "ADD	r2, sp, #0x60\n\t"
        "ADD	r1, sp, #0x80\n\t"
        "LDR	r0, [sp, #184]\n\t"
        "BL	fe_mul_op\n\t"
        /* Reload the bit index, decrement, and loop while it is still >= 0
         * (so index 0 is the last bit processed). */
        "LDR	%[a], [sp, #168]\n\t"
        "SUBS	%[a], %[a], #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BGE	L_curve25519_bits%=\n\t"
#else
        "BGE.N	L_curve25519_bits%=\n\t"
#endif
        /*   Cycle Count: 171 */
        /* Fetch the pointer currently held in slot [184] into n (r1). */
        "LDR	%[n], [sp, #184]\n\t"
        /* Copy */
        /* 8 words from that pointer to sp+0x00. */
        "LDM	r1, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
        "STM	sp, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
        /* Invert */
        /* Fixed chain of fe_sq_op / fe_mul_op calls over the work areas
         * sp+0x00 .. sp+0x80; the last call of the chain writes sp+0x00.
         * In every loop below r12 is the iteration counter. It is pushed
         * before the call and popped after it (presumably because the helper
         * may overwrite r12), and r0/r1 are computed before the PUSH so
         * their offsets are relative to the unmodified sp. */
        "ADD	r1, sp, #0x0\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "BL	fe_sq_op\n\t"
        "ADD	r1, sp, #0x20\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "BL	fe_sq_op\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "BL	fe_sq_op\n\t"
        "ADD	r2, sp, #0x40\n\t"
        "ADD	r1, sp, #0x0\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "BL	fe_mul_op\n\t"
        "ADD	r2, sp, #0x40\n\t"
        "ADD	r1, sp, #0x20\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "BL	fe_mul_op\n\t"
        "ADD	r1, sp, #0x20\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "BL	fe_sq_op\n\t"
        "ADD	r2, sp, #0x60\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "BL	fe_mul_op\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "BL	fe_sq_op\n\t"
        /* 4 in-place fe_sq_op calls on sp+0x60. */
        "MOV	r12, #0x4\n\t"
        "\n"
    "L_curve25519_inv_1%=:\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_curve25519_inv_1%=\n\t"
#else
        "BNE.N	L_curve25519_inv_1%=\n\t"
#endif
        "ADD	r2, sp, #0x40\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "BL	fe_mul_op\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "BL	fe_sq_op\n\t"
        /* 9 in-place fe_sq_op calls on sp+0x60. */
        "MOV	r12, #0x9\n\t"
        "\n"
    "L_curve25519_inv_2%=:\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_curve25519_inv_2%=\n\t"
#else
        "BNE.N	L_curve25519_inv_2%=\n\t"
#endif
        "ADD	r2, sp, #0x40\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "BL	fe_mul_op\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x80\n\t"
        "BL	fe_sq_op\n\t"
        /* 19 (0x13) in-place fe_sq_op calls on sp+0x80. */
        "MOV	r12, #0x13\n\t"
        "\n"
    "L_curve25519_inv_3%=:\n\t"
        "ADD	r1, sp, #0x80\n\t"
        "ADD	r0, sp, #0x80\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_curve25519_inv_3%=\n\t"
#else
        "BNE.N	L_curve25519_inv_3%=\n\t"
#endif
        "ADD	r2, sp, #0x60\n\t"
        "ADD	r1, sp, #0x80\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "BL	fe_mul_op\n\t"
        /* 10 (0xa) in-place fe_sq_op calls on sp+0x60. */
        "MOV	r12, #0xa\n\t"
        "\n"
    "L_curve25519_inv_4%=:\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_curve25519_inv_4%=\n\t"
#else
        "BNE.N	L_curve25519_inv_4%=\n\t"
#endif
        "ADD	r2, sp, #0x40\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "BL	fe_mul_op\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "BL	fe_sq_op\n\t"
        /* 49 (0x31) in-place fe_sq_op calls on sp+0x60. */
        "MOV	r12, #0x31\n\t"
        "\n"
    "L_curve25519_inv_5%=:\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_curve25519_inv_5%=\n\t"
#else
        "BNE.N	L_curve25519_inv_5%=\n\t"
#endif
        "ADD	r2, sp, #0x40\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "BL	fe_mul_op\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x80\n\t"
        "BL	fe_sq_op\n\t"
        /* 99 (0x63) in-place fe_sq_op calls on sp+0x80. */
        "MOV	r12, #0x63\n\t"
        "\n"
    "L_curve25519_inv_6%=:\n\t"
        "ADD	r1, sp, #0x80\n\t"
        "ADD	r0, sp, #0x80\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_curve25519_inv_6%=\n\t"
#else
        "BNE.N	L_curve25519_inv_6%=\n\t"
#endif
        "ADD	r2, sp, #0x60\n\t"
        "ADD	r1, sp, #0x80\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "BL	fe_mul_op\n\t"
        /* 50 (0x32) in-place fe_sq_op calls on sp+0x60. */
        "MOV	r12, #0x32\n\t"
        "\n"
    "L_curve25519_inv_7%=:\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_curve25519_inv_7%=\n\t"
#else
        "BNE.N	L_curve25519_inv_7%=\n\t"
#endif
        "ADD	r2, sp, #0x40\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "BL	fe_mul_op\n\t"
        /* 5 in-place fe_sq_op calls on sp+0x40. */
        "MOV	r12, #0x5\n\t"
        "\n"
    "L_curve25519_inv_8%=:\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_curve25519_inv_8%=\n\t"
#else
        "BNE.N	L_curve25519_inv_8%=\n\t"
#endif
        /* Last call of the inversion chain: destination is sp+0x00. */
        "ADD	r2, sp, #0x20\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x0\n\t"
        "BL	fe_mul_op\n\t"
        /* Final fe_mul_op: the buffer in slot [176] is both destination and
         * first source, the buffer in slot [184] is the second source. */
        "LDR	r2, [sp, #184]\n\t"
        "LDR	r1, [sp, #176]\n\t"
        "LDR	r0, [sp, #176]\n\t"
        "BL	fe_mul_op\n\t"
        /* Ensure result is less than modulus */
        /* Reload the 8 result words; if bit 31 of the top word is set, add
         * 19 (0x13) with carry propagation and clear that bit, then store
         * the words back. The mask keeps this branch-free. */
        "LDR	%[r], [sp, #176]\n\t"
        "LDM	%[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
        "MOV	%[a], #0x13\n\t"
        "AND	%[a], %[a], r11, ASR #31\n\t"
        "ADDS	r4, r4, %[a]\n\t"
        "ADCS	r5, r5, #0x0\n\t"
        "ADCS	r6, r6, #0x0\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        "ADCS	r8, r8, #0x0\n\t"
        "ADCS	r9, r9, #0x0\n\t"
        "BFC	r11, #31, #1\n\t"
        "ADCS	r10, r10, #0x0\n\t"
        "ADC	r11, r11, #0x0\n\t"
        "STM	%[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
        /* r0 = 0 is the value handed back through r; release the frame. */
        "MOV	r0, #0x0\n\t"
        "ADD	sp, sp, #0xc0\n\t"
        : [r] "+r" (r), [n] "+r" (n), [a] "+r" (a)
        :
        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "lr", "cc"
    );
    /* r was overwritten with 0 by the asm when it is bound to r0. */
    return (uint32_t)(size_t)r;
}

#endif /* WC_NO_CACHE_RESISTANT */
#endif /* HAVE_CURVE25519 */
#ifdef HAVE_ED25519
/* Field inversion: r = 1/a, built from a fixed sequence of fe_sq_op and
 * fe_mul_op calls.
 *
 * r  Output field element; written only by the last fe_mul_op call.
 * a  Input field element; read by the first squaring and one multiply.
 *
 * Stack frame (0x88 bytes):
 *   t0 = sp+0x00, t1 = sp+0x20, t2 = sp+0x40, t3 = sp+0x60  (32 bytes each)
 *   [sp+128]  saved r pointer
 *   [sp+132]  saved a pointer
 *
 * fe_sq_op and fe_mul_op are defined elsewhere in this file and are entered
 * with BL. NOTE(review): the comments below assume fe_sq_op computes
 * r0 = r1^2 and fe_mul_op computes r0 = r1 * r2 - confirm against those
 * definitions. Under that assumption the chain raises a to the power
 * 2^255 - 21.
 *
 * In each loop r12 is the iteration counter. It is pushed before the call and
 * popped afterwards (presumably because the helper may overwrite r12), and
 * r0/r1 are computed before the PUSH so their offsets are relative to the
 * unmodified sp.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_invert(fe r_p, const fe a_p)
#else
void fe_invert(fe r, const fe a)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    /* Pin the arguments to r0/r1. */
    register sword32* r __asm__ ("r0") = (sword32*)r_p;
    register const sword32* a __asm__ ("r1") = (const sword32*)a_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        "SUB	sp, sp, #0x88\n\t"
        /* Invert */
        /* Save both pointers; they are reloaded from the stack when needed. */
        "STR	%[r], [sp, #128]\n\t"
        "STR	%[a], [sp, #132]\n\t"
        /* t0 = sq(a) */
        "LDR	r1, [sp, #132]\n\t"
        "MOV	r0, sp\n\t"
        "BL	fe_sq_op\n\t"
        /* t1 = sq(t0) */
        "MOV	r1, sp\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "BL	fe_sq_op\n\t"
        /* t1 = sq(t1) */
        "ADD	r1, sp, #0x20\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "BL	fe_sq_op\n\t"
        /* t1 = mul(a, t1) */
        "ADD	r2, sp, #0x20\n\t"
        "LDR	r1, [sp, #132]\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "BL	fe_mul_op\n\t"
        /* t0 = mul(t0, t1) */
        "ADD	r2, sp, #0x20\n\t"
        "MOV	r1, sp\n\t"
        "MOV	r0, sp\n\t"
        "BL	fe_mul_op\n\t"
        /* t2 = sq(t0) */
        "MOV	r1, sp\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "BL	fe_sq_op\n\t"
        /* t1 = mul(t1, t2) */
        "ADD	r2, sp, #0x40\n\t"
        "ADD	r1, sp, #0x20\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "BL	fe_mul_op\n\t"
        /* t2 = sq(t1) */
        "ADD	r1, sp, #0x20\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "BL	fe_sq_op\n\t"
        /* t2 = sq(t2), 4 times */
        "MOV	r12, #0x4\n\t"
        "\n"
    "L_fe_invert1%=:\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_fe_invert1%=\n\t"
#else
        "BNE.N	L_fe_invert1%=\n\t"
#endif
        /* t1 = mul(t2, t1) */
        "ADD	r2, sp, #0x20\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "BL	fe_mul_op\n\t"
        /* t2 = sq(t1) */
        "ADD	r1, sp, #0x20\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "BL	fe_sq_op\n\t"
        /* t2 = sq(t2), 9 times */
        "MOV	r12, #0x9\n\t"
        "\n"
    "L_fe_invert2%=:\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_fe_invert2%=\n\t"
#else
        "BNE.N	L_fe_invert2%=\n\t"
#endif
        /* t2 = mul(t2, t1) */
        "ADD	r2, sp, #0x20\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "BL	fe_mul_op\n\t"
        /* t3 = sq(t2) */
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "BL	fe_sq_op\n\t"
        /* t3 = sq(t3), 19 (0x13) times */
        "MOV	r12, #0x13\n\t"
        "\n"
    "L_fe_invert3%=:\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_fe_invert3%=\n\t"
#else
        "BNE.N	L_fe_invert3%=\n\t"
#endif
        /* t2 = mul(t3, t2) */
        "ADD	r2, sp, #0x40\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "BL	fe_mul_op\n\t"
        /* t2 = sq(t2), 10 (0xa) times */
        "MOV	r12, #0xa\n\t"
        "\n"
    "L_fe_invert4%=:\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_fe_invert4%=\n\t"
#else
        "BNE.N	L_fe_invert4%=\n\t"
#endif
        /* t1 = mul(t2, t1) */
        "ADD	r2, sp, #0x20\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "BL	fe_mul_op\n\t"
        /* t2 = sq(t1) */
        "ADD	r1, sp, #0x20\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "BL	fe_sq_op\n\t"
        /* t2 = sq(t2), 49 (0x31) times */
        "MOV	r12, #0x31\n\t"
        "\n"
    "L_fe_invert5%=:\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_fe_invert5%=\n\t"
#else
        "BNE.N	L_fe_invert5%=\n\t"
#endif
        /* t2 = mul(t2, t1) */
        "ADD	r2, sp, #0x20\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "BL	fe_mul_op\n\t"
        /* t3 = sq(t2) */
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "BL	fe_sq_op\n\t"
        /* t3 = sq(t3), 99 (0x63) times */
        "MOV	r12, #0x63\n\t"
        "\n"
    "L_fe_invert6%=:\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x60\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_fe_invert6%=\n\t"
#else
        "BNE.N	L_fe_invert6%=\n\t"
#endif
        /* t2 = mul(t3, t2) */
        "ADD	r2, sp, #0x40\n\t"
        "ADD	r1, sp, #0x60\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "BL	fe_mul_op\n\t"
        /* t2 = sq(t2), 50 (0x32) times */
        "MOV	r12, #0x32\n\t"
        "\n"
    "L_fe_invert7%=:\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_fe_invert7%=\n\t"
#else
        "BNE.N	L_fe_invert7%=\n\t"
#endif
        /* t1 = mul(t2, t1) */
        "ADD	r2, sp, #0x20\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "BL	fe_mul_op\n\t"
        /* t1 = sq(t1), 5 times */
        "MOV	r12, #0x5\n\t"
        "\n"
    "L_fe_invert8%=:\n\t"
        "ADD	r1, sp, #0x20\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_fe_invert8%=\n\t"
#else
        "BNE.N	L_fe_invert8%=\n\t"
#endif
        /* r = mul(t1, t0): the only write to the caller's output buffer. */
        "MOV	r2, sp\n\t"
        "ADD	r1, sp, #0x20\n\t"
        "LDR	r0, [sp, #128]\n\t"
        "BL	fe_mul_op\n\t"
        /* Restore the operand registers to the original pointers and release
         * the frame. */
        "LDR	%[a], [sp, #132]\n\t"
        "LDR	%[r], [sp, #128]\n\t"
        "ADD	sp, sp, #0x88\n\t"
        : [r] "+r" (r), [a] "+r" (a)
        :
        : "memory", "lr", "r12", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc"
    );
}

#ifdef WOLFSSL_SP_NO_UMAAL
/* Square a field element and double the result: r = 2 * a^2, reduced.
 *
 * This is the WOLFSSL_SP_NO_UMAAL variant: it uses only UMULL/UMLAL with
 * explicit carry handling.
 *
 * r  Output field element; 8 words are stored to it at the very end.
 * a  Input field element; 8 words are read from it. r may alias a because
 *    every read of a happens before the final store.
 *
 * Stack frame (0x44 bytes):
 *   sp+0 .. sp+63  16-word buffer for the 512-bit square
 *   [sp+64]        saved r pointer (r0 is reused as a constant zero)
 *
 * Steps, following the section comments in the asm:
 *   1. Sum the off-diagonal products A[i]*A[j], i < j (words 1..14).
 *   2. Double that sum (carry goes into word 15).
 *   3. Add the diagonal squares A[i]*A[i].
 *   4. Reduce the high half into the low half using the multipliers 38 and
 *      19, then fold bit 255 once more.
 *   5. Double, and fold bit 255 again.
 *
 * The asm writes r2-r12 in addition to the operand registers r0/r1 and lr.
 * All of them are listed as clobbers so the compiler saves the callee-saved
 * r4-r11 and keeps nothing live in r2, r3 or r12 across the block; this
 * matches the clobber lists of the other functions in this file.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_sq2(fe r_p, const fe a_p)
#else
void fe_sq2(fe r, const fe a)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    /* Pin the arguments to r0/r1, which the asm text names literally. */
    register sword32* r __asm__ ("r0") = (sword32*)r_p;
    register const sword32* a __asm__ ("r1") = (const sword32*)a_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        "SUB	sp, sp, #0x44\n\t"
        /* Save the output pointer; r0 becomes the constant 0 used by the
         * "ADC rX, r0, #0" carry captures below. */
        "STR	r0, [sp, #64]\n\t"
        /* Square * 2 */
        "MOV	r0, #0x0\n\t"
        "LDR	r12, [r1]\n\t"
        /* A[0] * A[1] */
        "LDR	lr, [r1, #4]\n\t"
        "UMULL	r4, r5, r12, lr\n\t"
        /* A[0] * A[3] */
        "LDR	lr, [r1, #12]\n\t"
        "UMULL	r6, r7, r12, lr\n\t"
        /* A[0] * A[5] */
        "LDR	lr, [r1, #20]\n\t"
        "UMULL	r8, r9, r12, lr\n\t"
        /* A[0] * A[7] */
        "LDR	lr, [r1, #28]\n\t"
        "UMULL	r10, r3, r12, lr\n\t"
        /* A[0] * A[2] */
        "LDR	lr, [r1, #8]\n\t"
        "MOV	r11, #0x0\n\t"
        "UMLAL	r5, r11, r12, lr\n\t"
        "ADDS	r6, r6, r11\n\t"
        /* A[0] * A[4] */
        "LDR	lr, [r1, #16]\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r7, r11, r12, lr\n\t"
        "ADDS	r8, r8, r11\n\t"
        /* A[0] * A[6] */
        "LDR	lr, [r1, #24]\n\t"
        "ADCS	r9, r9, #0x0\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r9, r11, r12, lr\n\t"
        "ADDS	r10, r10, r11\n\t"
        "ADCS	r3, r3, #0x0\n\t"
        /* Words 1 and 2 of the off-diagonal sum are complete. */
        "STR	r4, [sp, #4]\n\t"
        "STR	r5, [sp, #8]\n\t"
        /* A[1] * A[2] */
        "LDR	r12, [r1, #4]\n\t"
        "LDR	lr, [r1, #8]\n\t"
        "MOV	r11, #0x0\n\t"
        "UMLAL	r6, r11, r12, lr\n\t"
        "STR	r6, [sp, #12]\n\t"
        "ADDS	r7, r7, r11\n\t"
        /* A[1] * A[3] */
        "LDR	lr, [r1, #12]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r7, r11, r12, lr\n\t"
        "STR	r7, [sp, #16]\n\t"
        "ADDS	r8, r8, r11\n\t"
        /* A[1] * A[4] */
        "LDR	lr, [r1, #16]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r8, r11, r12, lr\n\t"
        "ADDS	r9, r9, r11\n\t"
        /* A[1] * A[5] */
        "LDR	lr, [r1, #20]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r9, r11, r12, lr\n\t"
        "ADDS	r10, r10, r11\n\t"
        /* A[1] * A[6] */
        "LDR	lr, [r1, #24]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r10, r11, r12, lr\n\t"
        "ADDS	r3, r3, r11\n\t"
        /* A[1] * A[7] */
        "LDR	lr, [r1, #28]\n\t"
        "ADC	r4, r0, #0x0\n\t"
        "UMLAL	r3, r4, r12, lr\n\t"
        /* A[2] * A[3] */
        "LDR	r12, [r1, #8]\n\t"
        "LDR	lr, [r1, #12]\n\t"
        "MOV	r11, #0x0\n\t"
        "UMLAL	r8, r11, r12, lr\n\t"
        "STR	r8, [sp, #20]\n\t"
        "ADDS	r9, r9, r11\n\t"
        /* A[2] * A[4] */
        "LDR	lr, [r1, #16]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r9, r11, r12, lr\n\t"
        "STR	r9, [sp, #24]\n\t"
        "ADDS	r10, r10, r11\n\t"
        /* A[2] * A[5] */
        "LDR	lr, [r1, #20]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r10, r11, r12, lr\n\t"
        "ADDS	r3, r3, r11\n\t"
        /* A[2] * A[6] */
        "LDR	lr, [r1, #24]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r3, r11, r12, lr\n\t"
        "ADDS	r4, r4, r11\n\t"
        /* A[2] * A[7] */
        "LDR	lr, [r1, #28]\n\t"
        "ADC	r5, r0, #0x0\n\t"
        "UMLAL	r4, r5, r12, lr\n\t"
        /* A[3] * A[4] */
        "LDR	r12, [r1, #12]\n\t"
        "LDR	lr, [r1, #16]\n\t"
        "MOV	r11, #0x0\n\t"
        "UMLAL	r10, r11, r12, lr\n\t"
        "STR	r10, [sp, #28]\n\t"
        "ADDS	r3, r3, r11\n\t"
        /* A[3] * A[5] */
        "LDR	lr, [r1, #20]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r3, r11, r12, lr\n\t"
        "ADDS	r4, r4, r11\n\t"
        /* A[3] * A[6] */
        "LDR	lr, [r1, #24]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r4, r11, r12, lr\n\t"
        "ADDS	r5, r5, r11\n\t"
        /* A[3] * A[7] */
        "LDR	lr, [r1, #28]\n\t"
        "ADC	r6, r0, #0x0\n\t"
        "UMLAL	r5, r6, r12, lr\n\t"
        /* A[4] * A[5] */
        "LDR	r12, [r1, #16]\n\t"
        "LDR	lr, [r1, #20]\n\t"
        "MOV	r11, #0x0\n\t"
        "UMLAL	r4, r11, r12, lr\n\t"
        "ADDS	r5, r5, r11\n\t"
        /* A[4] * A[6] */
        "LDR	lr, [r1, #24]\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r5, r11, r12, lr\n\t"
        "ADDS	r6, r6, r11\n\t"
        /* A[4] * A[7] */
        "LDR	lr, [r1, #28]\n\t"
        "ADC	r7, r0, #0x0\n\t"
        "UMLAL	r6, r7, r12, lr\n\t"
        /* A[5] * A[6] */
        "LDR	r12, [r1, #20]\n\t"
        "LDR	lr, [r1, #24]\n\t"
        "MOV	r11, #0x0\n\t"
        "UMLAL	r6, r11, r12, lr\n\t"
        "ADDS	r7, r7, r11\n\t"
        /* A[5] * A[7] */
        "LDR	lr, [r1, #28]\n\t"
        "ADC	r8, r0, #0x0\n\t"
        "UMLAL	r7, r8, r12, lr\n\t"
        /* A[6] * A[7] */
        "LDR	r12, [r1, #24]\n\t"
        "LDR	lr, [r1, #28]\n\t"
        "MOV	r9, #0x0\n\t"
        "UMLAL	r8, r9, r12, lr\n\t"
        /* Spill words 8..14 of the off-diagonal sum to sp+0x20. */
        "ADD	lr, sp, #0x20\n\t"
        "STM	lr, {r3, r4, r5, r6, r7, r8, r9}\n\t"
        /* Double the off-diagonal sum: words 1..7 first ... */
        "ADD	lr, sp, #0x4\n\t"
        "LDM	lr, {r4, r5, r6, r7, r8, r9, r10}\n\t"
        "ADDS	r4, r4, r4\n\t"
        "ADCS	r5, r5, r5\n\t"
        "ADCS	r6, r6, r6\n\t"
        "ADCS	r7, r7, r7\n\t"
        "ADCS	r8, r8, r8\n\t"
        "ADCS	r9, r9, r9\n\t"
        "ADCS	r10, r10, r10\n\t"
        "STM	lr!, {r4, r5, r6, r7, r8, r9, r10}\n\t"
        /* ... then words 8..14, with the final carry becoming word 15. */
        "LDM	lr, {r3, r4, r5, r6, r7, r8, r9}\n\t"
        "ADCS	r3, r3, r3\n\t"
        "ADCS	r4, r4, r4\n\t"
        "ADCS	r5, r5, r5\n\t"
        "ADCS	r6, r6, r6\n\t"
        "ADCS	r7, r7, r7\n\t"
        "ADCS	r8, r8, r8\n\t"
        "ADCS	r9, r9, r9\n\t"
        "ADC	r10, r0, #0x0\n\t"
        "STM	lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t"
        /* Reload words 1..7 and add the diagonal squares. */
        "ADD	lr, sp, #0x4\n\t"
        "LDM	lr, {r4, r5, r6, r7, r8, r9, r10}\n\t"
        "MOV	lr, sp\n\t"
        /* A[0] * A[0] */
        "LDR	r12, [r1]\n\t"
        "UMULL	r3, r11, r12, r12\n\t"
        "ADDS	r4, r4, r11\n\t"
        /* A[1] * A[1] */
        "LDR	r12, [r1, #4]\n\t"
        "ADCS	r5, r5, #0x0\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r5, r11, r12, r12\n\t"
        "ADDS	r6, r6, r11\n\t"
        /* A[2] * A[2] */
        "LDR	r12, [r1, #8]\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r7, r11, r12, r12\n\t"
        "ADDS	r8, r8, r11\n\t"
        /* A[3] * A[3] */
        "LDR	r12, [r1, #12]\n\t"
        "ADCS	r9, r9, #0x0\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r9, r11, r12, r12\n\t"
        "ADDS	r10, r10, r11\n\t"
        /* Store words 0..7 and load words 8..15; STM/LDM leave the carry
         * from the ADDS above intact for the next ADCS. */
        "STM	lr!, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t"
        "LDM	lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t"
        /* A[4] * A[4] */
        "LDR	r12, [r1, #16]\n\t"
        "ADCS	r3, r3, #0x0\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r3, r11, r12, r12\n\t"
        "ADDS	r4, r4, r11\n\t"
        /* A[5] * A[5] */
        "LDR	r12, [r1, #20]\n\t"
        "ADCS	r5, r5, #0x0\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r5, r11, r12, r12\n\t"
        "ADDS	r6, r6, r11\n\t"
        /* A[6] * A[6] */
        "LDR	r12, [r1, #24]\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r7, r11, r12, r12\n\t"
        "ADDS	r8, r8, r11\n\t"
        /* A[7] * A[7] */
        "LDR	r12, [r1, #28]\n\t"
        "ADCS	r9, r9, #0x0\n\t"
        "ADC	r10, r10, #0x0\n\t"
        "UMLAL	r9, r10, r12, r12\n\t"
        /* Reduce */
        /* Word 15 (r10) times 38 is added onto word 7; everything from bit
         * 255 upward is then multiplied by 19 and carried into word 0. From
         * here on r1 and r2 are reused as data registers. */
        "LDR	r2, [sp, #28]\n\t"
        "MOV	lr, sp\n\t"
        "MOV	r12, #0x26\n\t"
        "UMULL	r10, r11, r10, r12\n\t"
        "ADDS	r10, r10, r2\n\t"
        "ADC	r11, r11, #0x0\n\t"
        "MOV	r12, #0x13\n\t"
        "LSL	r11, r11, #1\n\t"
        "ORR	r11, r11, r10, LSR #31\n\t"
        "MUL	r11, r11, r12\n\t"
        /* Low words are reloaded two at a time; each high word (8..14, in
         * r3..r9) times 38 is added to the matching low word. */
        "LDM	lr!, {r1, r2}\n\t"
        "MOV	r12, #0x26\n\t"
        "ADDS	r1, r1, r11\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r1, r11, r3, r12\n\t"
        "ADDS	r2, r2, r11\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r2, r11, r4, r12\n\t"
        "LDM	lr!, {r3, r4}\n\t"
        "ADDS	r3, r3, r11\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r3, r11, r5, r12\n\t"
        "ADDS	r4, r4, r11\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r4, r11, r6, r12\n\t"
        "LDM	lr!, {r5, r6}\n\t"
        "ADDS	r5, r5, r11\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r5, r11, r7, r12\n\t"
        "ADDS	r6, r6, r11\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r6, r11, r8, r12\n\t"
        "LDM	lr!, {r7, r8}\n\t"
        "ADDS	r7, r7, r11\n\t"
        "ADC	r11, r0, #0x0\n\t"
        "UMLAL	r7, r11, r9, r12\n\t"
        "BFC	r10, #31, #1\n\t"
        "ADDS	r8, r10, r11\n\t"
        /* Reduce if top bit set */
        "MOV	r12, #0x13\n\t"
        "AND	r11, r12, r8, ASR #31\n\t"
        "ADDS	r1, r1, r11\n\t"
        "ADCS	r2, r2, #0x0\n\t"
        "ADCS	r3, r3, #0x0\n\t"
        "ADCS	r4, r4, #0x0\n\t"
        "ADCS	r5, r5, #0x0\n\t"
        "ADCS	r6, r6, #0x0\n\t"
        "BFC	r8, #31, #1\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        "ADC	r8, r8, #0x0\n\t"
        /* Double */
        "ADDS	r1, r1, r1\n\t"
        "ADCS	r2, r2, r2\n\t"
        "ADCS	r3, r3, r3\n\t"
        "ADCS	r4, r4, r4\n\t"
        "ADCS	r5, r5, r5\n\t"
        "ADCS	r6, r6, r6\n\t"
        "ADCS	r7, r7, r7\n\t"
        "ADC	r8, r8, r8\n\t"
        /* Reduce if top bit set */
        "MOV	r12, #0x13\n\t"
        "AND	r11, r12, r8, ASR #31\n\t"
        "ADDS	r1, r1, r11\n\t"
        "ADCS	r2, r2, #0x0\n\t"
        "ADCS	r3, r3, #0x0\n\t"
        "ADCS	r4, r4, #0x0\n\t"
        "ADCS	r5, r5, #0x0\n\t"
        "ADCS	r6, r6, #0x0\n\t"
        "BFC	r8, #31, #1\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        "ADC	r8, r8, #0x0\n\t"
        /* Store */
        /* Recover the output pointer saved at entry and write 8 words. */
        "LDR	r0, [sp, #64]\n\t"
        "STM	r0, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t"
        "ADD	sp, sp, #0x44\n\t"
        : [r] "+r" (r), [a] "+r" (a)
        :
        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc"
    );
}

#else
/* Field element "square and double": r = 2 * a * a, folded back into eight
 * 32-bit words using the constants 19 (0x13) and 38 (0x26) and by masking
 * bit 31 of the top word.
 *
 * This is the UMAAL-based implementation (the #else branch of the
 * WOLFSSL_SP_NO_UMAAL conditional).
 *
 * r  Output: eight 32-bit words are stored to this address.
 * a  Input: eight 32-bit words are loaded from this address.
 *
 * Stack frame (0x24 bytes): words 0..6 of the 16-word square are kept at
 * sp+0..sp+24 and the r/a pointers are saved at sp+28/sp+32. The frame is
 * released piecewise by the POP instructions in the reduce step.
 *
 * The assembly names r0/r1 directly, so it relies on r and a being held in
 * r0/r1 (bound explicitly when WOLFSSL_NO_VAR_ASSIGN_REG is not defined).
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_sq2(fe r_p, const fe a_p)
#else
void fe_sq2(fe r, const fe a)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register sword32* r __asm__ ("r0") = (sword32*)r_p;
    register const sword32* a __asm__ ("r1") = (const sword32*)a_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        "SUB	sp, sp, #0x24\n\t"
        /* Save the r and a pointers at sp+28 and sp+32. */
        "STRD	r0, r1, [sp, #28]\n\t"
        /* Load the eight input words a[0..7] into r0..r7. */
        "LDM	r1, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t"
        /* Square * 2 */
        /* lr is kept at zero so "UMAAL x, y, lr, lr" is a plain x + y add
         * with the carry-out placed in y. Cross products are doubled with
         * ADDS/ADCS; the UMAAL instructions in between leave the carry
         * flag untouched, so the doubling carry chains from word to word. */
        "UMULL	r9, r10, r0, r0\n\t"
        "UMULL	r11, r12, r0, r1\n\t"
        "ADDS	r11, r11, r11\n\t"
        "MOV	lr, #0x0\n\t"
        "UMAAL	r10, r11, lr, lr\n\t"
        /* Product words 0 and 1 -> sp+0, sp+4 */
        "STM	sp, {r9, r10}\n\t"
        "MOV	r8, lr\n\t"
        "UMAAL	r8, r12, r0, r2\n\t"
        "ADCS	r8, r8, r8\n\t"
        "UMAAL	r8, r11, r1, r1\n\t"
        "UMULL	r9, r10, r0, r3\n\t"
        "UMAAL	r9, r12, r1, r2\n\t"
        "ADCS	r9, r9, r9\n\t"
        "UMAAL	r9, r11, lr, lr\n\t"
        /* Product words 2 and 3 -> sp+8, sp+12 */
        "STRD	r8, r9, [sp, #8]\n\t"
        "MOV	r9, lr\n\t"
        "UMAAL	r9, r10, r0, r4\n\t"
        "UMAAL	r9, r12, r1, r3\n\t"
        "ADCS	r9, r9, r9\n\t"
        "UMAAL	r9, r11, r2, r2\n\t"
        /* Product word 4 -> sp+16 */
        "STR	r9, [sp, #16]\n\t"
        "UMULL	r9, r8, r0, r5\n\t"
        "UMAAL	r9, r12, r1, r4\n\t"
        "UMAAL	r9, r10, r2, r3\n\t"
        "ADCS	r9, r9, r9\n\t"
        "UMAAL	r9, r11, lr, lr\n\t"
        /* Product word 5 -> sp+20 */
        "STR	r9, [sp, #20]\n\t"
        "MOV	r9, lr\n\t"
        "UMAAL	r9, r8, r0, r6\n\t"
        "UMAAL	r9, r12, r1, r5\n\t"
        "UMAAL	r9, r10, r2, r4\n\t"
        "ADCS	r9, r9, r9\n\t"
        "UMAAL	r9, r11, r3, r3\n\t"
        /* Product word 6 -> sp+24 */
        "STR	r9, [sp, #24]\n\t"
        "UMULL	r0, r9, r0, r7\n\t"
        "UMAAL	r0, r8, r1, r6\n\t"
        "UMAAL	r0, r12, r2, r5\n\t"
        "UMAAL	r0, r10, r3, r4\n\t"
        "ADCS	r0, r0, r0\n\t"
        "UMAAL	r0, r11, lr, lr\n\t"
        /* R[7] = r0 */
        "UMAAL	r9, r8, r1, r7\n\t"
        "UMAAL	r9, r10, r2, r6\n\t"
        "UMAAL	r12, r9, r3, r5\n\t"
        "ADCS	r12, r12, r12\n\t"
        "UMAAL	r12, r11, r4, r4\n\t"
        /* R[8] = r12 */
        "UMAAL	r9, r8, r2, r7\n\t"
        "UMAAL	r10, r9, r3, r6\n\t"
        "MOV	r2, lr\n\t"
        "UMAAL	r10, r2, r4, r5\n\t"
        "ADCS	r10, r10, r10\n\t"
        "UMAAL	r11, r10, lr, lr\n\t"
        /* R[9] = r11 */
        "UMAAL	r2, r8, r3, r7\n\t"
        "UMAAL	r2, r9, r4, r6\n\t"
        "ADCS	r3, r2, r2\n\t"
        "UMAAL	r10, r3, r5, r5\n\t"
        /* R[10] = r10 */
        "MOV	r1, lr\n\t"
        "UMAAL	r1, r8, r4, r7\n\t"
        "UMAAL	r1, r9, r5, r6\n\t"
        "ADCS	r4, r1, r1\n\t"
        "UMAAL	r3, r4, lr, lr\n\t"
        /* R[11] = r3 */
        "UMAAL	r8, r9, r5, r7\n\t"
        "ADCS	r8, r8, r8\n\t"
        "UMAAL	r4, r8, r6, r6\n\t"
        /* R[12] = r4 */
        "MOV	r5, lr\n\t"
        "UMAAL	r5, r9, r6, r7\n\t"
        "ADCS	r5, r5, r5\n\t"
        "UMAAL	r8, r5, lr, lr\n\t"
        /* R[13] = r8 */
        "ADCS	r9, r9, r9\n\t"
        "UMAAL	r9, r5, r7, r7\n\t"
        "ADCS	r7, r5, lr\n\t"
        /* R[14] = r9 */
        /* R[15] = r7 */
        /* Reduce */
        /* The high words R[8..15] are folded onto the low words (popped
         * from the stack frame) with multiplier 0x26; the part at and
         * above bit 31 of word 7 is folded with multiplier 0x13. */
        "MOV	r6, #0x25\n\t"
        "UMAAL	r7, r0, r7, r6\n\t"
        "MOV	r6, #0x13\n\t"
        "LSL	r0, r0, #1\n\t"
        "ORR	r0, r0, r7, LSR #31\n\t"
        "MUL	lr, r0, r6\n\t"
        /* Product words 0 and 1 */
        "POP	{r0, r1}\n\t"
        "MOV	r6, #0x26\n\t"
        "UMAAL	r0, lr, r12, r6\n\t"
        "UMAAL	r1, lr, r11, r6\n\t"
        /* Move R[11]/R[12] out of r3/r4 before those are reloaded. */
        "MOV	r12, r3\n\t"
        "MOV	r11, r4\n\t"
        /* Product words 2, 3 and 4 */
        "POP	{r2, r3, r4}\n\t"
        "UMAAL	r2, lr, r10, r6\n\t"
        "UMAAL	r3, lr, r12, r6\n\t"
        "UMAAL	r4, lr, r11, r6\n\t"
        /* Keep the 0x26 multiplier in r12; r6 is about to be reloaded. */
        "MOV	r12, r6\n\t"
        /* Product words 5 and 6 */
        "POP	{r5, r6}\n\t"
        "UMAAL	r5, lr, r8, r12\n\t"
        "BFC	r7, #31, #1\n\t"
        "UMAAL	r6, lr, r9, r12\n\t"
        "ADD	r7, r7, lr\n\t"
        /* Reduce if top bit set */
        /* r12 = 19 when bit 31 of r7 is set, otherwise 0 (branch-free). */
        "MOV	r11, #0x13\n\t"
        "AND	r12, r11, r7, ASR #31\n\t"
        "ADDS	r0, r0, r12\n\t"
        "ADCS	r1, r1, #0x0\n\t"
        "ADCS	r2, r2, #0x0\n\t"
        "ADCS	r3, r3, #0x0\n\t"
        "ADCS	r4, r4, #0x0\n\t"
        "ADCS	r5, r5, #0x0\n\t"
        "BFC	r7, #31, #1\n\t"
        "ADCS	r6, r6, #0x0\n\t"
        "ADC	r7, r7, #0x0\n\t"
        /* Double */
        "ADDS	r0, r0, r0\n\t"
        "ADCS	r1, r1, r1\n\t"
        "ADCS	r2, r2, r2\n\t"
        "ADCS	r3, r3, r3\n\t"
        "ADCS	r4, r4, r4\n\t"
        "ADCS	r5, r5, r5\n\t"
        "ADCS	r6, r6, r6\n\t"
        "ADC	r7, r7, r7\n\t"
        /* Reduce if top bit set */
        "MOV	r11, #0x13\n\t"
        "AND	r12, r11, r7, ASR #31\n\t"
        "ADDS	r0, r0, r12\n\t"
        "ADCS	r1, r1, #0x0\n\t"
        "ADCS	r2, r2, #0x0\n\t"
        "ADCS	r3, r3, #0x0\n\t"
        "ADCS	r4, r4, #0x0\n\t"
        "ADCS	r5, r5, #0x0\n\t"
        "BFC	r7, #31, #1\n\t"
        "ADCS	r6, r6, #0x0\n\t"
        "ADC	r7, r7, #0x0\n\t"
        /* Recover the saved r (into r12) and a (into lr) pointers; this
         * releases the last 8 bytes of the stack frame. */
        "POP	{r12, lr}\n\t"
        /* Store */
        "STM	r12, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t"
        /* Put the original pointer values back into r0/r1 so the "+r"
         * operands leave the asm unchanged. */
        "MOV	r0, r12\n\t"
        "MOV	r1, lr\n\t"
        : [r] "+r" (r), [a] "+r" (a)
        :
        /* r2-r12 and lr are all written above and must be declared as
         * clobbers. Without them the compiler may assume callee-saved
         * r4-r11 survive the asm and skip saving them for the caller. */
        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc"
    );
}

#endif /* WOLFSSL_SP_NO_UMAAL */
/* Fixed square-and-multiply chain over a field element, built from calls to
 * fe_sq_op and fe_mul_op (both defined elsewhere in this file).
 *
 * Assuming fe_sq_op(r0, r1) stores the square of *r1 to *r0 and
 * fe_mul_op(r0, r1, r2) stores *r1 * *r2 to *r0 (TODO confirm against their
 * definitions), the chain computes r = a^(2^252 - 3); the exponent notes
 * below are written under that assumption.
 *
 * r  Output field element; passed as destination of the final fe_mul_op.
 * a  Input field element; only passed as a source operand.
 *
 * Stack frame (0x68 bytes):
 *   sp+0x00  t0 (32 bytes)
 *   sp+0x20  t1 (32 bytes)
 *   sp+0x40  t2 (32 bytes)
 *   sp+96    saved r pointer
 *   sp+100   saved a pointer
 *
 * Each loop keeps its counter in r12 and pushes/pops it around the call
 * (presumably because fe_sq_op does not preserve r12). The r0/r1 pointers
 * are computed before the PUSH, so the temporary 4-byte sp shift does not
 * affect them.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void fe_pow22523(fe r_p, const fe a_p)
#else
void fe_pow22523(fe r, const fe a)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register sword32* r __asm__ ("r0") = (sword32*)r_p;
    register const sword32* a __asm__ ("r1") = (const sword32*)a_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        "SUB	sp, sp, #0x68\n\t"
        /* pow22523 */
        "STR	%[r], [sp, #96]\n\t"
        "STR	%[a], [sp, #100]\n\t"
        /* t0 = a^2 */
        "LDR	r1, [sp, #100]\n\t"
        "MOV	r0, sp\n\t"
        "BL	fe_sq_op\n\t"
        /* t1 = t0^2 = a^4 */
        "MOV	r1, sp\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "BL	fe_sq_op\n\t"
        /* t1 = t1^2 = a^8 */
        "ADD	r1, sp, #0x20\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "BL	fe_sq_op\n\t"
        /* t1 = a * t1 = a^9 */
        "ADD	r2, sp, #0x20\n\t"
        "LDR	r1, [sp, #100]\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "BL	fe_mul_op\n\t"
        /* t0 = t0 * t1 = a^11 */
        "ADD	r2, sp, #0x20\n\t"
        "MOV	r1, sp\n\t"
        "MOV	r0, sp\n\t"
        "BL	fe_mul_op\n\t"
        /* t0 = t0^2 = a^22 */
        "MOV	r1, sp\n\t"
        "MOV	r0, sp\n\t"
        "BL	fe_sq_op\n\t"
        /* t0 = t1 * t0 = a^31 = a^(2^5 - 1) */
        "MOV	r2, sp\n\t"
        "ADD	r1, sp, #0x20\n\t"
        "MOV	r0, sp\n\t"
        "BL	fe_mul_op\n\t"
        /* t1 = t0^2, then 4 more squarings: t1 = a^(2^10 - 2^5) */
        "MOV	r1, sp\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "BL	fe_sq_op\n\t"
        "MOV	r12, #0x4\n\t"
        "\n"
    "L_fe_pow22523_1%=:\n\t"
        "ADD	r1, sp, #0x20\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
        /* The branch mnemonic is selected per toolchain. */
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_fe_pow22523_1%=\n\t"
#else
        "BNE.N	L_fe_pow22523_1%=\n\t"
#endif
        /* t0 = t1 * t0 = a^(2^10 - 1) */
        "MOV	r2, sp\n\t"
        "ADD	r1, sp, #0x20\n\t"
        "MOV	r0, sp\n\t"
        "BL	fe_mul_op\n\t"
        /* t1 = t0^2, then 9 more squarings: t1 = a^(2^20 - 2^10) */
        "MOV	r1, sp\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "BL	fe_sq_op\n\t"
        "MOV	r12, #0x9\n\t"
        "\n"
    "L_fe_pow22523_2%=:\n\t"
        "ADD	r1, sp, #0x20\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_fe_pow22523_2%=\n\t"
#else
        "BNE.N	L_fe_pow22523_2%=\n\t"
#endif
        /* t1 = t1 * t0 = a^(2^20 - 1) */
        "MOV	r2, sp\n\t"
        "ADD	r1, sp, #0x20\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "BL	fe_mul_op\n\t"
        /* t2 = t1^2, then 19 more squarings: t2 = a^(2^40 - 2^20) */
        "ADD	r1, sp, #0x20\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "BL	fe_sq_op\n\t"
        "MOV	r12, #0x13\n\t"
        "\n"
    "L_fe_pow22523_3%=:\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_fe_pow22523_3%=\n\t"
#else
        "BNE.N	L_fe_pow22523_3%=\n\t"
#endif
        /* t1 = t2 * t1 = a^(2^40 - 1) */
        "ADD	r2, sp, #0x20\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "BL	fe_mul_op\n\t"
        /* 10 squarings of t1: t1 = a^(2^50 - 2^10) */
        "MOV	r12, #0xa\n\t"
        "\n"
    "L_fe_pow22523_4%=:\n\t"
        "ADD	r1, sp, #0x20\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_fe_pow22523_4%=\n\t"
#else
        "BNE.N	L_fe_pow22523_4%=\n\t"
#endif
        /* t0 = t1 * t0 = a^(2^50 - 1) */
        "MOV	r2, sp\n\t"
        "ADD	r1, sp, #0x20\n\t"
        "MOV	r0, sp\n\t"
        "BL	fe_mul_op\n\t"
        /* t1 = t0^2, then 49 more squarings: t1 = a^(2^100 - 2^50) */
        "MOV	r1, sp\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "BL	fe_sq_op\n\t"
        "MOV	r12, #0x31\n\t"
        "\n"
    "L_fe_pow22523_5%=:\n\t"
        "ADD	r1, sp, #0x20\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_fe_pow22523_5%=\n\t"
#else
        "BNE.N	L_fe_pow22523_5%=\n\t"
#endif
        /* t1 = t1 * t0 = a^(2^100 - 1) */
        "MOV	r2, sp\n\t"
        "ADD	r1, sp, #0x20\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "BL	fe_mul_op\n\t"
        /* t2 = t1^2, then 99 more squarings: t2 = a^(2^200 - 2^100) */
        "ADD	r1, sp, #0x20\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "BL	fe_sq_op\n\t"
        "MOV	r12, #0x63\n\t"
        "\n"
    "L_fe_pow22523_6%=:\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x40\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_fe_pow22523_6%=\n\t"
#else
        "BNE.N	L_fe_pow22523_6%=\n\t"
#endif
        /* t1 = t2 * t1 = a^(2^200 - 1) */
        "ADD	r2, sp, #0x20\n\t"
        "ADD	r1, sp, #0x40\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "BL	fe_mul_op\n\t"
        /* 50 squarings of t1: t1 = a^(2^250 - 2^50) */
        "MOV	r12, #0x32\n\t"
        "\n"
    "L_fe_pow22523_7%=:\n\t"
        "ADD	r1, sp, #0x20\n\t"
        "ADD	r0, sp, #0x20\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_fe_pow22523_7%=\n\t"
#else
        "BNE.N	L_fe_pow22523_7%=\n\t"
#endif
        /* t0 = t1 * t0 = a^(2^250 - 1) */
        "MOV	r2, sp\n\t"
        "ADD	r1, sp, #0x20\n\t"
        "MOV	r0, sp\n\t"
        "BL	fe_mul_op\n\t"
        /* 2 squarings of t0: t0 = a^(2^252 - 4) */
        "MOV	r12, #0x2\n\t"
        "\n"
    "L_fe_pow22523_8%=:\n\t"
        "MOV	r1, sp\n\t"
        "MOV	r0, sp\n\t"
        "PUSH	{r12}\n\t"
        "BL	fe_sq_op\n\t"
        "POP	{r12}\n\t"
        "SUBS	r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
        "BNE	L_fe_pow22523_8%=\n\t"
#else
        "BNE.N	L_fe_pow22523_8%=\n\t"
#endif
        /* r = t0 * a = a^(2^252 - 3) */
        "LDR	r2, [sp, #100]\n\t"
        "MOV	r1, sp\n\t"
        "LDR	r0, [sp, #96]\n\t"
        "BL	fe_mul_op\n\t"
        /* Reload the original pointer values into the "+r" operands before
         * releasing the frame. */
        "LDR	%[a], [sp, #100]\n\t"
        "LDR	%[r], [sp, #96]\n\t"
        "ADD	sp, sp, #0x68\n\t"
        : [r] "+r" (r), [a] "+r" (a)
        :
        : "memory", "lr", "r12", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc"
    );
}

/* Fill a ge_p2 from a ge_p1p1 using three fe_mul_op calls.
 *
 * With fe_mul_op(r0, r1, r2) presumably storing *r1 * *r2 to *r0 (defined
 * elsewhere - TODO confirm), the 32-byte elements are combined as:
 *   r + 0x00 = (p + 0x00) * (p + 0x60)
 *   r + 0x20 = (p + 0x20) * (p + 0x40)
 *   r + 0x40 = (p + 0x40) * (p + 0x60)
 * (offsets 0x00/0x20/0x40/0x60 presumably are the X/Y/Z/T members - verify
 * against ge_operations.h).
 *
 * r  Destination structure.
 * p  Source structure.
 *
 * The assembly names r0/r1 directly, so it relies on r and p being held in
 * r0/r1 (bound explicitly when WOLFSSL_NO_VAR_ASSIGN_REG is not defined).
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void ge_p1p1_to_p2(ge_p2 * r_p, const ge_p1p1 * p_p)
#else
void ge_p1p1_to_p2(ge_p2 * r, const ge_p1p1 * p)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register ge_p2 * r __asm__ ("r0") = (ge_p2 *)r_p;
    register const ge_p1p1 * p __asm__ ("r1") = (const ge_p1p1 *)p_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        /* Save r at sp+0 and p at sp+4 so they can be reloaded after
         * each call. */
        "SUB	sp, sp, #0x8\n\t"
        "STR	%[r], [sp]\n\t"
        "STR	%[p], [sp, #4]\n\t"
        /* Call with r0 = r, r1 = p, r2 = p + 0x60 */
        "ADD	r2, r1, #0x60\n\t"
        "BL	fe_mul_op\n\t"
        /* Call with r0 = r + 0x20, r1 = p + 0x20, r2 = p + 0x40 */
        "LDR	r0, [sp]\n\t"
        "LDR	r1, [sp, #4]\n\t"
        "ADD	r2, r1, #0x40\n\t"
        "ADD	r1, r1, #0x20\n\t"
        "ADD	r0, r0, #0x20\n\t"
        "BL	fe_mul_op\n\t"
        /* Call with r0 = r + 0x40, r1 = p + 0x40, r2 = p + 0x60 */
        "LDR	r0, [sp]\n\t"
        "LDR	r1, [sp, #4]\n\t"
        "ADD	r2, r1, #0x60\n\t"
        "ADD	r1, r1, #0x40\n\t"
        "ADD	r0, r0, #0x40\n\t"
        "BL	fe_mul_op\n\t"
        "ADD	sp, sp, #0x8\n\t"
        : [r] "+r" (r), [p] "+r" (p)
        :
        : "memory", "lr", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc"
    );
}

/* Fill a ge_p3 from a ge_p1p1 using four fe_mul_op calls.
 *
 * With fe_mul_op(r0, r1, r2) presumably storing *r1 * *r2 to *r0 (defined
 * elsewhere - TODO confirm), the 32-byte elements are combined as:
 *   r + 0x00 = (p + 0x00) * (p + 0x60)
 *   r + 0x20 = (p + 0x20) * (p + 0x40)
 *   r + 0x40 = (p + 0x40) * (p + 0x60)
 *   r + 0x60 = (p + 0x00) * (p + 0x20)
 * (offsets 0x00/0x20/0x40/0x60 presumably are the X/Y/Z/T members - verify
 * against ge_operations.h).
 *
 * r  Destination structure.
 * p  Source structure.
 *
 * The assembly names r0/r1 directly, so it relies on r and p being held in
 * r0/r1 (bound explicitly when WOLFSSL_NO_VAR_ASSIGN_REG is not defined).
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void ge_p1p1_to_p3(ge_p3 * r_p, const ge_p1p1 * p_p)
#else
void ge_p1p1_to_p3(ge_p3 * r, const ge_p1p1 * p)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register ge_p3 * r __asm__ ("r0") = (ge_p3 *)r_p;
    register const ge_p1p1 * p __asm__ ("r1") = (const ge_p1p1 *)p_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        /* Save r at sp+0 and p at sp+4 so they can be reloaded after
         * each call. */
        "SUB	sp, sp, #0x8\n\t"
        "STR	%[r], [sp]\n\t"
        "STR	%[p], [sp, #4]\n\t"
        /* Call with r0 = r, r1 = p, r2 = p + 0x60 */
        "ADD	r2, r1, #0x60\n\t"
        "BL	fe_mul_op\n\t"
        /* Call with r0 = r + 0x20, r1 = p + 0x20, r2 = p + 0x40 */
        "LDR	r0, [sp]\n\t"
        "LDR	r1, [sp, #4]\n\t"
        "ADD	r2, r1, #0x40\n\t"
        "ADD	r1, r1, #0x20\n\t"
        "ADD	r0, r0, #0x20\n\t"
        "BL	fe_mul_op\n\t"
        /* Call with r0 = r + 0x40, r1 = p + 0x40, r2 = p + 0x60 */
        "LDR	r0, [sp]\n\t"
        "LDR	r1, [sp, #4]\n\t"
        "ADD	r2, r1, #0x60\n\t"
        "ADD	r1, r1, #0x40\n\t"
        "ADD	r0, r0, #0x40\n\t"
        "BL	fe_mul_op\n\t"
        /* Call with r0 = r + 0x60, r1 = p, r2 = p + 0x20 */
        "LDR	r0, [sp]\n\t"
        "LDR	r1, [sp, #4]\n\t"
        "ADD	r2, r1, #0x20\n\t"
        "ADD	r0, r0, #0x60\n\t"
        "BL	fe_mul_op\n\t"
        "ADD	sp, sp, #0x8\n\t"
        : [r] "+r" (r), [p] "+r" (p)
        :
        : "memory", "lr", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc"
    );
}

/* Compute a ge_p1p1 from a ge_p2 (the name suggests point doubling) by
 * calling the field helpers fe_sq_op, fe_add_op, fe_add_sub_op, fe_sub_op
 * and fe_sq2 on the 32-byte elements at offsets 0x00/0x20/0x40/0x60.
 *
 * Helper semantics are assumed, not shown here (TODO confirm):
 *   fe_sq_op(r0, r1)              *r0 = *r1 squared
 *   fe_add_op(r0, r1, r2)         *r0 = *r1 + *r2
 *   fe_sub_op(r0, r1, r2)         *r0 = *r1 - *r2
 *   fe_add_sub_op(r0, r1, r2, r3) *r0 = *r2 + *r3, *r1 = *r2 - *r3
 *
 * r  Destination structure.
 * p  Source structure.
 *
 * Several steps derive the next arguments from r0 right after a BL, so the
 * code relies on the helpers returning with r0 unchanged. The first call
 * also relies on r and p being held in r0/r1 (bound explicitly when
 * WOLFSSL_NO_VAR_ASSIGN_REG is not defined).
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void ge_p2_dbl(ge_p1p1 * r_p, const ge_p2 * p_p)
#else
void ge_p2_dbl(ge_p1p1 * r, const ge_p2 * p)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register ge_p1p1 * r __asm__ ("r0") = (ge_p1p1 *)r_p;
    register const ge_p2 * p __asm__ ("r1") = (const ge_p2 *)p_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        /* Save r at sp+0 and p at sp+4. */
        "SUB	sp, sp, #0x8\n\t"
        "STR	%[r], [sp]\n\t"
        "STR	%[p], [sp, #4]\n\t"
        /* fe_sq_op(r + 0x00, p + 0x00) */
        "BL	fe_sq_op\n\t"
        /* fe_sq_op(r + 0x40, p + 0x20) */
        "LDR	r0, [sp]\n\t"
        "LDR	r1, [sp, #4]\n\t"
        "ADD	r1, r1, #0x20\n\t"
        "ADD	r0, r0, #0x40\n\t"
        "BL	fe_sq_op\n\t"
        /* fe_add_op(r + 0x20, p + 0x00, p + 0x20) */
        "LDR	r0, [sp]\n\t"
        "LDR	r1, [sp, #4]\n\t"
        "ADD	r2, r1, #0x20\n\t"
        "ADD	r0, r0, #0x20\n\t"
        "BL	fe_add_op\n\t"
        /* fe_sq_op(r + 0x60, r + 0x20); r0 is reused from the call above */
        "MOV	r1, r0\n\t"
        "ADD	r0, r0, #0x40\n\t"
        "BL	fe_sq_op\n\t"
        /* fe_add_sub_op(r + 0x20, r + 0x40, r + 0x40, r + 0x00) */
        "LDR	r0, [sp]\n\t"
        "MOV	r3, r0\n\t"
        "ADD	r2, r0, #0x40\n\t"
        "ADD	r1, r0, #0x40\n\t"
        "ADD	r0, r0, #0x20\n\t"
        "BL	fe_add_sub_op\n\t"
        /* fe_sub_op(r + 0x00, r + 0x60, r + 0x20); r0 is r + 0x20 here */
        "MOV	r2, r0\n\t"
        "ADD	r1, r0, #0x40\n\t"
        "SUB	r0, r0, #0x20\n\t"
        "BL	fe_sub_op\n\t"
        /* fe_sq2(r + 0x60, p + 0x40); r0 is r + 0x00 here */
        "LDR	r1, [sp, #4]\n\t"
        "ADD	r1, r1, #0x40\n\t"
        "ADD	r0, r0, #0x60\n\t"
        "BL	fe_sq2\n\t"
        /* fe_sub_op(r + 0x60, r + 0x60, r + 0x40); r0 is r + 0x60 here */
        "SUB	r2, r0, #0x20\n\t"
        "MOV	r1, r0\n\t"
        "BL	fe_sub_op\n\t"
        "ADD	sp, sp, #0x8\n\t"
        : [r] "+r" (r), [p] "+r" (p)
        :
        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc"
    );
}

/* Combine a ge_p3 and a ge_precomp into a ge_p1p1 (the name suggests mixed
 * point addition) using the field helpers fe_add_op, fe_sub_op, fe_mul_op
 * and fe_add_sub_op plus an inline "double" of the element at p + 0x40.
 *
 * Helper semantics are assumed, not shown here (TODO confirm):
 *   fe_add_op(r0, r1, r2)         *r0 = *r1 + *r2
 *   fe_sub_op(r0, r1, r2)         *r0 = *r1 - *r2
 *   fe_mul_op(r0, r1, r2)         *r0 = *r1 * *r2
 *   fe_add_sub_op(r0, r1, r2, r3) *r0 = *r2 + *r3, *r1 = *r2 - *r3
 *
 * r  Destination structure (elements at 0x00/0x20/0x40/0x60 are written).
 * p  First source structure.
 * q  Second source structure (elements at 0x00/0x20/0x40 are read).
 *
 * Stack frame (0xc bytes): r, p, q saved at sp+0, sp+4, sp+8.
 *
 * Several steps derive the next arguments from r0 right after a BL, so the
 * code relies on the helpers returning with r0 unchanged. The first call
 * also relies on r/p/q being held in r0/r1/r2 (bound explicitly when
 * WOLFSSL_NO_VAR_ASSIGN_REG is not defined).
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void ge_madd(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p)
#else
void ge_madd(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register ge_p1p1 * r __asm__ ("r0") = (ge_p1p1 *)r_p;
    register const ge_p3 * p __asm__ ("r1") = (const ge_p3 *)p_p;
    register const ge_precomp * q __asm__ ("r2") = (const ge_precomp *)q_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        "SUB	sp, sp, #0xc\n\t"
        "STR	%[r], [sp]\n\t"
        "STR	%[p], [sp, #4]\n\t"
        "STR	%[q], [sp, #8]\n\t"
        /* fe_add_op(r + 0x00, p + 0x20, p + 0x00) */
        "MOV	r2, r1\n\t"
        "ADD	r1, r1, #0x20\n\t"
        "BL	fe_add_op\n\t"
        /* fe_sub_op(r + 0x20, p + 0x20, p + 0x00) */
        "LDR	r1, [sp, #4]\n\t"
        "MOV	r2, r1\n\t"
        "ADD	r1, r1, #0x20\n\t"
        "ADD	r0, r0, #0x20\n\t"
        "BL	fe_sub_op\n\t"
        /* fe_mul_op(r + 0x40, r + 0x00, q + 0x00); r0 is r + 0x20 here */
        "LDR	r2, [sp, #8]\n\t"
        "SUB	r1, r0, #0x20\n\t"
        "ADD	r0, r0, #0x20\n\t"
        "BL	fe_mul_op\n\t"
        /* fe_mul_op(r + 0x20, r + 0x20, q + 0x20) */
        "LDR	r0, [sp]\n\t"
        "LDR	r2, [sp, #8]\n\t"
        "ADD	r2, r2, #0x20\n\t"
        "ADD	r1, r0, #0x20\n\t"
        "ADD	r0, r0, #0x20\n\t"
        "BL	fe_mul_op\n\t"
        /* fe_mul_op(r + 0x60, q + 0x40, p + 0x60) */
        "LDR	r0, [sp]\n\t"
        "LDR	r1, [sp, #8]\n\t"
        "LDR	r2, [sp, #4]\n\t"
        "ADD	r2, r2, #0x60\n\t"
        "ADD	r1, r1, #0x40\n\t"
        "ADD	r0, r0, #0x60\n\t"
        "BL	fe_mul_op\n\t"
        /* fe_add_sub_op(r + 0x20, r + 0x00, r + 0x40, r + 0x20) */
        "LDR	r0, [sp]\n\t"
        "ADD	r3, r0, #0x20\n\t"
        "ADD	r2, r0, #0x40\n\t"
        "MOV	r1, r0\n\t"
        "ADD	r0, r0, #0x20\n\t"
        "BL	fe_add_sub_op\n\t"
        /* Source for the double: p + 0x40; destination: r + 0x40
         * (r0 is r + 0x20 here). */
        "LDR	r1, [sp, #4]\n\t"
        "ADD	r1, r1, #0x40\n\t"
        "ADD	r0, r0, #0x20\n\t"
        /* Double */
        /* Load eight words and add each to itself with carry. */
        "LDM	r1, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
        "ADDS	r4, r4, r4\n\t"
        "ADCS	r5, r5, r5\n\t"
        "ADCS	r6, r6, r6\n\t"
        "ADCS	r7, r7, r7\n\t"
        "ADCS	r8, r8, r8\n\t"
        "ADCS	r9, r9, r9\n\t"
        "ADCS	r10, r10, r10\n\t"
        "MOV	lr, #0x0\n\t"
        "ADCS	r11, r11, r11\n\t"
        /* lr = (carry out << 1) | bit 31 of the top word; that overflow
         * is multiplied by 19 and added back into the low word, and bit 31
         * of the top word is cleared. */
        "ADC	lr, lr, #0x0\n\t"
        "MOV	r12, #0x13\n\t"
        "LSL	lr, lr, #1\n\t"
        "ORR	lr, lr, r11, LSR #31\n\t"
        "MUL	r12, lr, r12\n\t"
        "ADDS	r4, r4, r12\n\t"
        "ADCS	r5, r5, #0x0\n\t"
        "ADCS	r6, r6, #0x0\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        "ADCS	r8, r8, #0x0\n\t"
        "ADCS	r9, r9, #0x0\n\t"
        "BFC	r11, #31, #1\n\t"
        "ADCS	r10, r10, #0x0\n\t"
        "ADC	r11, r11, #0x0\n\t"
        "STM	r0, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
        /* Done Double */
        /* fe_add_sub_op(r0 = r + 0x40, r1 = r + 0x60, r2, r3 = r + 0x60).
         * NOTE(review): r2 is not reloaded here; this relies on it still
         * holding r + 0x40 from before the previous fe_add_sub_op call -
         * confirm that helper preserves r2. */
        "ADD	r3, r0, #0x20\n\t"
        "ADD	r1, r0, #0x20\n\t"
        "BL	fe_add_sub_op\n\t"
        "ADD	sp, sp, #0xc\n\t"
        : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q)
        :
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc"
    );
}

/* Combine a ge_p3 and a ge_precomp into a ge_p1p1 (the name suggests mixed
 * point subtraction) using the field helpers fe_add_op, fe_sub_op,
 * fe_mul_op and fe_add_sub_op plus an inline "double" of the element at
 * p + 0x40.
 *
 * Compared with ge_madd, the q + 0x00 and q + 0x20 operands of the first
 * two multiplications are swapped, and the destinations of the final
 * fe_add_sub_op are swapped.
 *
 * Helper semantics are assumed, not shown here (TODO confirm):
 *   fe_add_op(r0, r1, r2)         *r0 = *r1 + *r2
 *   fe_sub_op(r0, r1, r2)         *r0 = *r1 - *r2
 *   fe_mul_op(r0, r1, r2)         *r0 = *r1 * *r2
 *   fe_add_sub_op(r0, r1, r2, r3) *r0 = *r2 + *r3, *r1 = *r2 - *r3
 *
 * r  Destination structure (elements at 0x00/0x20/0x40/0x60 are written).
 * p  First source structure.
 * q  Second source structure (elements at 0x00/0x20/0x40 are read).
 *
 * Stack frame (0xc bytes): r, p, q saved at sp+0, sp+4, sp+8.
 *
 * Several steps derive the next arguments from r0 right after a BL, so the
 * code relies on the helpers returning with r0 unchanged. The first call
 * also relies on r/p/q being held in r0/r1/r2 (bound explicitly when
 * WOLFSSL_NO_VAR_ASSIGN_REG is not defined).
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void ge_msub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p)
#else
void ge_msub(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register ge_p1p1 * r __asm__ ("r0") = (ge_p1p1 *)r_p;
    register const ge_p3 * p __asm__ ("r1") = (const ge_p3 *)p_p;
    register const ge_precomp * q __asm__ ("r2") = (const ge_precomp *)q_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        "SUB	sp, sp, #0xc\n\t"
        "STR	%[r], [sp]\n\t"
        "STR	%[p], [sp, #4]\n\t"
        "STR	%[q], [sp, #8]\n\t"
        /* fe_add_op(r + 0x00, p + 0x20, p + 0x00) */
        "MOV	r2, r1\n\t"
        "ADD	r1, r1, #0x20\n\t"
        "BL	fe_add_op\n\t"
        /* fe_sub_op(r + 0x20, p + 0x20, p + 0x00) */
        "LDR	r1, [sp, #4]\n\t"
        "MOV	r2, r1\n\t"
        "ADD	r1, r1, #0x20\n\t"
        "ADD	r0, r0, #0x20\n\t"
        "BL	fe_sub_op\n\t"
        /* fe_mul_op(r + 0x40, r + 0x00, q + 0x20); r0 is r + 0x20 here */
        "LDR	r2, [sp, #8]\n\t"
        "ADD	r2, r2, #0x20\n\t"
        "SUB	r1, r0, #0x20\n\t"
        "ADD	r0, r0, #0x20\n\t"
        "BL	fe_mul_op\n\t"
        /* fe_mul_op(r + 0x20, r + 0x20, q + 0x00) */
        "LDR	r0, [sp]\n\t"
        "LDR	r2, [sp, #8]\n\t"
        "ADD	r1, r0, #0x20\n\t"
        "ADD	r0, r0, #0x20\n\t"
        "BL	fe_mul_op\n\t"
        /* fe_mul_op(r + 0x60, q + 0x40, p + 0x60) */
        "LDR	r0, [sp]\n\t"
        "LDR	r1, [sp, #8]\n\t"
        "LDR	r2, [sp, #4]\n\t"
        "ADD	r2, r2, #0x60\n\t"
        "ADD	r1, r1, #0x40\n\t"
        "ADD	r0, r0, #0x60\n\t"
        "BL	fe_mul_op\n\t"
        /* fe_add_sub_op(r + 0x20, r + 0x00, r + 0x40, r + 0x20) */
        "LDR	r0, [sp]\n\t"
        "ADD	r3, r0, #0x20\n\t"
        "ADD	r2, r0, #0x40\n\t"
        "MOV	r1, r0\n\t"
        "ADD	r0, r0, #0x20\n\t"
        "BL	fe_add_sub_op\n\t"
        /* Source for the double: p + 0x40; destination: r + 0x40
         * (r0 is r + 0x20 here). */
        "LDR	r1, [sp, #4]\n\t"
        "ADD	r1, r1, #0x40\n\t"
        "ADD	r0, r0, #0x20\n\t"
        /* Double */
        /* Load eight words and add each to itself with carry. */
        "LDM	r1, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
        "ADDS	r4, r4, r4\n\t"
        "ADCS	r5, r5, r5\n\t"
        "ADCS	r6, r6, r6\n\t"
        "ADCS	r7, r7, r7\n\t"
        "ADCS	r8, r8, r8\n\t"
        "ADCS	r9, r9, r9\n\t"
        "ADCS	r10, r10, r10\n\t"
        "MOV	lr, #0x0\n\t"
        "ADCS	r11, r11, r11\n\t"
        /* lr = (carry out << 1) | bit 31 of the top word; that overflow
         * is multiplied by 19 and added back into the low word, and bit 31
         * of the top word is cleared. */
        "ADC	lr, lr, #0x0\n\t"
        "MOV	r12, #0x13\n\t"
        "LSL	lr, lr, #1\n\t"
        "ORR	lr, lr, r11, LSR #31\n\t"
        "MUL	r12, lr, r12\n\t"
        "ADDS	r4, r4, r12\n\t"
        "ADCS	r5, r5, #0x0\n\t"
        "ADCS	r6, r6, #0x0\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        "ADCS	r8, r8, #0x0\n\t"
        "ADCS	r9, r9, #0x0\n\t"
        "BFC	r11, #31, #1\n\t"
        "ADCS	r10, r10, #0x0\n\t"
        "ADC	r11, r11, #0x0\n\t"
        "STM	r0, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
        /* Done Double */
        /* fe_add_sub_op(r0 = r + 0x60, r1 = r + 0x40, r2, r3 = r + 0x60).
         * NOTE(review): r2 is not reloaded here; this relies on it still
         * holding r + 0x40 from before the previous fe_add_sub_op call -
         * confirm that helper preserves r2. */
        "ADD	r3, r0, #0x20\n\t"
        "MOV	r1, r0\n\t"
        "ADD	r0, r0, #0x20\n\t"
        "BL	fe_add_sub_op\n\t"
        "ADD	sp, sp, #0xc\n\t"
        : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q)
        :
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc"
    );
}

/* Combine a ge_p3 and a ge_cached into a ge_p1p1 (the name suggests point
 * addition) using the field helpers fe_add_sub_op and fe_mul_op plus an
 * inline "double" of the element at r + 0x00 into a stack temporary.
 *
 * Helper semantics are assumed, not shown here (TODO confirm):
 *   fe_mul_op(r0, r1, r2)         *r0 = *r1 * *r2
 *   fe_add_sub_op(r0, r1, r2, r3) *r0 = *r2 + *r3, *r1 = *r2 - *r3
 *
 * r  Destination structure (elements at 0x00/0x20/0x40/0x60 are written).
 * p  First source structure.
 * q  Second source structure (elements at 0x00/0x20/0x40/0x60 are read).
 *
 * Stack frame (0x2c bytes): r, p, q saved at sp+0, sp+4, sp+8; a 32-byte
 * temporary at sp+0xc holds the doubled element.
 *
 * Several steps derive the next arguments from r0 right after a BL, so the
 * code relies on the helpers returning with r0 unchanged. The first call
 * also relies on r/p/q being held in r0/r1/r2 (bound explicitly when
 * WOLFSSL_NO_VAR_ASSIGN_REG is not defined).
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void ge_add(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p)
#else
void ge_add(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register ge_p1p1 * r __asm__ ("r0") = (ge_p1p1 *)r_p;
    register const ge_p3 * p __asm__ ("r1") = (const ge_p3 *)p_p;
    register const ge_cached* q __asm__ ("r2") = (const ge_cached*)q_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        "SUB	sp, sp, #0x2c\n\t"
        "STR	%[r], [sp]\n\t"
        "STR	%[p], [sp, #4]\n\t"
        "STR	%[q], [sp, #8]\n\t"
        /* fe_add_sub_op(r + 0x00, r + 0x20, p + 0x20, p + 0x00) */
        "MOV	r3, r1\n\t"
        "ADD	r2, r1, #0x20\n\t"
        "ADD	r1, r0, #0x20\n\t"
        "BL	fe_add_sub_op\n\t"
        /* fe_mul_op(r + 0x40, r + 0x00, q + 0x00) */
        "LDR	r2, [sp, #8]\n\t"
        "MOV	r1, r0\n\t"
        "ADD	r0, r0, #0x40\n\t"
        "BL	fe_mul_op\n\t"
        /* fe_mul_op(r + 0x20, r + 0x20, q + 0x20) */
        "LDR	r0, [sp]\n\t"
        "LDR	r2, [sp, #8]\n\t"
        "ADD	r2, r2, #0x20\n\t"
        "ADD	r1, r0, #0x20\n\t"
        "ADD	r0, r0, #0x20\n\t"
        "BL	fe_mul_op\n\t"
        /* fe_mul_op(r + 0x60, q + 0x60, p + 0x60) */
        "LDR	r0, [sp]\n\t"
        "LDR	r1, [sp, #8]\n\t"
        "LDR	r2, [sp, #4]\n\t"
        "ADD	r2, r2, #0x60\n\t"
        "ADD	r1, r1, #0x60\n\t"
        "ADD	r0, r0, #0x60\n\t"
        "BL	fe_mul_op\n\t"
        /* fe_mul_op(r + 0x00, p + 0x40, q + 0x40) */
        "LDR	r0, [sp]\n\t"
        "LDR	r1, [sp, #4]\n\t"
        "LDR	r2, [sp, #8]\n\t"
        "ADD	r2, r2, #0x40\n\t"
        "ADD	r1, r1, #0x40\n\t"
        "BL	fe_mul_op\n\t"
        /* Source for the double: r + 0x00; destination: the stack
         * temporary at sp + 0xc. */
        "LDR	r1, [sp]\n\t"
        "ADD	r0, sp, #0xc\n\t"
        /* Double */
        /* Load eight words and add each to itself with carry. */
        "LDM	r1, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
        "ADDS	r4, r4, r4\n\t"
        "ADCS	r5, r5, r5\n\t"
        "ADCS	r6, r6, r6\n\t"
        "ADCS	r7, r7, r7\n\t"
        "ADCS	r8, r8, r8\n\t"
        "ADCS	r9, r9, r9\n\t"
        "ADCS	r10, r10, r10\n\t"
        "MOV	lr, #0x0\n\t"
        "ADCS	r11, r11, r11\n\t"
        /* lr = (carry out << 1) | bit 31 of the top word; that overflow
         * is multiplied by 19 and added back into the low word, and bit 31
         * of the top word is cleared. */
        "ADC	lr, lr, #0x0\n\t"
        "MOV	r12, #0x13\n\t"
        "LSL	lr, lr, #1\n\t"
        "ORR	lr, lr, r11, LSR #31\n\t"
        "MUL	r12, lr, r12\n\t"
        "ADDS	r4, r4, r12\n\t"
        "ADCS	r5, r5, #0x0\n\t"
        "ADCS	r6, r6, #0x0\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        "ADCS	r8, r8, #0x0\n\t"
        "ADCS	r9, r9, #0x0\n\t"
        "BFC	r11, #31, #1\n\t"
        "ADCS	r10, r10, #0x0\n\t"
        "ADC	r11, r11, #0x0\n\t"
        "STM	r0, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
        /* Done Double */
        /* fe_add_sub_op(r + 0x20, r + 0x00, r + 0x40, r + 0x20);
         * r1 still holds r from the load before the double. */
        "ADD	r3, r1, #0x20\n\t"
        "ADD	r2, r1, #0x40\n\t"
        "ADD	r0, r1, #0x20\n\t"
        "BL	fe_add_sub_op\n\t"
        /* fe_add_sub_op(r + 0x40, r + 0x60, sp + 0xc, r + 0x60);
         * r0 is r + 0x20 here. */
        "ADD	r3, r0, #0x40\n\t"
        "ADD	r2, sp, #0xc\n\t"
        "ADD	r1, r0, #0x40\n\t"
        "ADD	r0, r0, #0x20\n\t"
        "BL	fe_add_sub_op\n\t"
        "ADD	sp, sp, #0x2c\n\t"
        : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q)
        :
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc"
    );
}

/* Combine a ge_p3 and a ge_cached into a ge_p1p1 (the name suggests point
 * subtraction) using the field helpers fe_add_sub_op and fe_mul_op plus an
 * inline "double" of the element at r + 0x00 into a stack temporary.
 *
 * Compared with ge_add, the q + 0x00 and q + 0x20 operands of the first two
 * multiplications are swapped, and the destinations of the final
 * fe_add_sub_op are swapped.
 *
 * Helper semantics are assumed, not shown here (TODO confirm):
 *   fe_mul_op(r0, r1, r2)         *r0 = *r1 * *r2
 *   fe_add_sub_op(r0, r1, r2, r3) *r0 = *r2 + *r3, *r1 = *r2 - *r3
 *
 * r  Destination structure (elements at 0x00/0x20/0x40/0x60 are written).
 * p  First source structure.
 * q  Second source structure (elements at 0x00/0x20/0x40/0x60 are read).
 *
 * Stack frame (0x2c bytes): r, p, q saved at sp+0, sp+4, sp+8; a 32-byte
 * temporary at sp+0xc holds the doubled element.
 *
 * Several steps derive the next arguments from r0 right after a BL, so the
 * code relies on the helpers returning with r0 unchanged. The first call
 * also relies on r/p/q being held in r0/r1/r2 (bound explicitly when
 * WOLFSSL_NO_VAR_ASSIGN_REG is not defined).
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void ge_sub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p)
#else
void ge_sub(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register ge_p1p1 * r __asm__ ("r0") = (ge_p1p1 *)r_p;
    register const ge_p3 * p __asm__ ("r1") = (const ge_p3 *)p_p;
    register const ge_cached* q __asm__ ("r2") = (const ge_cached*)q_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        "SUB	sp, sp, #0x2c\n\t"
        "STR	%[r], [sp]\n\t"
        "STR	%[p], [sp, #4]\n\t"
        "STR	%[q], [sp, #8]\n\t"
        /* fe_add_sub_op(r + 0x00, r + 0x20, p + 0x20, p + 0x00) */
        "MOV	r3, r1\n\t"
        "ADD	r2, r1, #0x20\n\t"
        "ADD	r1, r0, #0x20\n\t"
        "BL	fe_add_sub_op\n\t"
        /* fe_mul_op(r + 0x40, r + 0x00, q + 0x20) */
        "LDR	r2, [sp, #8]\n\t"
        "ADD	r2, r2, #0x20\n\t"
        "MOV	r1, r0\n\t"
        "ADD	r0, r0, #0x40\n\t"
        "BL	fe_mul_op\n\t"
        /* fe_mul_op(r + 0x20, r + 0x20, q + 0x00) */
        "LDR	r0, [sp]\n\t"
        "LDR	r2, [sp, #8]\n\t"
        "ADD	r1, r0, #0x20\n\t"
        "ADD	r0, r0, #0x20\n\t"
        "BL	fe_mul_op\n\t"
        /* fe_mul_op(r + 0x60, q + 0x60, p + 0x60) */
        "LDR	r0, [sp]\n\t"
        "LDR	r1, [sp, #8]\n\t"
        "LDR	r2, [sp, #4]\n\t"
        "ADD	r2, r2, #0x60\n\t"
        "ADD	r1, r1, #0x60\n\t"
        "ADD	r0, r0, #0x60\n\t"
        "BL	fe_mul_op\n\t"
        /* fe_mul_op(r + 0x00, p + 0x40, q + 0x40) */
        "LDR	r0, [sp]\n\t"
        "LDR	r1, [sp, #4]\n\t"
        "LDR	r2, [sp, #8]\n\t"
        "ADD	r2, r2, #0x40\n\t"
        "ADD	r1, r1, #0x40\n\t"
        "BL	fe_mul_op\n\t"
        /* Source for the double: r + 0x00; destination: the stack
         * temporary at sp + 0xc. */
        "LDR	r1, [sp]\n\t"
        "ADD	r0, sp, #0xc\n\t"
        /* Double */
        /* Load eight words and add each to itself with carry. */
        "LDM	r1, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
        "ADDS	r4, r4, r4\n\t"
        "ADCS	r5, r5, r5\n\t"
        "ADCS	r6, r6, r6\n\t"
        "ADCS	r7, r7, r7\n\t"
        "ADCS	r8, r8, r8\n\t"
        "ADCS	r9, r9, r9\n\t"
        "ADCS	r10, r10, r10\n\t"
        "MOV	lr, #0x0\n\t"
        "ADCS	r11, r11, r11\n\t"
        /* lr = (carry out << 1) | bit 31 of the top word; that overflow
         * is multiplied by 19 and added back into the low word, and bit 31
         * of the top word is cleared. */
        "ADC	lr, lr, #0x0\n\t"
        "MOV	r12, #0x13\n\t"
        "LSL	lr, lr, #1\n\t"
        "ORR	lr, lr, r11, LSR #31\n\t"
        "MUL	r12, lr, r12\n\t"
        "ADDS	r4, r4, r12\n\t"
        "ADCS	r5, r5, #0x0\n\t"
        "ADCS	r6, r6, #0x0\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        "ADCS	r8, r8, #0x0\n\t"
        "ADCS	r9, r9, #0x0\n\t"
        "BFC	r11, #31, #1\n\t"
        "ADCS	r10, r10, #0x0\n\t"
        "ADC	r11, r11, #0x0\n\t"
        "STM	r0, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
        /* Done Double */
        /* fe_add_sub_op(r + 0x20, r + 0x00, r + 0x40, r + 0x20);
         * r1 still holds r from the load before the double. */
        "ADD	r3, r1, #0x20\n\t"
        "ADD	r2, r1, #0x40\n\t"
        "ADD	r0, r1, #0x20\n\t"
        "BL	fe_add_sub_op\n\t"
        /* fe_add_sub_op(r + 0x60, r + 0x40, sp + 0xc, r + 0x60);
         * r0 is r + 0x20 here. */
        "ADD	r3, r0, #0x40\n\t"
        "ADD	r2, sp, #0xc\n\t"
        "ADD	r1, r0, #0x20\n\t"
        "ADD	r0, r0, #0x40\n\t"
        "BL	fe_add_sub_op\n\t"
        "ADD	sp, sp, #0x2c\n\t"
        : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q)
        :
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc"
    );
}

#ifdef WOLFSSL_SP_NO_UMAAL
/* Reduce the multi-word scalar in s, in place.
 *
 * This is the variant compiled when WOLFSSL_SP_NO_UMAAL is defined: every
 * multiply-accumulate step is a UMLAL bracketed by an explicit ADDS/ADC pair
 * that carries the previous high word forward.
 *
 * Memory accessed through s:
 *   - reads the eight 32-bit words at byte offsets 0..31,
 *   - reads the nine 32-bit words at byte offsets 28..63,
 *   - writes eight 32-bit words (32 bytes) back at offset 0.
 * NOTE(review): presumably s is a 64-byte little-endian value and the result
 * is s modulo the Ed25519 group order - confirm against the callers.
 *
 * The multipliers 0xa30a2c13, 0xa7ed9ce5, 0x5d086329 and 0xeb210621 are the
 * 32-bit two's-complement forms of the values named in the inline comments
 * (-5cf5d3ed, -5812631b, -a2f79cd7, -14def9df).
 *
 * Stack use: 0x38 bytes are reserved.  Words at [sp, #0] .. [sp, #44] hold the
 * intermediate value and [sp, #52] holds the caller's s pointer.  sp is
 * restored before the asm statement ends.
 *
 * s  Scalar buffer; both input and output.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void sc_reduce(byte* s_p)
#else
void sc_reduce(byte* s)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    /* Bind s to r0 so that %[s] in the asm below is that register. */
    register byte* s __asm__ ("r0") = (byte*)s_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        /* Reserve scratch space and remember the output pointer. */
        "SUB	sp, sp, #0x38\n\t"
        "STR	%[s], [sp, #52]\n\t"
        /* Load bits 252-511 */
        /* r1..r9 = words 7..15 of s.  The shift/ORR chain moves everything
         * left by 4 bits so that r2..r9 hold bits 252..503 and lr holds the
         * top byte (bits 504..511). */
        "ADD	%[s], %[s], #0x1c\n\t"
        "LDM	%[s], {r1, r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
        "LSR	lr, r9, #24\n\t"
        "LSL	r9, r9, #4\n\t"
        "ORR	r9, r9, r8, LSR #28\n\t"
        "LSL	r8, r8, #4\n\t"
        "ORR	r8, r8, r7, LSR #28\n\t"
        "LSL	r7, r7, #4\n\t"
        "ORR	r7, r7, r6, LSR #28\n\t"
        "LSL	r6, r6, #4\n\t"
        "ORR	r6, r6, r5, LSR #28\n\t"
        "LSL	r5, r5, #4\n\t"
        "ORR	r5, r5, r4, LSR #28\n\t"
        "LSL	r4, r4, #4\n\t"
        "ORR	r4, r4, r3, LSR #28\n\t"
        "LSL	r3, r3, #4\n\t"
        "ORR	r3, r3, r2, LSR #28\n\t"
        "LSL	r2, r2, #4\n\t"
        "ORR	r2, r2, r1, LSR #28\n\t"
        /* Drop the four bits that were moved into lr. */
        "BFC	r9, #28, #4\n\t"
        /* Point s back at word 0. */
        "SUB	%[s], %[s], #0x1c\n\t"
        /* Add order times bits 504..511 */
        /* lr is multiplied by the four negated constants and accumulated into
         * r2..r5; the carry ripples through r6..r9.  lr itself is then
         * subtracted starting at r6 (four words up). */
        "MOV	r10, #0x2c13\n\t"
        "MOVT	r10, #0xa30a\n\t"
        "MOV	r11, #0x9ce5\n\t"
        "MOVT	r11, #0xa7ed\n\t"
        "MOV	r1, #0x0\n\t"
        "UMLAL	r2, r1, r10, lr\n\t"
        "ADDS	r3, r3, r1\n\t"
        "MOV	r1, #0x0\n\t"
        "ADC	r1, r1, #0x0\n\t"
        "UMLAL	r3, r1, r11, lr\n\t"
        "MOV	r10, #0x6329\n\t"
        "MOVT	r10, #0x5d08\n\t"
        "MOV	r11, #0x621\n\t"
        "MOVT	r11, #0xeb21\n\t"
        "ADDS	r4, r4, r1\n\t"
        "MOV	r1, #0x0\n\t"
        "ADC	r1, r1, #0x0\n\t"
        "UMLAL	r4, r1, r10, lr\n\t"
        "ADDS	r5, r5, r1\n\t"
        "MOV	r1, #0x0\n\t"
        "ADC	r1, r1, #0x0\n\t"
        "UMLAL	r5, r1, r11, lr\n\t"
        "ADDS	r6, r6, r1\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        "ADCS	r8, r8, #0x0\n\t"
        "ADC	r9, r9, #0x0\n\t"
        "SUBS	r6, r6, lr\n\t"
        "SBCS	r7, r7, #0x0\n\t"
        "SBCS	r8, r8, #0x0\n\t"
        "SBC	r9, r9, #0x0\n\t"
        /* Sub product of top 8 words and order */
        /* Four passes, one per 32-bit constant.  Each pass multiplies r2..r9
         * by the constant in r1 and accumulates into the stack buffer, one
         * word further up than the previous pass; lr carries the high word
         * between steps and is stored as an extra top word. */
        /* Pass 1 (x 0xa30a2c13): stack words 0..7 = s words 0..7 + product,
         * carry stored as stack word 8.  s is advanced by the LDM writeback. */
        "MOV	r12, sp\n\t"
        "MOV	r1, #0x2c13\n\t"
        "MOVT	r1, #0xa30a\n\t"
        "MOV	lr, #0x0\n\t"
        "LDM	%[s]!, {r10, r11}\n\t"
        "UMLAL	r10, lr, r2, r1\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, r3, r1\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	%[s]!, {r10, r11}\n\t"
        "ADDS	r10, r10, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r10, lr, r4, r1\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, r5, r1\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	%[s]!, {r10, r11}\n\t"
        "ADDS	r10, r10, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r10, lr, r6, r1\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, r7, r1\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	%[s]!, {r10, r11}\n\t"
        "ADDS	r10, r10, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r10, lr, r8, r1\n\t"
        /* Word 7 of s: keep only bits 224..251, the rest is already in the
         * top value held in r2..r9. */
        "BFC	r11, #28, #4\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, r9, r1\n\t"
        "STM	r12!, {r10, r11, lr}\n\t"
        "SUB	%[s], %[s], #0x10\n\t"
        /* r12 advanced 0x24 in the pass; stepping back 0x20 leaves it one
         * word higher for the next pass. */
        "SUB	r12, r12, #0x20\n\t"
        /* Pass 2 (x 0xa7ed9ce5): accumulate into stack words 1..8, carry
         * stored as word 9. */
        "MOV	r1, #0x9ce5\n\t"
        "MOVT	r1, #0xa7ed\n\t"
        "MOV	lr, #0x0\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMLAL	r10, lr, r2, r1\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, r3, r1\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADDS	r10, r10, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r10, lr, r4, r1\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, r5, r1\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADDS	r10, r10, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r10, lr, r6, r1\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, r7, r1\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADDS	r10, r10, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r10, lr, r8, r1\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, r9, r1\n\t"
        "STM	r12!, {r10, r11, lr}\n\t"
        "SUB	r12, r12, #0x20\n\t"
        /* Pass 3 (x 0x5d086329): accumulate into stack words 2..9, carry
         * stored as word 10. */
        "MOV	r1, #0x6329\n\t"
        "MOVT	r1, #0x5d08\n\t"
        "MOV	lr, #0x0\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMLAL	r10, lr, r2, r1\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, r3, r1\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADDS	r10, r10, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r10, lr, r4, r1\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, r5, r1\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADDS	r10, r10, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r10, lr, r6, r1\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, r7, r1\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADDS	r10, r10, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r10, lr, r8, r1\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, r9, r1\n\t"
        "STM	r12!, {r10, r11, lr}\n\t"
        "SUB	r12, r12, #0x20\n\t"
        /* Pass 4 (x 0xeb210621): accumulate into stack words 3..10, carry
         * stored as word 11. */
        "MOV	r1, #0x621\n\t"
        "MOVT	r1, #0xeb21\n\t"
        "MOV	lr, #0x0\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMLAL	r10, lr, r2, r1\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, r3, r1\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADDS	r10, r10, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r10, lr, r4, r1\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, r5, r1\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADDS	r10, r10, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r10, lr, r6, r1\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, r7, r1\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADDS	r10, r10, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r10, lr, r8, r1\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, r9, r1\n\t"
        "STM	r12!, {r10, r11, lr}\n\t"
        "SUB	r12, r12, #0x20\n\t"
        /* Subtract at 4 * 32 */
        /* r12 now addresses stack word 4; r2..r9 are subtracted from stack
         * words 4..11 with a single borrow chain. */
        "LDM	r12, {r10, r11}\n\t"
        "SUBS	r10, r10, r2\n\t"
        "SBCS	r11, r11, r3\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "SBCS	r10, r10, r4\n\t"
        "SBCS	r11, r11, r5\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "SBCS	r10, r10, r6\n\t"
        "SBCS	r11, r11, r7\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "SBCS	r10, r10, r8\n\t"
        "SBC	r11, r11, r9\n\t"
        "STM	r12!, {r10, r11}\n\t"
        /* Rewind r12 to stack word 3. */
        "SUB	r12, r12, #0x24\n\t"
        /* lr = top word (stack word 11) shifted right arithmetically by 25;
         * it is used as an AND mask below.  NOTE(review): presumably all-ones
         * when the subtraction went negative and zero otherwise - confirm the
         * value range. */
        "ASR	lr, r11, #25\n\t"
        /* Conditionally subtract order starting at bit 125 */
        /* The masked constants are ADDED to stack words 3..11 (ADDS/ADCS),
         * i.e. this step adds back when the mask is set.
         * NOTE(review): r9 is masked here but never added; it is overwritten
         * by the LDM into r6..r9 further down before any use. */
        "MOV	r1, #0xa0000000\n\t"
        "MOV	r2, #0xba7d\n\t"
        "MOVT	r2, #0x4b9e\n\t"
        "MOV	r3, #0x4c63\n\t"
        "MOVT	r3, #0xcb02\n\t"
        "MOV	r4, #0xf39a\n\t"
        "MOVT	r4, #0xd45e\n\t"
        "MOV	r5, #0xdf3b\n\t"
        "MOVT	r5, #0x29b\n\t"
        "MOV	r9, #0x2000000\n\t"
        "AND	r1, r1, lr\n\t"
        "AND	r2, r2, lr\n\t"
        "AND	r3, r3, lr\n\t"
        "AND	r4, r4, lr\n\t"
        "AND	r5, r5, lr\n\t"
        "AND	r9, r9, lr\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADDS	r10, r10, r1\n\t"
        "ADCS	r11, r11, r2\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADCS	r10, r10, r3\n\t"
        "ADCS	r11, r11, r4\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADCS	r10, r10, r5\n\t"
        "ADCS	r11, r11, #0x0\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADCS	r10, r10, #0x0\n\t"
        "ADCS	r11, r11, #0x0\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10}\n\t"
        "ADCS	r10, r10, #0x0\n\t"
        "STM	r12!, {r10}\n\t"
        /* s returns to its original value here, but is reassigned to sp
         * below before it is used again. */
        "SUB	%[s], %[s], #0x10\n\t"
        "MOV	r12, sp\n\t"
        /* Load bits 252-376 */
        /* r1..r5 = stack words 7..11; after the 4-bit left shift r2..r5 hold
         * bits 252..376 of the intermediate (top three bits of r5 cleared). */
        "ADD	r12, r12, #0x1c\n\t"
        "LDM	r12, {r1, r2, r3, r4, r5}\n\t"
        "LSL	r5, r5, #4\n\t"
        "ORR	r5, r5, r4, LSR #28\n\t"
        "LSL	r4, r4, #4\n\t"
        "ORR	r4, r4, r3, LSR #28\n\t"
        "LSL	r3, r3, #4\n\t"
        "ORR	r3, r3, r2, LSR #28\n\t"
        "LSL	r2, r2, #4\n\t"
        "ORR	r2, r2, r1, LSR #28\n\t"
        "BFC	r5, #29, #3\n\t"
        "SUB	r12, r12, #0x1c\n\t"
        /* Sub product of top 4 words and order */
        /* Four passes again, now over r2..r5 and addressed through s, which
         * walks up the stack buffer one word per pass.  The final carry of
         * each pass is kept in its own register (lr, r10, r11, r12) and added
         * in afterwards. */
        "MOV	%[s], sp\n\t"
        /*   * -5cf5d3ed */
        "MOV	r1, #0x2c13\n\t"
        "MOVT	r1, #0xa30a\n\t"
        "MOV	lr, #0x0\n\t"
        "LDM	%[s], {r6, r7, r8, r9}\n\t"
        "UMLAL	r6, lr, r2, r1\n\t"
        "ADDS	r7, r7, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r7, lr, r3, r1\n\t"
        "ADDS	r8, r8, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r8, lr, r4, r1\n\t"
        "ADDS	r9, r9, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r9, lr, r5, r1\n\t"
        "STM	%[s], {r6, r7, r8, r9}\n\t"
        "ADD	%[s], %[s], #0x4\n\t"
        /*   * -5812631b */
        "MOV	r1, #0x9ce5\n\t"
        "MOVT	r1, #0xa7ed\n\t"
        "MOV	r10, #0x0\n\t"
        "LDM	%[s], {r6, r7, r8, r9}\n\t"
        "UMLAL	r6, r10, r2, r1\n\t"
        "ADDS	r7, r7, r10\n\t"
        "MOV	r10, #0x0\n\t"
        "ADC	r10, r10, #0x0\n\t"
        "UMLAL	r7, r10, r3, r1\n\t"
        "ADDS	r8, r8, r10\n\t"
        "MOV	r10, #0x0\n\t"
        "ADC	r10, r10, #0x0\n\t"
        "UMLAL	r8, r10, r4, r1\n\t"
        "ADDS	r9, r9, r10\n\t"
        "MOV	r10, #0x0\n\t"
        "ADC	r10, r10, #0x0\n\t"
        "UMLAL	r9, r10, r5, r1\n\t"
        "STM	%[s], {r6, r7, r8, r9}\n\t"
        "ADD	%[s], %[s], #0x4\n\t"
        /*   * -a2f79cd7 */
        "MOV	r1, #0x6329\n\t"
        "MOVT	r1, #0x5d08\n\t"
        "MOV	r11, #0x0\n\t"
        "LDM	%[s], {r6, r7, r8, r9}\n\t"
        "UMLAL	r6, r11, r2, r1\n\t"
        "ADDS	r7, r7, r11\n\t"
        "MOV	r11, #0x0\n\t"
        "ADC	r11, r11, #0x0\n\t"
        "UMLAL	r7, r11, r3, r1\n\t"
        "ADDS	r8, r8, r11\n\t"
        "MOV	r11, #0x0\n\t"
        "ADC	r11, r11, #0x0\n\t"
        "UMLAL	r8, r11, r4, r1\n\t"
        "ADDS	r9, r9, r11\n\t"
        "MOV	r11, #0x0\n\t"
        "ADC	r11, r11, #0x0\n\t"
        "UMLAL	r9, r11, r5, r1\n\t"
        "STM	%[s], {r6, r7, r8, r9}\n\t"
        "ADD	%[s], %[s], #0x4\n\t"
        /*   * -14def9df */
        "MOV	r1, #0x621\n\t"
        "MOVT	r1, #0xeb21\n\t"
        "MOV	r12, #0x0\n\t"
        "LDM	%[s], {r6, r7, r8, r9}\n\t"
        "UMLAL	r6, r12, r2, r1\n\t"
        "ADDS	r7, r7, r12\n\t"
        "MOV	r12, #0x0\n\t"
        "ADC	r12, r12, #0x0\n\t"
        "UMLAL	r7, r12, r3, r1\n\t"
        "ADDS	r8, r8, r12\n\t"
        "MOV	r12, #0x0\n\t"
        "ADC	r12, r12, #0x0\n\t"
        "UMLAL	r8, r12, r4, r1\n\t"
        "ADDS	r9, r9, r12\n\t"
        "MOV	r12, #0x0\n\t"
        "ADC	r12, r12, #0x0\n\t"
        "UMLAL	r9, r12, r5, r1\n\t"
        "STM	%[s], {r6, r7, r8, r9}\n\t"
        "ADD	%[s], %[s], #0x4\n\t"
        /* Add overflows at 4 * 32 */
        /* s now addresses stack word 4.  Words 4..7 are loaded, the top four
         * bits of word 7 are dropped, and the four saved pass carries are
         * added, one per word. */
        "LDM	%[s], {r6, r7, r8, r9}\n\t"
        "BFC	r9, #28, #4\n\t"
        "ADDS	r6, r6, lr\n\t"
        "ADCS	r7, r7, r10\n\t"
        "ADCS	r8, r8, r11\n\t"
        "ADC	r9, r9, r12\n\t"
        /* Subtract top at 4 * 32 */
        "SUBS	r6, r6, r2\n\t"
        "SBCS	r7, r7, r3\n\t"
        "SBCS	r8, r8, r4\n\t"
        "SBCS	r9, r9, r5\n\t"
        /* r1 = 0 when the subtraction did not borrow, all-ones when it did. */
        "SBC	r1, r1, r1\n\t"
        /* Reload the low four words (stack words 0..3) into r2..r5. */
        "SUB	%[s], %[s], #0x10\n\t"
        "LDM	%[s], {r2, r3, r4, r5}\n\t"
        /* Build the low four words of the order, masked by r1, and add them
         * to r2..r5 with the carry continuing through r6..r9.  The order's
         * remaining set bit (bit 28 of the top word) is added via
         * r1 & 0x10000000. */
        "MOV	r10, #0xd3ed\n\t"
        "MOVT	r10, #0x5cf5\n\t"
        "MOV	r11, #0x631a\n\t"
        "MOVT	r11, #0x5812\n\t"
        "MOV	r12, #0x9cd6\n\t"
        "MOVT	r12, #0xa2f7\n\t"
        "MOV	lr, #0xf9de\n\t"
        "MOVT	lr, #0x14de\n\t"
        "AND	r10, r10, r1\n\t"
        "AND	r11, r11, r1\n\t"
        "AND	r12, r12, r1\n\t"
        "AND	lr, lr, r1\n\t"
        "ADDS	r2, r2, r10\n\t"
        "ADCS	r3, r3, r11\n\t"
        "ADCS	r4, r4, r12\n\t"
        "ADCS	r5, r5, lr\n\t"
        "ADCS	r6, r6, #0x0\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        /* AND without the S suffix leaves the carry chain intact. */
        "AND	r1, r1, #0x10000000\n\t"
        "ADCS	r8, r8, #0x0\n\t"
        "ADC	r9, r9, r1\n\t"
        /* Keep only the low 28 bits of the top word. */
        "BFC	r9, #28, #4\n\t"
        /* Store result */
        /* Fetch the caller's pointer saved on entry, write the eight result
         * words and release the stack scratch. */
        "LDR	%[s], [sp, #52]\n\t"
        "STM	%[s], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
        "ADD	sp, sp, #0x38\n\t"
        : [s] "+r" (s)
        :
        : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc"
    );
}

#else
/* Reduce the multi-word scalar in s, in place.
 *
 * This is the variant compiled when WOLFSSL_SP_NO_UMAAL is not defined.  Each
 * multiply chain starts with UMLAL (carry register zeroed) and continues with
 * UMAAL, which adds both of its accumulator registers to the 32x32 product,
 * so the running carry word is folded in without separate ADDS/ADC steps.
 *
 * Memory accessed through s:
 *   - reads the eight 32-bit words at byte offsets 0..31,
 *   - reads the nine 32-bit words at byte offsets 28..63,
 *   - writes eight 32-bit words (32 bytes) back at offset 0.
 * NOTE(review): presumably s is a 64-byte little-endian value and the result
 * is s modulo the Ed25519 group order - confirm against the callers.
 *
 * The multipliers 0xa30a2c13, 0xa7ed9ce5, 0x5d086329 and 0xeb210621 are the
 * 32-bit two's-complement forms of the values named in the inline comments
 * (-5cf5d3ed, -5812631b, -a2f79cd7, -14def9df).
 *
 * Stack use: 0x38 bytes are reserved.  Words at [sp, #0] .. [sp, #44] hold the
 * intermediate value and [sp, #52] holds the caller's s pointer.  sp is
 * restored before the asm statement ends.
 *
 * s  Scalar buffer; both input and output.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void sc_reduce(byte* s_p)
#else
void sc_reduce(byte* s)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    /* Bind s to r0 so that %[s] in the asm below is that register. */
    register byte* s __asm__ ("r0") = (byte*)s_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        /* Reserve scratch space and remember the output pointer. */
        "SUB	sp, sp, #0x38\n\t"
        "STR	%[s], [sp, #52]\n\t"
        /* Load bits 252-511 */
        /* r1..r9 = words 7..15 of s.  The shift/ORR chain moves everything
         * left by 4 bits so that r2..r9 hold bits 252..503 and lr holds the
         * top byte (bits 504..511). */
        "ADD	%[s], %[s], #0x1c\n\t"
        "LDM	%[s], {r1, r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
        "LSR	lr, r9, #24\n\t"
        "LSL	r9, r9, #4\n\t"
        "ORR	r9, r9, r8, LSR #28\n\t"
        "LSL	r8, r8, #4\n\t"
        "ORR	r8, r8, r7, LSR #28\n\t"
        "LSL	r7, r7, #4\n\t"
        "ORR	r7, r7, r6, LSR #28\n\t"
        "LSL	r6, r6, #4\n\t"
        "ORR	r6, r6, r5, LSR #28\n\t"
        "LSL	r5, r5, #4\n\t"
        "ORR	r5, r5, r4, LSR #28\n\t"
        "LSL	r4, r4, #4\n\t"
        "ORR	r4, r4, r3, LSR #28\n\t"
        "LSL	r3, r3, #4\n\t"
        "ORR	r3, r3, r2, LSR #28\n\t"
        "LSL	r2, r2, #4\n\t"
        "ORR	r2, r2, r1, LSR #28\n\t"
        /* Drop the four bits that were moved into lr. */
        "BFC	r9, #28, #4\n\t"
        /* Point s back at word 0. */
        "SUB	%[s], %[s], #0x1c\n\t"
        /* Add order times bits 504..511 */
        /* lr is multiplied by the four negated constants and accumulated into
         * r2..r5 with r1 as the running carry word; the final carry ripples
         * through r6..r9.  lr itself is then subtracted starting at r6 (four
         * words up). */
        "MOV	r10, #0x2c13\n\t"
        "MOVT	r10, #0xa30a\n\t"
        "MOV	r11, #0x9ce5\n\t"
        "MOVT	r11, #0xa7ed\n\t"
        "MOV	r1, #0x0\n\t"
        "UMLAL	r2, r1, r10, lr\n\t"
        "UMAAL	r3, r1, r11, lr\n\t"
        "MOV	r10, #0x6329\n\t"
        "MOVT	r10, #0x5d08\n\t"
        "MOV	r11, #0x621\n\t"
        "MOVT	r11, #0xeb21\n\t"
        "UMAAL	r4, r1, r10, lr\n\t"
        "UMAAL	r5, r1, r11, lr\n\t"
        "ADDS	r6, r6, r1\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        "ADCS	r8, r8, #0x0\n\t"
        "ADC	r9, r9, #0x0\n\t"
        "SUBS	r6, r6, lr\n\t"
        "SBCS	r7, r7, #0x0\n\t"
        "SBCS	r8, r8, #0x0\n\t"
        "SBC	r9, r9, #0x0\n\t"
        /* Sub product of top 8 words and order */
        /* Four passes, one per 32-bit constant.  Each pass multiplies r2..r9
         * by the constant in r1 and accumulates into the stack buffer, one
         * word further up than the previous pass; lr is the running carry
         * word and is stored as an extra top word. */
        /* Pass 1 (x 0xa30a2c13): stack words 0..7 = s words 0..7 + product,
         * carry stored as stack word 8.  s is advanced by the LDM writeback. */
        "MOV	r12, sp\n\t"
        "MOV	r1, #0x2c13\n\t"
        "MOVT	r1, #0xa30a\n\t"
        "MOV	lr, #0x0\n\t"
        "LDM	%[s]!, {r10, r11}\n\t"
        "UMLAL	r10, lr, r2, r1\n\t"
        "UMAAL	r11, lr, r3, r1\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	%[s]!, {r10, r11}\n\t"
        "UMAAL	r10, lr, r4, r1\n\t"
        "UMAAL	r11, lr, r5, r1\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	%[s]!, {r10, r11}\n\t"
        "UMAAL	r10, lr, r6, r1\n\t"
        "UMAAL	r11, lr, r7, r1\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	%[s]!, {r10, r11}\n\t"
        "UMAAL	r10, lr, r8, r1\n\t"
        /* Word 7 of s: keep only bits 224..251, the rest is already in the
         * top value held in r2..r9. */
        "BFC	r11, #28, #4\n\t"
        "UMAAL	r11, lr, r9, r1\n\t"
        "STM	r12!, {r10, r11, lr}\n\t"
        "SUB	%[s], %[s], #0x10\n\t"
        /* r12 advanced 0x24 in the pass; stepping back 0x20 leaves it one
         * word higher for the next pass. */
        "SUB	r12, r12, #0x20\n\t"
        /* Pass 2 (x 0xa7ed9ce5): accumulate into stack words 1..8, carry
         * stored as word 9. */
        "MOV	r1, #0x9ce5\n\t"
        "MOVT	r1, #0xa7ed\n\t"
        "MOV	lr, #0x0\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMLAL	r10, lr, r2, r1\n\t"
        "UMAAL	r11, lr, r3, r1\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMAAL	r10, lr, r4, r1\n\t"
        "UMAAL	r11, lr, r5, r1\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMAAL	r10, lr, r6, r1\n\t"
        "UMAAL	r11, lr, r7, r1\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMAAL	r10, lr, r8, r1\n\t"
        "UMAAL	r11, lr, r9, r1\n\t"
        "STM	r12!, {r10, r11, lr}\n\t"
        "SUB	r12, r12, #0x20\n\t"
        /* Pass 3 (x 0x5d086329): accumulate into stack words 2..9, carry
         * stored as word 10. */
        "MOV	r1, #0x6329\n\t"
        "MOVT	r1, #0x5d08\n\t"
        "MOV	lr, #0x0\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMLAL	r10, lr, r2, r1\n\t"
        "UMAAL	r11, lr, r3, r1\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMAAL	r10, lr, r4, r1\n\t"
        "UMAAL	r11, lr, r5, r1\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMAAL	r10, lr, r6, r1\n\t"
        "UMAAL	r11, lr, r7, r1\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMAAL	r10, lr, r8, r1\n\t"
        "UMAAL	r11, lr, r9, r1\n\t"
        "STM	r12!, {r10, r11, lr}\n\t"
        "SUB	r12, r12, #0x20\n\t"
        /* Pass 4 (x 0xeb210621): accumulate into stack words 3..10, carry
         * stored as word 11. */
        "MOV	r1, #0x621\n\t"
        "MOVT	r1, #0xeb21\n\t"
        "MOV	lr, #0x0\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMLAL	r10, lr, r2, r1\n\t"
        "UMAAL	r11, lr, r3, r1\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMAAL	r10, lr, r4, r1\n\t"
        "UMAAL	r11, lr, r5, r1\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMAAL	r10, lr, r6, r1\n\t"
        "UMAAL	r11, lr, r7, r1\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMAAL	r10, lr, r8, r1\n\t"
        "UMAAL	r11, lr, r9, r1\n\t"
        "STM	r12!, {r10, r11, lr}\n\t"
        "SUB	r12, r12, #0x20\n\t"
        /* Subtract at 4 * 32 */
        /* r12 now addresses stack word 4; r2..r9 are subtracted from stack
         * words 4..11 with a single borrow chain. */
        "LDM	r12, {r10, r11}\n\t"
        "SUBS	r10, r10, r2\n\t"
        "SBCS	r11, r11, r3\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "SBCS	r10, r10, r4\n\t"
        "SBCS	r11, r11, r5\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "SBCS	r10, r10, r6\n\t"
        "SBCS	r11, r11, r7\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "SBCS	r10, r10, r8\n\t"
        "SBC	r11, r11, r9\n\t"
        "STM	r12!, {r10, r11}\n\t"
        /* Rewind r12 to stack word 3. */
        "SUB	r12, r12, #0x24\n\t"
        /* lr = top word (stack word 11) shifted right arithmetically by 25;
         * it is used as an AND mask below.  NOTE(review): presumably all-ones
         * when the subtraction went negative and zero otherwise - confirm the
         * value range. */
        "ASR	lr, r11, #25\n\t"
        /* Conditionally subtract order starting at bit 125 */
        /* The masked constants are ADDED to stack words 3..11 (ADDS/ADCS),
         * i.e. this step adds back when the mask is set.
         * NOTE(review): r9 is masked here but never added; it is overwritten
         * by the LDM into r6..r9 further down before any use. */
        "MOV	r1, #0xa0000000\n\t"
        "MOV	r2, #0xba7d\n\t"
        "MOVT	r2, #0x4b9e\n\t"
        "MOV	r3, #0x4c63\n\t"
        "MOVT	r3, #0xcb02\n\t"
        "MOV	r4, #0xf39a\n\t"
        "MOVT	r4, #0xd45e\n\t"
        "MOV	r5, #0xdf3b\n\t"
        "MOVT	r5, #0x29b\n\t"
        "MOV	r9, #0x2000000\n\t"
        "AND	r1, r1, lr\n\t"
        "AND	r2, r2, lr\n\t"
        "AND	r3, r3, lr\n\t"
        "AND	r4, r4, lr\n\t"
        "AND	r5, r5, lr\n\t"
        "AND	r9, r9, lr\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADDS	r10, r10, r1\n\t"
        "ADCS	r11, r11, r2\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADCS	r10, r10, r3\n\t"
        "ADCS	r11, r11, r4\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADCS	r10, r10, r5\n\t"
        "ADCS	r11, r11, #0x0\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADCS	r10, r10, #0x0\n\t"
        "ADCS	r11, r11, #0x0\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10}\n\t"
        "ADCS	r10, r10, #0x0\n\t"
        "STM	r12!, {r10}\n\t"
        /* s returns to its original value here, but is reassigned to sp
         * below before it is used again. */
        "SUB	%[s], %[s], #0x10\n\t"
        "MOV	r12, sp\n\t"
        /* Load bits 252-376 */
        /* r1..r5 = stack words 7..11; after the 4-bit left shift r2..r5 hold
         * bits 252..376 of the intermediate (top three bits of r5 cleared). */
        "ADD	r12, r12, #0x1c\n\t"
        "LDM	r12, {r1, r2, r3, r4, r5}\n\t"
        "LSL	r5, r5, #4\n\t"
        "ORR	r5, r5, r4, LSR #28\n\t"
        "LSL	r4, r4, #4\n\t"
        "ORR	r4, r4, r3, LSR #28\n\t"
        "LSL	r3, r3, #4\n\t"
        "ORR	r3, r3, r2, LSR #28\n\t"
        "LSL	r2, r2, #4\n\t"
        "ORR	r2, r2, r1, LSR #28\n\t"
        "BFC	r5, #29, #3\n\t"
        "SUB	r12, r12, #0x1c\n\t"
        /* Sub product of top 4 words and order */
        /* Four passes again, now over r2..r5 and addressed through s, which
         * walks up the stack buffer one word per pass.  The final carry of
         * each pass is kept in its own register (lr, r10, r11, r12) and added
         * in afterwards. */
        "MOV	%[s], sp\n\t"
        /*   * -5cf5d3ed */
        "MOV	r1, #0x2c13\n\t"
        "MOVT	r1, #0xa30a\n\t"
        "MOV	lr, #0x0\n\t"
        "LDM	%[s], {r6, r7, r8, r9}\n\t"
        "UMLAL	r6, lr, r2, r1\n\t"
        "UMAAL	r7, lr, r3, r1\n\t"
        "UMAAL	r8, lr, r4, r1\n\t"
        "UMAAL	r9, lr, r5, r1\n\t"
        "STM	%[s], {r6, r7, r8, r9}\n\t"
        "ADD	%[s], %[s], #0x4\n\t"
        /*   * -5812631b */
        "MOV	r1, #0x9ce5\n\t"
        "MOVT	r1, #0xa7ed\n\t"
        "MOV	r10, #0x0\n\t"
        "LDM	%[s], {r6, r7, r8, r9}\n\t"
        "UMLAL	r6, r10, r2, r1\n\t"
        "UMAAL	r7, r10, r3, r1\n\t"
        "UMAAL	r8, r10, r4, r1\n\t"
        "UMAAL	r9, r10, r5, r1\n\t"
        "STM	%[s], {r6, r7, r8, r9}\n\t"
        "ADD	%[s], %[s], #0x4\n\t"
        /*   * -a2f79cd7 */
        "MOV	r1, #0x6329\n\t"
        "MOVT	r1, #0x5d08\n\t"
        "MOV	r11, #0x0\n\t"
        "LDM	%[s], {r6, r7, r8, r9}\n\t"
        "UMLAL	r6, r11, r2, r1\n\t"
        "UMAAL	r7, r11, r3, r1\n\t"
        "UMAAL	r8, r11, r4, r1\n\t"
        "UMAAL	r9, r11, r5, r1\n\t"
        "STM	%[s], {r6, r7, r8, r9}\n\t"
        "ADD	%[s], %[s], #0x4\n\t"
        /*   * -14def9df */
        "MOV	r1, #0x621\n\t"
        "MOVT	r1, #0xeb21\n\t"
        "MOV	r12, #0x0\n\t"
        "LDM	%[s], {r6, r7, r8, r9}\n\t"
        "UMLAL	r6, r12, r2, r1\n\t"
        "UMAAL	r7, r12, r3, r1\n\t"
        "UMAAL	r8, r12, r4, r1\n\t"
        "UMAAL	r9, r12, r5, r1\n\t"
        "STM	%[s], {r6, r7, r8, r9}\n\t"
        "ADD	%[s], %[s], #0x4\n\t"
        /* Add overflows at 4 * 32 */
        /* s now addresses stack word 4.  Words 4..7 are loaded, the top four
         * bits of word 7 are dropped, and the four saved pass carries are
         * added, one per word. */
        "LDM	%[s], {r6, r7, r8, r9}\n\t"
        "BFC	r9, #28, #4\n\t"
        "ADDS	r6, r6, lr\n\t"
        "ADCS	r7, r7, r10\n\t"
        "ADCS	r8, r8, r11\n\t"
        "ADC	r9, r9, r12\n\t"
        /* Subtract top at 4 * 32 */
        "SUBS	r6, r6, r2\n\t"
        "SBCS	r7, r7, r3\n\t"
        "SBCS	r8, r8, r4\n\t"
        "SBCS	r9, r9, r5\n\t"
        /* r1 = 0 when the subtraction did not borrow, all-ones when it did. */
        "SBC	r1, r1, r1\n\t"
        /* Reload the low four words (stack words 0..3) into r2..r5. */
        "SUB	%[s], %[s], #0x10\n\t"
        "LDM	%[s], {r2, r3, r4, r5}\n\t"
        /* Build the low four words of the order, masked by r1, and add them
         * to r2..r5 with the carry continuing through r6..r9.  The order's
         * remaining set bit (bit 28 of the top word) is added via
         * r1 & 0x10000000. */
        "MOV	r10, #0xd3ed\n\t"
        "MOVT	r10, #0x5cf5\n\t"
        "MOV	r11, #0x631a\n\t"
        "MOVT	r11, #0x5812\n\t"
        "MOV	r12, #0x9cd6\n\t"
        "MOVT	r12, #0xa2f7\n\t"
        "MOV	lr, #0xf9de\n\t"
        "MOVT	lr, #0x14de\n\t"
        "AND	r10, r10, r1\n\t"
        "AND	r11, r11, r1\n\t"
        "AND	r12, r12, r1\n\t"
        "AND	lr, lr, r1\n\t"
        "ADDS	r2, r2, r10\n\t"
        "ADCS	r3, r3, r11\n\t"
        "ADCS	r4, r4, r12\n\t"
        "ADCS	r5, r5, lr\n\t"
        "ADCS	r6, r6, #0x0\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        /* AND without the S suffix leaves the carry chain intact. */
        "AND	r1, r1, #0x10000000\n\t"
        "ADCS	r8, r8, #0x0\n\t"
        "ADC	r9, r9, r1\n\t"
        /* Keep only the low 28 bits of the top word. */
        "BFC	r9, #28, #4\n\t"
        /* Store result */
        /* Fetch the caller's pointer saved on entry, write the eight result
         * words and release the stack scratch. */
        "LDR	%[s], [sp, #52]\n\t"
        "STM	%[s], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
        "ADD	sp, sp, #0x38\n\t"
        : [s] "+r" (s)
        :
        : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc"
    );
}

#endif /* WOLFSSL_SP_NO_UMAAL */
#ifdef HAVE_ED25519_SIGN
#ifdef WOLFSSL_SP_NO_UMAAL
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
#else
void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    register byte* s __asm__ ("r0") = (byte*)s_p;
    register const byte* a __asm__ ("r1") = (const byte*)a_p;
    register const byte* b __asm__ ("r2") = (const byte*)b_p;
    register const byte* c __asm__ ("r3") = (const byte*)c_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        "SUB	sp, sp, #0x50\n\t"
        "ADD	lr, sp, #0x44\n\t"
        "STM	lr, {%[s], %[a], %[c]}\n\t"
        "MOV	%[s], #0x0\n\t"
        "LDR	r12, [%[a]]\n\t"
        /* A[0] * B[0] */
        "LDR	lr, [%[b]]\n\t"
        "UMULL	%[c], r4, r12, lr\n\t"
        /* A[0] * B[2] */
        "LDR	lr, [%[b], #8]\n\t"
        "UMULL	r5, r6, r12, lr\n\t"
        /* A[0] * B[4] */
        "LDR	lr, [%[b], #16]\n\t"
        "UMULL	r7, r8, r12, lr\n\t"
        /* A[0] * B[6] */
        "LDR	lr, [%[b], #24]\n\t"
        "UMULL	r9, r10, r12, lr\n\t"
        "STR	%[c], [sp]\n\t"
        /* A[0] * B[1] */
        "LDR	lr, [%[b], #4]\n\t"
        "MOV	r11, %[s]\n\t"
        "UMLAL	r4, r11, r12, lr\n\t"
        "ADDS	r5, r5, r11\n\t"
        /* A[0] * B[3] */
        "LDR	lr, [%[b], #12]\n\t"
        "ADCS	r6, r6, #0x0\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r6, r11, r12, lr\n\t"
        "ADDS	r7, r7, r11\n\t"
        /* A[0] * B[5] */
        "LDR	lr, [%[b], #20]\n\t"
        "ADCS	r8, r8, #0x0\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r8, r11, r12, lr\n\t"
        "ADDS	r9, r9, r11\n\t"
        /* A[0] * B[7] */
        "LDR	lr, [%[b], #28]\n\t"
        "ADCS	r10, r10, #0x0\n\t"
        "ADC	%[c], %[s], #0x0\n\t"
        "UMLAL	r10, %[c], r12, lr\n\t"
        /* A[1] * B[0] */
        "LDR	r12, [%[a], #4]\n\t"
        "LDR	lr, [%[b]]\n\t"
        "MOV	r11, #0x0\n\t"
        "UMLAL	r4, r11, r12, lr\n\t"
        "STR	r4, [sp, #4]\n\t"
        "ADDS	r5, r5, r11\n\t"
        /* A[1] * B[1] */
        "LDR	lr, [%[b], #4]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r5, r11, r12, lr\n\t"
        "ADDS	r6, r6, r11\n\t"
        /* A[1] * B[2] */
        "LDR	lr, [%[b], #8]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r6, r11, r12, lr\n\t"
        "ADDS	r7, r7, r11\n\t"
        /* A[1] * B[3] */
        "LDR	lr, [%[b], #12]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r7, r11, r12, lr\n\t"
        "ADDS	r8, r8, r11\n\t"
        /* A[1] * B[4] */
        "LDR	lr, [%[b], #16]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r8, r11, r12, lr\n\t"
        "ADDS	r9, r9, r11\n\t"
        /* A[1] * B[5] */
        "LDR	lr, [%[b], #20]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r9, r11, r12, lr\n\t"
        "ADDS	r10, r10, r11\n\t"
        /* A[1] * B[6] */
        "LDR	lr, [%[b], #24]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r10, r11, r12, lr\n\t"
        "ADDS	%[c], %[c], r11\n\t"
        /* A[1] * B[7] */
        "LDR	lr, [%[b], #28]\n\t"
        "ADC	r4, %[s], #0x0\n\t"
        "UMLAL	%[c], r4, r12, lr\n\t"
        /* A[2] * B[0] */
        "LDR	r12, [%[a], #8]\n\t"
        "LDR	lr, [%[b]]\n\t"
        "MOV	r11, #0x0\n\t"
        "UMLAL	r5, r11, r12, lr\n\t"
        "STR	r5, [sp, #8]\n\t"
        "ADDS	r6, r6, r11\n\t"
        /* A[2] * B[1] */
        "LDR	lr, [%[b], #4]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r6, r11, r12, lr\n\t"
        "ADDS	r7, r7, r11\n\t"
        /* A[2] * B[2] */
        "LDR	lr, [%[b], #8]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r7, r11, r12, lr\n\t"
        "ADDS	r8, r8, r11\n\t"
        /* A[2] * B[3] */
        "LDR	lr, [%[b], #12]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r8, r11, r12, lr\n\t"
        "ADDS	r9, r9, r11\n\t"
        /* A[2] * B[4] */
        "LDR	lr, [%[b], #16]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r9, r11, r12, lr\n\t"
        "ADDS	r10, r10, r11\n\t"
        /* A[2] * B[5] */
        "LDR	lr, [%[b], #20]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r10, r11, r12, lr\n\t"
        "ADDS	%[c], %[c], r11\n\t"
        /* A[2] * B[6] */
        "LDR	lr, [%[b], #24]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	%[c], r11, r12, lr\n\t"
        "ADDS	r4, r4, r11\n\t"
        /* A[2] * B[7] */
        "LDR	lr, [%[b], #28]\n\t"
        "ADC	r5, %[s], #0x0\n\t"
        "UMLAL	r4, r5, r12, lr\n\t"
        /* A[3] * B[0] */
        "LDR	r12, [%[a], #12]\n\t"
        "LDR	lr, [%[b]]\n\t"
        "MOV	r11, #0x0\n\t"
        "UMLAL	r6, r11, r12, lr\n\t"
        "STR	r6, [sp, #12]\n\t"
        "ADDS	r7, r7, r11\n\t"
        /* A[3] * B[1] */
        "LDR	lr, [%[b], #4]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r7, r11, r12, lr\n\t"
        "ADDS	r8, r8, r11\n\t"
        /* A[3] * B[2] */
        "LDR	lr, [%[b], #8]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r8, r11, r12, lr\n\t"
        "ADDS	r9, r9, r11\n\t"
        /* A[3] * B[3] */
        "LDR	lr, [%[b], #12]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r9, r11, r12, lr\n\t"
        "ADDS	r10, r10, r11\n\t"
        /* A[3] * B[4] */
        "LDR	lr, [%[b], #16]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r10, r11, r12, lr\n\t"
        "ADDS	%[c], %[c], r11\n\t"
        /* A[3] * B[5] */
        "LDR	lr, [%[b], #20]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	%[c], r11, r12, lr\n\t"
        "ADDS	r4, r4, r11\n\t"
        /* A[3] * B[6] */
        "LDR	lr, [%[b], #24]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r4, r11, r12, lr\n\t"
        "ADDS	r5, r5, r11\n\t"
        /* A[3] * B[7] */
        "LDR	lr, [%[b], #28]\n\t"
        "ADC	r6, %[s], #0x0\n\t"
        "UMLAL	r5, r6, r12, lr\n\t"
        /* A[4] * B[0] */
        "LDR	r12, [%[a], #16]\n\t"
        "LDR	lr, [%[b]]\n\t"
        "MOV	r11, #0x0\n\t"
        "UMLAL	r7, r11, r12, lr\n\t"
        "STR	r7, [sp, #16]\n\t"
        "ADDS	r8, r8, r11\n\t"
        /* A[4] * B[1] */
        "LDR	lr, [%[b], #4]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r8, r11, r12, lr\n\t"
        "ADDS	r9, r9, r11\n\t"
        /* A[4] * B[2] */
        "LDR	lr, [%[b], #8]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r9, r11, r12, lr\n\t"
        "ADDS	r10, r10, r11\n\t"
        /* A[4] * B[3] */
        "LDR	lr, [%[b], #12]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r10, r11, r12, lr\n\t"
        "ADDS	%[c], %[c], r11\n\t"
        /* A[4] * B[4] */
        "LDR	lr, [%[b], #16]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	%[c], r11, r12, lr\n\t"
        "ADDS	r4, r4, r11\n\t"
        /* A[4] * B[5] */
        "LDR	lr, [%[b], #20]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r4, r11, r12, lr\n\t"
        "ADDS	r5, r5, r11\n\t"
        /* A[4] * B[6] */
        "LDR	lr, [%[b], #24]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r5, r11, r12, lr\n\t"
        "ADDS	r6, r6, r11\n\t"
        /* A[4] * B[7] */
        "LDR	lr, [%[b], #28]\n\t"
        "ADC	r7, %[s], #0x0\n\t"
        "UMLAL	r6, r7, r12, lr\n\t"
        /* A[5] * B[0] */
        "LDR	r12, [%[a], #20]\n\t"
        "LDR	lr, [%[b]]\n\t"
        "MOV	r11, #0x0\n\t"
        "UMLAL	r8, r11, r12, lr\n\t"
        "STR	r8, [sp, #20]\n\t"
        "ADDS	r9, r9, r11\n\t"
        /* A[5] * B[1] */
        "LDR	lr, [%[b], #4]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r9, r11, r12, lr\n\t"
        "ADDS	r10, r10, r11\n\t"
        /* A[5] * B[2] */
        "LDR	lr, [%[b], #8]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r10, r11, r12, lr\n\t"
        "ADDS	%[c], %[c], r11\n\t"
        /* A[5] * B[3] */
        "LDR	lr, [%[b], #12]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	%[c], r11, r12, lr\n\t"
        "ADDS	r4, r4, r11\n\t"
        /* A[5] * B[4] */
        "LDR	lr, [%[b], #16]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r4, r11, r12, lr\n\t"
        "ADDS	r5, r5, r11\n\t"
        /* A[5] * B[5] */
        "LDR	lr, [%[b], #20]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r5, r11, r12, lr\n\t"
        "ADDS	r6, r6, r11\n\t"
        /* A[5] * B[6] */
        "LDR	lr, [%[b], #24]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r6, r11, r12, lr\n\t"
        "ADDS	r7, r7, r11\n\t"
        /* A[5] * B[7] */
        "LDR	lr, [%[b], #28]\n\t"
        "ADC	r8, %[s], #0x0\n\t"
        "UMLAL	r7, r8, r12, lr\n\t"
        /* A[6] * B[0] */
        "LDR	r12, [%[a], #24]\n\t"
        "LDR	lr, [%[b]]\n\t"
        "MOV	r11, #0x0\n\t"
        "UMLAL	r9, r11, r12, lr\n\t"
        "STR	r9, [sp, #24]\n\t"
        "ADDS	r10, r10, r11\n\t"
        /* A[6] * B[1] */
        "LDR	lr, [%[b], #4]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r10, r11, r12, lr\n\t"
        "ADDS	%[c], %[c], r11\n\t"
        /* A[6] * B[2] */
        "LDR	lr, [%[b], #8]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	%[c], r11, r12, lr\n\t"
        "ADDS	r4, r4, r11\n\t"
        /* A[6] * B[3] */
        "LDR	lr, [%[b], #12]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r4, r11, r12, lr\n\t"
        "ADDS	r5, r5, r11\n\t"
        /* A[6] * B[4] */
        "LDR	lr, [%[b], #16]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r5, r11, r12, lr\n\t"
        "ADDS	r6, r6, r11\n\t"
        /* A[6] * B[5] */
        "LDR	lr, [%[b], #20]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r6, r11, r12, lr\n\t"
        "ADDS	r7, r7, r11\n\t"
        /* A[6] * B[6] */
        "LDR	lr, [%[b], #24]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r7, r11, r12, lr\n\t"
        "ADDS	r8, r8, r11\n\t"
        /* A[6] * B[7] */
        "LDR	lr, [%[b], #28]\n\t"
        "ADC	r9, %[s], #0x0\n\t"
        "UMLAL	r8, r9, r12, lr\n\t"
        /* A[7] * B[0] */
        "LDR	r12, [%[a], #28]\n\t"
        "LDR	lr, [%[b]]\n\t"
        "MOV	r11, #0x0\n\t"
        "UMLAL	r10, r11, r12, lr\n\t"
        "STR	r10, [sp, #28]\n\t"
        "ADDS	%[c], %[c], r11\n\t"
        /* A[7] * B[1] */
        "LDR	lr, [%[b], #4]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	%[c], r11, r12, lr\n\t"
        "ADDS	r4, r4, r11\n\t"
        /* A[7] * B[2] */
        "LDR	lr, [%[b], #8]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r4, r11, r12, lr\n\t"
        "ADDS	r5, r5, r11\n\t"
        /* A[7] * B[3] */
        "LDR	lr, [%[b], #12]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r5, r11, r12, lr\n\t"
        "ADDS	r6, r6, r11\n\t"
        /* A[7] * B[4] */
        "LDR	lr, [%[b], #16]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r6, r11, r12, lr\n\t"
        "ADDS	r7, r7, r11\n\t"
        /* A[7] * B[5] */
        "LDR	lr, [%[b], #20]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r7, r11, r12, lr\n\t"
        "ADDS	r8, r8, r11\n\t"
        /* A[7] * B[6] */
        "LDR	lr, [%[b], #24]\n\t"
        "ADC	r11, %[s], #0x0\n\t"
        "UMLAL	r8, r11, r12, lr\n\t"
        "ADDS	r9, r9, r11\n\t"
        /* A[7] * B[7] */
        "LDR	lr, [%[b], #28]\n\t"
        "ADC	r10, %[s], #0x0\n\t"
        "UMLAL	r9, r10, r12, lr\n\t"
        "ADD	lr, sp, #0x20\n\t"
        "STM	lr, {%[c], r4, r5, r6, r7, r8, r9, r10}\n\t"
        "MOV	%[s], sp\n\t"
        /* Add c to a * b */
        "LDR	lr, [sp, #76]\n\t"
        "LDM	%[s], {%[b], %[c], r4, r5, r6, r7, r8, r9}\n\t"
        "LDM	lr!, {%[a], r10, r11, r12}\n\t"
        "ADDS	%[b], %[b], %[a]\n\t"
        "ADCS	%[c], %[c], r10\n\t"
        "ADCS	r4, r4, r11\n\t"
        "ADCS	r5, r5, r12\n\t"
        "LDM	lr!, {%[a], r10, r11, r12}\n\t"
        "ADCS	r6, r6, %[a]\n\t"
        "ADCS	r7, r7, r10\n\t"
        "ADCS	r8, r8, r11\n\t"
        "ADCS	r9, r9, r12\n\t"
        "MOV	%[a], r9\n\t"
        "STM	%[s]!, {%[b], %[c], r4, r5, r6, r7, r8, r9}\n\t"
        "LDM	%[s], {%[b], %[c], r4, r5, r6, r7, r8, r9}\n\t"
        "ADCS	%[b], %[b], #0x0\n\t"
        "ADCS	%[c], %[c], #0x0\n\t"
        "ADCS	r4, r4, #0x0\n\t"
        "ADCS	r5, r5, #0x0\n\t"
        "ADCS	r6, r6, #0x0\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        "ADCS	r8, r8, #0x0\n\t"
        "ADC	r9, r9, #0x0\n\t"
        "SUB	%[s], %[s], #0x20\n\t"
        /* Get 252..503 and 504..507 */
        "LSR	lr, r9, #24\n\t"
        "LSL	r9, r9, #4\n\t"
        "ORR	r9, r9, r8, LSR #28\n\t"
        "LSL	r8, r8, #4\n\t"
        "ORR	r8, r8, r7, LSR #28\n\t"
        "LSL	r7, r7, #4\n\t"
        "ORR	r7, r7, r6, LSR #28\n\t"
        "LSL	r6, r6, #4\n\t"
        "ORR	r6, r6, r5, LSR #28\n\t"
        "LSL	r5, r5, #4\n\t"
        "ORR	r5, r5, r4, LSR #28\n\t"
        "LSL	r4, r4, #4\n\t"
        "ORR	r4, r4, %[c], LSR #28\n\t"
        "LSL	%[c], %[c], #4\n\t"
        "ORR	%[c], %[c], %[b], LSR #28\n\t"
        "LSL	%[b], %[b], #4\n\t"
        "ORR	%[b], %[b], %[a], LSR #28\n\t"
        "BFC	r9, #28, #4\n\t"
        /* Add order times bits 504..507 */
        "MOV	r10, #0x2c13\n\t"
        "MOVT	r10, #0xa30a\n\t"
        "MOV	r11, #0x9ce5\n\t"
        "MOVT	r11, #0xa7ed\n\t"
        "MOV	%[a], #0x0\n\t"
        "UMLAL	%[b], %[a], r10, lr\n\t"
        "ADDS	%[c], %[c], %[a]\n\t"
        "MOV	%[a], #0x0\n\t"
        "ADC	%[a], %[a], #0x0\n\t"
        "UMLAL	%[c], %[a], r11, lr\n\t"
        "MOV	r10, #0x6329\n\t"
        "MOVT	r10, #0x5d08\n\t"
        "MOV	r11, #0x621\n\t"
        "MOVT	r11, #0xeb21\n\t"
        "ADDS	r4, r4, %[a]\n\t"
        "MOV	%[a], #0x0\n\t"
        "ADC	%[a], %[a], #0x0\n\t"
        "UMLAL	r4, %[a], r10, lr\n\t"
        "ADDS	r5, r5, %[a]\n\t"
        "MOV	%[a], #0x0\n\t"
        "ADC	%[a], %[a], #0x0\n\t"
        "UMLAL	r5, %[a], r11, lr\n\t"
        "ADDS	r6, r6, %[a]\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        "ADCS	r8, r8, #0x0\n\t"
        "ADC	r9, r9, #0x0\n\t"
        "SUBS	r6, r6, lr\n\t"
        "SBCS	r7, r7, #0x0\n\t"
        "SBCS	r8, r8, #0x0\n\t"
        "SBC	r9, r9, #0x0\n\t"
        /* Sub product of top 8 words and order */
        "MOV	r12, sp\n\t"
        "MOV	%[a], #0x2c13\n\t"
        "MOVT	%[a], #0xa30a\n\t"
        "MOV	lr, #0x0\n\t"
        "LDM	%[s]!, {r10, r11}\n\t"
        "UMLAL	r10, lr, %[b], %[a]\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, %[c], %[a]\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	%[s]!, {r10, r11}\n\t"
        "ADDS	r10, r10, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r10, lr, r4, %[a]\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, r5, %[a]\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	%[s]!, {r10, r11}\n\t"
        "ADDS	r10, r10, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r10, lr, r6, %[a]\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, r7, %[a]\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	%[s]!, {r10, r11}\n\t"
        "ADDS	r10, r10, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r10, lr, r8, %[a]\n\t"
        "BFC	r11, #28, #4\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, r9, %[a]\n\t"
        "STM	r12!, {r10, r11, lr}\n\t"
        "SUB	%[s], %[s], #0x10\n\t"
        "SUB	r12, r12, #0x20\n\t"
        "MOV	%[a], #0x9ce5\n\t"
        "MOVT	%[a], #0xa7ed\n\t"
        "MOV	lr, #0x0\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMLAL	r10, lr, %[b], %[a]\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, %[c], %[a]\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADDS	r10, r10, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r10, lr, r4, %[a]\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, r5, %[a]\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADDS	r10, r10, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r10, lr, r6, %[a]\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, r7, %[a]\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADDS	r10, r10, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r10, lr, r8, %[a]\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, r9, %[a]\n\t"
        "STM	r12!, {r10, r11, lr}\n\t"
        "SUB	r12, r12, #0x20\n\t"
        "MOV	%[a], #0x6329\n\t"
        "MOVT	%[a], #0x5d08\n\t"
        "MOV	lr, #0x0\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMLAL	r10, lr, %[b], %[a]\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, %[c], %[a]\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADDS	r10, r10, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r10, lr, r4, %[a]\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, r5, %[a]\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADDS	r10, r10, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r10, lr, r6, %[a]\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, r7, %[a]\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADDS	r10, r10, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r10, lr, r8, %[a]\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, r9, %[a]\n\t"
        "STM	r12!, {r10, r11, lr}\n\t"
        "SUB	r12, r12, #0x20\n\t"
        "MOV	%[a], #0x621\n\t"
        "MOVT	%[a], #0xeb21\n\t"
        "MOV	lr, #0x0\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMLAL	r10, lr, %[b], %[a]\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, %[c], %[a]\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADDS	r10, r10, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r10, lr, r4, %[a]\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, r5, %[a]\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADDS	r10, r10, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r10, lr, r6, %[a]\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, r7, %[a]\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADDS	r10, r10, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r10, lr, r8, %[a]\n\t"
        "ADDS	r11, r11, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r11, lr, r9, %[a]\n\t"
        "STM	r12!, {r10, r11, lr}\n\t"
        "SUB	r12, r12, #0x20\n\t"
        /* Subtract at 4 * 32 */
        "LDM	r12, {r10, r11}\n\t"
        "SUBS	r10, r10, %[b]\n\t"
        "SBCS	r11, r11, %[c]\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "SBCS	r10, r10, r4\n\t"
        "SBCS	r11, r11, r5\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "SBCS	r10, r10, r6\n\t"
        "SBCS	r11, r11, r7\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "SBCS	r10, r10, r8\n\t"
        "SBC	r11, r11, r9\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "SUB	r12, r12, #0x24\n\t"
        "ASR	lr, r11, #25\n\t"
        /* Conditionally subtract order starting at bit 125 */
        "MOV	%[a], #0xa0000000\n\t"
        "MOV	%[b], #0xba7d\n\t"
        "MOVT	%[b], #0x4b9e\n\t"
        "MOV	%[c], #0x4c63\n\t"
        "MOVT	%[c], #0xcb02\n\t"
        "MOV	r4, #0xf39a\n\t"
        "MOVT	r4, #0xd45e\n\t"
        "MOV	r5, #0xdf3b\n\t"
        "MOVT	r5, #0x29b\n\t"
        "MOV	r9, #0x2000000\n\t"
        "AND	%[a], %[a], lr\n\t"
        "AND	%[b], %[b], lr\n\t"
        "AND	%[c], %[c], lr\n\t"
        "AND	r4, r4, lr\n\t"
        "AND	r5, r5, lr\n\t"
        "AND	r9, r9, lr\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADDS	r10, r10, %[a]\n\t"
        "ADCS	r11, r11, %[b]\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADCS	r10, r10, %[c]\n\t"
        "ADCS	r11, r11, r4\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADCS	r10, r10, r5\n\t"
        "ADCS	r11, r11, #0x0\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADCS	r10, r10, #0x0\n\t"
        "ADCS	r11, r11, #0x0\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10}\n\t"
        "ADCS	r10, r10, #0x0\n\t"
        "STM	r12!, {r10}\n\t"
        "SUB	%[s], %[s], #0x10\n\t"
        "MOV	r12, sp\n\t"
        /* Load bits 252-376 */
        "ADD	r12, r12, #0x1c\n\t"
        "LDM	r12, {%[a], %[b], %[c], r4, r5}\n\t"
        "LSL	r5, r5, #4\n\t"
        "ORR	r5, r5, r4, LSR #28\n\t"
        "LSL	r4, r4, #4\n\t"
        "ORR	r4, r4, %[c], LSR #28\n\t"
        "LSL	%[c], %[c], #4\n\t"
        "ORR	%[c], %[c], %[b], LSR #28\n\t"
        "LSL	%[b], %[b], #4\n\t"
        "ORR	%[b], %[b], %[a], LSR #28\n\t"
        "BFC	r5, #29, #3\n\t"
        "SUB	r12, r12, #0x1c\n\t"
        /* Sub product of top 4 words and order */
        "MOV	%[s], sp\n\t"
        /*   * -5cf5d3ed */
        "MOV	%[a], #0x2c13\n\t"
        "MOVT	%[a], #0xa30a\n\t"
        "MOV	lr, #0x0\n\t"
        "LDM	%[s], {r6, r7, r8, r9}\n\t"
        "UMLAL	r6, lr, %[b], %[a]\n\t"
        "ADDS	r7, r7, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r7, lr, %[c], %[a]\n\t"
        "ADDS	r8, r8, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r8, lr, r4, %[a]\n\t"
        "ADDS	r9, r9, lr\n\t"
        "MOV	lr, #0x0\n\t"
        "ADC	lr, lr, #0x0\n\t"
        "UMLAL	r9, lr, r5, %[a]\n\t"
        "STM	%[s], {r6, r7, r8, r9}\n\t"
        "ADD	%[s], %[s], #0x4\n\t"
        /*   * -5812631b */
        "MOV	%[a], #0x9ce5\n\t"
        "MOVT	%[a], #0xa7ed\n\t"
        "MOV	r10, #0x0\n\t"
        "LDM	%[s], {r6, r7, r8, r9}\n\t"
        "UMLAL	r6, r10, %[b], %[a]\n\t"
        "ADDS	r7, r7, r10\n\t"
        "MOV	r10, #0x0\n\t"
        "ADC	r10, r10, #0x0\n\t"
        "UMLAL	r7, r10, %[c], %[a]\n\t"
        "ADDS	r8, r8, r10\n\t"
        "MOV	r10, #0x0\n\t"
        "ADC	r10, r10, #0x0\n\t"
        "UMLAL	r8, r10, r4, %[a]\n\t"
        "ADDS	r9, r9, r10\n\t"
        "MOV	r10, #0x0\n\t"
        "ADC	r10, r10, #0x0\n\t"
        "UMLAL	r9, r10, r5, %[a]\n\t"
        "STM	%[s], {r6, r7, r8, r9}\n\t"
        "ADD	%[s], %[s], #0x4\n\t"
        /*   * -a2f79cd7 */
        "MOV	%[a], #0x6329\n\t"
        "MOVT	%[a], #0x5d08\n\t"
        "MOV	r11, #0x0\n\t"
        "LDM	%[s], {r6, r7, r8, r9}\n\t"
        "UMLAL	r6, r11, %[b], %[a]\n\t"
        "ADDS	r7, r7, r11\n\t"
        "MOV	r11, #0x0\n\t"
        "ADC	r11, r11, #0x0\n\t"
        "UMLAL	r7, r11, %[c], %[a]\n\t"
        "ADDS	r8, r8, r11\n\t"
        "MOV	r11, #0x0\n\t"
        "ADC	r11, r11, #0x0\n\t"
        "UMLAL	r8, r11, r4, %[a]\n\t"
        "ADDS	r9, r9, r11\n\t"
        "MOV	r11, #0x0\n\t"
        "ADC	r11, r11, #0x0\n\t"
        "UMLAL	r9, r11, r5, %[a]\n\t"
        "STM	%[s], {r6, r7, r8, r9}\n\t"
        "ADD	%[s], %[s], #0x4\n\t"
        /*   * -14def9df */
        "MOV	%[a], #0x621\n\t"
        "MOVT	%[a], #0xeb21\n\t"
        "MOV	r12, #0x0\n\t"
        "LDM	%[s], {r6, r7, r8, r9}\n\t"
        "UMLAL	r6, r12, %[b], %[a]\n\t"
        "ADDS	r7, r7, r12\n\t"
        "MOV	r12, #0x0\n\t"
        "ADC	r12, r12, #0x0\n\t"
        "UMLAL	r7, r12, %[c], %[a]\n\t"
        "ADDS	r8, r8, r12\n\t"
        "MOV	r12, #0x0\n\t"
        "ADC	r12, r12, #0x0\n\t"
        "UMLAL	r8, r12, r4, %[a]\n\t"
        "ADDS	r9, r9, r12\n\t"
        "MOV	r12, #0x0\n\t"
        "ADC	r12, r12, #0x0\n\t"
        "UMLAL	r9, r12, r5, %[a]\n\t"
        "STM	%[s], {r6, r7, r8, r9}\n\t"
        "ADD	%[s], %[s], #0x4\n\t"
        /* Add overflows at 4 * 32 */
        "LDM	%[s], {r6, r7, r8, r9}\n\t"
        "BFC	r9, #28, #4\n\t"
        "ADDS	r6, r6, lr\n\t"
        "ADCS	r7, r7, r10\n\t"
        "ADCS	r8, r8, r11\n\t"
        "ADC	r9, r9, r12\n\t"
        /* Subtract top at 4 * 32 */
        "SUBS	r6, r6, %[b]\n\t"
        "SBCS	r7, r7, %[c]\n\t"
        "SBCS	r8, r8, r4\n\t"
        "SBCS	r9, r9, r5\n\t"
        "SBC	%[a], %[a], %[a]\n\t"
        "SUB	%[s], %[s], #0x10\n\t"
        "LDM	%[s], {%[b], %[c], r4, r5}\n\t"
        "MOV	r10, #0xd3ed\n\t"
        "MOVT	r10, #0x5cf5\n\t"
        "MOV	r11, #0x631a\n\t"
        "MOVT	r11, #0x5812\n\t"
        "MOV	r12, #0x9cd6\n\t"
        "MOVT	r12, #0xa2f7\n\t"
        "MOV	lr, #0xf9de\n\t"
        "MOVT	lr, #0x14de\n\t"
        "AND	r10, r10, %[a]\n\t"
        "AND	r11, r11, %[a]\n\t"
        "AND	r12, r12, %[a]\n\t"
        "AND	lr, lr, %[a]\n\t"
        "ADDS	%[b], %[b], r10\n\t"
        "ADCS	%[c], %[c], r11\n\t"
        "ADCS	r4, r4, r12\n\t"
        "ADCS	r5, r5, lr\n\t"
        "ADCS	r6, r6, #0x0\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        "AND	%[a], %[a], #0x10000000\n\t"
        "ADCS	r8, r8, #0x0\n\t"
        "ADC	r9, r9, %[a]\n\t"
        "BFC	r9, #28, #4\n\t"
        "LDR	%[s], [sp, #68]\n\t"
        /* Store result */
        "STR	%[b], [%[s]]\n\t"
        "STR	%[c], [%[s], #4]\n\t"
        "STR	r4, [%[s], #8]\n\t"
        "STR	r5, [%[s], #12]\n\t"
        "STR	r6, [%[s], #16]\n\t"
        "STR	r7, [%[s], #20]\n\t"
        "STR	r8, [%[s], #24]\n\t"
        "STR	r9, [%[s], #28]\n\t"
        "ADD	sp, sp, #0x50\n\t"
        : [s] "+r" (s), [a] "+r" (a), [b] "+r" (b), [c] "+r" (c)
        :
        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc"
    );
}

#else
/* Scalar multiply-add with reduction (UMAAL variant): s = a * b + c, reduced
 * with respect to the "order" referred to by the comments below.
 *
 * The order is represented here by its low 128 bits (the constants
 * 0x14def9de a2f79cd6 5812631a 5cf5d3ed loaded in the final correction step)
 * plus bit 252 (0x10000000 in word 7) - presumably the Ed25519 group order,
 * given the HAVE_ED25519 guards around this code.
 *
 * s  Output pointer; eight 32-bit words (32 bytes) are stored through it.
 * a  First multiplicand; read as eight 32-bit words.
 * b  Second multiplicand; read as eight 32-bit words.
 * c  Addend; read as eight 32-bit words.
 *
 * Stack frame (80 bytes, released before the asm statement ends):
 *   sp+0  .. sp+63  16-word a * b + c intermediate, reduced in place
 *   sp+64           spill slot for one partial-product word
 *   sp+68 .. sp+79  saved s, a and c pointers
 *
 * All four operands are "+r" because their registers are reused as scratch
 * once the pointers have been saved or consumed.
 *
 * NOTE(review): the inputs are read with LDM directly from a, b and c, so
 * callers presumably pass word-aligned buffers - confirm against callers.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
#else
void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
    /* Pin the arguments to r0-r3 so the asm operands are in known registers. */
    register byte* s __asm__ ("r0") = (byte*)s_p;
    register const byte* a __asm__ ("r1") = (const byte*)a_p;
    register const byte* b __asm__ ("r2") = (const byte*)b_p;
    register const byte* c __asm__ ("r3") = (const byte*)c_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */

    __asm__ __volatile__ (
        /* Allocate the 80-byte frame and save s, a and c at sp+68; they are
         * reloaded later from sp+68 (s), sp+72 (a) and sp+76 (c). */
        "SUB	sp, sp, #0x50\n\t"
        "ADD	lr, sp, #0x44\n\t"
        "STM	lr, {%[s], %[a], %[c]}\n\t"
        /* lr walks b; the operand registers now hold words 0..3 of a. */
        "MOV	lr, %[b]\n\t"
        "LDM	%[a], {%[s], %[a], %[b], %[c]}\n\t"
        /* Words 0..2 of b. */
        "LDM	lr!, {r4, r5, r6}\n\t"
        "UMULL	r10, r11, %[s], r4\n\t"
        "UMULL	r12, r7, %[a], r4\n\t"
        "UMAAL	r11, r12, %[s], r5\n\t"
        "UMULL	r8, r9, %[b], r4\n\t"
        "UMAAL	r12, r8, %[a], r5\n\t"
        "UMAAL	r12, r7, %[s], r6\n\t"
        "UMAAL	r8, r9, %[c], r4\n\t"
        /* Three low words of the accumulation go to sp+0..8. */
        "STM	sp, {r10, r11, r12}\n\t"
        "UMAAL	r7, r8, %[b], r5\n\t"
        /* Word 3 of b. */
        "LDM	lr!, {r4}\n\t"
        "UMULL	r10, r11, %[a], r6\n\t"
        "UMAAL	r8, r9, %[b], r6\n\t"
        "UMAAL	r7, r10, %[s], r4\n\t"
        "UMAAL	r8, r11, %[c], r5\n\t"
        "STR	r7, [sp, #12]\n\t"
        "UMAAL	r8, r10, %[a], r4\n\t"
        "UMAAL	r9, r11, %[c], r6\n\t"
        "UMAAL	r9, r10, %[b], r4\n\t"
        "UMAAL	r10, r11, %[c], r4\n\t"
        /* Words 4..7 of b (no write-back: lr stays at b + 16). */
        "LDM	lr, {r4, r5, r6, r7}\n\t"
        "MOV	r12, #0x0\n\t"
        "UMLAL	r8, r12, %[s], r4\n\t"
        "UMAAL	r9, r12, %[a], r4\n\t"
        "UMAAL	r10, r12, %[b], r4\n\t"
        "UMAAL	r11, r12, %[c], r4\n\t"
        "MOV	r4, #0x0\n\t"
        "UMLAL	r9, r4, %[s], r5\n\t"
        "UMAAL	r10, r4, %[a], r5\n\t"
        "UMAAL	r11, r4, %[b], r5\n\t"
        "UMAAL	r12, r4, %[c], r5\n\t"
        "MOV	r5, #0x0\n\t"
        "UMLAL	r10, r5, %[s], r6\n\t"
        "UMAAL	r11, r5, %[a], r6\n\t"
        "UMAAL	r12, r5, %[b], r6\n\t"
        "UMAAL	r4, r5, %[c], r6\n\t"
        "MOV	r6, #0x0\n\t"
        "UMLAL	r11, r6, %[s], r7\n\t"
        /* Word 0 of a is no longer needed: reuse %[s] to reload the saved a
         * pointer and advance it to words 4..7 of a, interleaved with the
         * remaining multiplies by word 7 of b. */
        "LDR	%[s], [sp, #72]\n\t"
        "UMAAL	r12, r6, %[a], r7\n\t"
        "ADD	%[s], %[s], #0x10\n\t"
        "UMAAL	r4, r6, %[b], r7\n\t"
        /* Rewind lr from b + 16 back to the start of b. */
        "SUB	lr, lr, #0x10\n\t"
        "UMAAL	r5, r6, %[c], r7\n\t"
        /* Operand registers now hold words 4..7 of a. */
        "LDM	%[s], {%[s], %[a], %[b], %[c]}\n\t"
        /* Spill r6 to the scratch slot; it is reloaded from sp+64 below. */
        "STR	r6, [sp, #64]\n\t"
        /* Word 0 of b. */
        "LDM	lr!, {r6}\n\t"
        "MOV	r7, #0x0\n\t"
        "UMLAL	r8, r7, %[s], r6\n\t"
        "UMAAL	r9, r7, %[a], r6\n\t"
        "STR	r8, [sp, #16]\n\t"
        "UMAAL	r10, r7, %[b], r6\n\t"
        "UMAAL	r11, r7, %[c], r6\n\t"
        /* Word 1 of b. */
        "LDM	lr!, {r6}\n\t"
        "MOV	r8, #0x0\n\t"
        "UMLAL	r9, r8, %[s], r6\n\t"
        "UMAAL	r10, r8, %[a], r6\n\t"
        "STR	r9, [sp, #20]\n\t"
        "UMAAL	r11, r8, %[b], r6\n\t"
        "UMAAL	r12, r8, %[c], r6\n\t"
        /* Word 2 of b. */
        "LDM	lr!, {r6}\n\t"
        "MOV	r9, #0x0\n\t"
        "UMLAL	r10, r9, %[s], r6\n\t"
        "UMAAL	r11, r9, %[a], r6\n\t"
        "STR	r10, [sp, #24]\n\t"
        "UMAAL	r12, r9, %[b], r6\n\t"
        "UMAAL	r4, r9, %[c], r6\n\t"
        /* Word 3 of b. */
        "LDM	lr!, {r6}\n\t"
        "MOV	r10, #0x0\n\t"
        "UMLAL	r11, r10, %[s], r6\n\t"
        "UMAAL	r12, r10, %[a], r6\n\t"
        "STR	r11, [sp, #28]\n\t"
        "UMAAL	r4, r10, %[b], r6\n\t"
        "UMAAL	r5, r10, %[c], r6\n\t"
        /* Word 4 of b. */
        "LDM	lr!, {r11}\n\t"
        "UMAAL	r12, r7, %[s], r11\n\t"
        "UMAAL	r4, r7, %[a], r11\n\t"
        /* Bring back the word spilled to sp+64. */
        "LDR	r6, [sp, #64]\n\t"
        "UMAAL	r5, r7, %[b], r11\n\t"
        "UMAAL	r6, r7, %[c], r11\n\t"
        /* Word 5 of b. */
        "LDM	lr!, {r11}\n\t"
        "UMAAL	r4, r8, %[s], r11\n\t"
        "UMAAL	r5, r8, %[a], r11\n\t"
        "UMAAL	r6, r8, %[b], r11\n\t"
        "UMAAL	r7, r8, %[c], r11\n\t"
        /* Words 6 and 7 of b (lr itself is overwritten with word 7). */
        "LDM	lr, {r11, lr}\n\t"
        "UMAAL	r5, r9, %[s], r11\n\t"
        "UMAAL	r6, r10, %[s], lr\n\t"
        "UMAAL	r6, r9, %[a], r11\n\t"
        "UMAAL	r7, r10, %[a], lr\n\t"
        "UMAAL	r7, r9, %[b], r11\n\t"
        "UMAAL	r8, r10, %[b], lr\n\t"
        "UMAAL	r8, r9, %[c], r11\n\t"
        "UMAAL	r9, r10, %[c], lr\n\t"
        /* Move r12 into %[c] so the eight high words sit in ascending
         * registers, then store them at sp+32..63. */
        "MOV	%[c], r12\n\t"
        "ADD	lr, sp, #0x20\n\t"
        "STM	lr, {%[c], r4, r5, r6, r7, r8, r9, r10}\n\t"
        "MOV	%[s], sp\n\t"
        /* Add c to a * b */
        /* lr = saved c pointer; low eight words of a * b come from sp. */
        "LDR	lr, [sp, #76]\n\t"
        "LDM	%[s], {%[b], %[c], r4, r5, r6, r7, r8, r9}\n\t"
        "LDM	lr!, {%[a], r10, r11, r12}\n\t"
        "ADDS	%[b], %[b], %[a]\n\t"
        "ADCS	%[c], %[c], r10\n\t"
        "ADCS	r4, r4, r11\n\t"
        "ADCS	r5, r5, r12\n\t"
        "LDM	lr!, {%[a], r10, r11, r12}\n\t"
        "ADCS	r6, r6, %[a]\n\t"
        "ADCS	r7, r7, r10\n\t"
        "ADCS	r8, r8, r11\n\t"
        "ADCS	r9, r9, r12\n\t"
        /* Keep word 7 of the sum in %[a]; its top 4 bits are shifted into
         * the high part below. MOV/STM/LDM leave the carry flag intact. */
        "MOV	%[a], r9\n\t"
        "STM	%[s]!, {%[b], %[c], r4, r5, r6, r7, r8, r9}\n\t"
        /* Propagate the carry through the high eight words (kept in
         * registers, not written back). */
        "LDM	%[s], {%[b], %[c], r4, r5, r6, r7, r8, r9}\n\t"
        "ADCS	%[b], %[b], #0x0\n\t"
        "ADCS	%[c], %[c], #0x0\n\t"
        "ADCS	r4, r4, #0x0\n\t"
        "ADCS	r5, r5, #0x0\n\t"
        "ADCS	r6, r6, #0x0\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        "ADCS	r8, r8, #0x0\n\t"
        "ADC	r9, r9, #0x0\n\t"
        /* %[s] back to sp. */
        "SUB	%[s], %[s], #0x20\n\t"
        /* Get 252..503 and 504..507 */
        /* lr takes the top byte of the highest word; the other registers are
         * shifted left by 4 as one multi-word value, pulling in the top
         * nibble of word 7 from %[a]. */
        "LSR	lr, r9, #24\n\t"
        "LSL	r9, r9, #4\n\t"
        "ORR	r9, r9, r8, LSR #28\n\t"
        "LSL	r8, r8, #4\n\t"
        "ORR	r8, r8, r7, LSR #28\n\t"
        "LSL	r7, r7, #4\n\t"
        "ORR	r7, r7, r6, LSR #28\n\t"
        "LSL	r6, r6, #4\n\t"
        "ORR	r6, r6, r5, LSR #28\n\t"
        "LSL	r5, r5, #4\n\t"
        "ORR	r5, r5, r4, LSR #28\n\t"
        "LSL	r4, r4, #4\n\t"
        "ORR	r4, r4, %[c], LSR #28\n\t"
        "LSL	%[c], %[c], #4\n\t"
        "ORR	%[c], %[c], %[b], LSR #28\n\t"
        "LSL	%[b], %[b], #4\n\t"
        "ORR	%[b], %[b], %[a], LSR #28\n\t"
        "BFC	r9, #28, #4\n\t"
        /* Add order times bits 504..507 */
        /* The four constants are the same ones used for the "* -..."
         * multiplications further down. */
        "MOV	r10, #0x2c13\n\t"
        "MOVT	r10, #0xa30a\n\t"
        "MOV	r11, #0x9ce5\n\t"
        "MOVT	r11, #0xa7ed\n\t"
        "MOV	%[a], #0x0\n\t"
        "UMLAL	%[b], %[a], r10, lr\n\t"
        "UMAAL	%[c], %[a], r11, lr\n\t"
        "MOV	r10, #0x6329\n\t"
        "MOVT	r10, #0x5d08\n\t"
        "MOV	r11, #0x621\n\t"
        "MOVT	r11, #0xeb21\n\t"
        "UMAAL	r4, %[a], r10, lr\n\t"
        "UMAAL	r5, %[a], r11, lr\n\t"
        "ADDS	r6, r6, %[a]\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        "ADCS	r8, r8, #0x0\n\t"
        "ADC	r9, r9, #0x0\n\t"
        "SUBS	r6, r6, lr\n\t"
        "SBCS	r7, r7, #0x0\n\t"
        "SBCS	r8, r8, #0x0\n\t"
        "SBC	r9, r9, #0x0\n\t"
        /* Sub product of top 8 words and order */
        /* First pass: %[s] reads and r12 writes, both starting at sp. The
         * eight top words in %[b], %[c], r4..r9 are multiplied by the
         * constant in %[a] and accumulated into words 0..7, with lr
         * carrying between words. */
        "MOV	r12, sp\n\t"
        "MOV	%[a], #0x2c13\n\t"
        "MOVT	%[a], #0xa30a\n\t"
        "MOV	lr, #0x0\n\t"
        "LDM	%[s]!, {r10, r11}\n\t"
        "UMLAL	r10, lr, %[b], %[a]\n\t"
        "UMAAL	r11, lr, %[c], %[a]\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	%[s]!, {r10, r11}\n\t"
        "UMAAL	r10, lr, r4, %[a]\n\t"
        "UMAAL	r11, lr, r5, %[a]\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	%[s]!, {r10, r11}\n\t"
        "UMAAL	r10, lr, r6, %[a]\n\t"
        "UMAAL	r11, lr, r7, %[a]\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	%[s]!, {r10, r11}\n\t"
        "UMAAL	r10, lr, r8, %[a]\n\t"
        /* Drop the top 4 bits of word 7; they were already moved into the
         * shifted top words. */
        "BFC	r11, #28, #4\n\t"
        "UMAAL	r11, lr, r9, %[a]\n\t"
        /* Final carry is stored as an extra word after the pair. */
        "STM	r12!, {r10, r11, lr}\n\t"
        /* %[s] = sp+16; r12 = sp+4, so the second pass accumulates one
         * word higher. */
        "SUB	%[s], %[s], #0x10\n\t"
        "SUB	r12, r12, #0x20\n\t"
        "MOV	%[a], #0x9ce5\n\t"
        "MOVT	%[a], #0xa7ed\n\t"
        "MOV	lr, #0x0\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMLAL	r10, lr, %[b], %[a]\n\t"
        "UMAAL	r11, lr, %[c], %[a]\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMAAL	r10, lr, r4, %[a]\n\t"
        "UMAAL	r11, lr, r5, %[a]\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMAAL	r10, lr, r6, %[a]\n\t"
        "UMAAL	r11, lr, r7, %[a]\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMAAL	r10, lr, r8, %[a]\n\t"
        "UMAAL	r11, lr, r9, %[a]\n\t"
        "STM	r12!, {r10, r11, lr}\n\t"
        /* r12 = sp+8 for the third pass. */
        "SUB	r12, r12, #0x20\n\t"
        "MOV	%[a], #0x6329\n\t"
        "MOVT	%[a], #0x5d08\n\t"
        "MOV	lr, #0x0\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMLAL	r10, lr, %[b], %[a]\n\t"
        "UMAAL	r11, lr, %[c], %[a]\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMAAL	r10, lr, r4, %[a]\n\t"
        "UMAAL	r11, lr, r5, %[a]\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMAAL	r10, lr, r6, %[a]\n\t"
        "UMAAL	r11, lr, r7, %[a]\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMAAL	r10, lr, r8, %[a]\n\t"
        "UMAAL	r11, lr, r9, %[a]\n\t"
        "STM	r12!, {r10, r11, lr}\n\t"
        /* r12 = sp+12 for the fourth pass. */
        "SUB	r12, r12, #0x20\n\t"
        "MOV	%[a], #0x621\n\t"
        "MOVT	%[a], #0xeb21\n\t"
        "MOV	lr, #0x0\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMLAL	r10, lr, %[b], %[a]\n\t"
        "UMAAL	r11, lr, %[c], %[a]\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMAAL	r10, lr, r4, %[a]\n\t"
        "UMAAL	r11, lr, r5, %[a]\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMAAL	r10, lr, r6, %[a]\n\t"
        "UMAAL	r11, lr, r7, %[a]\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "UMAAL	r10, lr, r8, %[a]\n\t"
        "UMAAL	r11, lr, r9, %[a]\n\t"
        "STM	r12!, {r10, r11, lr}\n\t"
        /* r12 = sp+16, i.e. word 4 of the frame. */
        "SUB	r12, r12, #0x20\n\t"
        /* Subtract at 4 * 32 */
        "LDM	r12, {r10, r11}\n\t"
        "SUBS	r10, r10, %[b]\n\t"
        "SBCS	r11, r11, %[c]\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "SBCS	r10, r10, r4\n\t"
        "SBCS	r11, r11, r5\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "SBCS	r10, r10, r6\n\t"
        "SBCS	r11, r11, r7\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "SBCS	r10, r10, r8\n\t"
        "SBC	r11, r11, r9\n\t"
        "STM	r12!, {r10, r11}\n\t"
        /* r12 = sp+12 (word 3). lr = last difference word (sp+44) shifted
         * right arithmetically; it is used as an AND mask below, so it is
         * presumably 0 or all ones (the sign of the difference). */
        "SUB	r12, r12, #0x24\n\t"
        "ASR	lr, r11, #25\n\t"
        /* Conditionally subtract order starting at bit 125 */
        "MOV	%[a], #0xa0000000\n\t"
        "MOV	%[b], #0xba7d\n\t"
        "MOVT	%[b], #0x4b9e\n\t"
        "MOV	%[c], #0x4c63\n\t"
        "MOVT	%[c], #0xcb02\n\t"
        "MOV	r4, #0xf39a\n\t"
        "MOVT	r4, #0xd45e\n\t"
        "MOV	r5, #0xdf3b\n\t"
        "MOVT	r5, #0x29b\n\t"
        "MOV	r9, #0x2000000\n\t"
        "AND	%[a], %[a], lr\n\t"
        "AND	%[b], %[b], lr\n\t"
        "AND	%[c], %[c], lr\n\t"
        "AND	r4, r4, lr\n\t"
        "AND	r5, r5, lr\n\t"
        /* NOTE(review): r9 is masked here but is not read by the additions
         * below; it is next overwritten by a later LDM. */
        "AND	r9, r9, lr\n\t"
        /* Add the masked constants into the nine words starting at sp+12. */
        "LDM	r12, {r10, r11}\n\t"
        "ADDS	r10, r10, %[a]\n\t"
        "ADCS	r11, r11, %[b]\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADCS	r10, r10, %[c]\n\t"
        "ADCS	r11, r11, r4\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADCS	r10, r10, r5\n\t"
        "ADCS	r11, r11, #0x0\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10, r11}\n\t"
        "ADCS	r10, r10, #0x0\n\t"
        "ADCS	r11, r11, #0x0\n\t"
        "STM	r12!, {r10, r11}\n\t"
        "LDM	r12, {r10}\n\t"
        "ADCS	r10, r10, #0x0\n\t"
        "STM	r12!, {r10}\n\t"
        /* %[s] back to sp (it is set from sp again before its next use). */
        "SUB	%[s], %[s], #0x10\n\t"
        "MOV	r12, sp\n\t"
        /* Load bits 252-376 */
        /* Words 7..11 of the frame, shifted left by 4 into %[b], %[c], r4,
         * r5; the top 3 bits of r5 are cleared. */
        "ADD	r12, r12, #0x1c\n\t"
        "LDM	r12, {%[a], %[b], %[c], r4, r5}\n\t"
        "LSL	r5, r5, #4\n\t"
        "ORR	r5, r5, r4, LSR #28\n\t"
        "LSL	r4, r4, #4\n\t"
        "ORR	r4, r4, %[c], LSR #28\n\t"
        "LSL	%[c], %[c], #4\n\t"
        "ORR	%[c], %[c], %[b], LSR #28\n\t"
        "LSL	%[b], %[b], #4\n\t"
        "ORR	%[b], %[b], %[a], LSR #28\n\t"
        "BFC	r5, #29, #3\n\t"
        "SUB	r12, r12, #0x1c\n\t"
        /* Sub product of top 4 words and order */
        /* Four passes, each one word higher than the last (%[s] += 4). The
         * carry-out of each pass is kept in its own register (lr, r10, r11,
         * r12) and added in afterwards. */
        "MOV	%[s], sp\n\t"
        /*   * -5cf5d3ed */
        "MOV	%[a], #0x2c13\n\t"
        "MOVT	%[a], #0xa30a\n\t"
        "MOV	lr, #0x0\n\t"
        "LDM	%[s], {r6, r7, r8, r9}\n\t"
        "UMLAL	r6, lr, %[b], %[a]\n\t"
        "UMAAL	r7, lr, %[c], %[a]\n\t"
        "UMAAL	r8, lr, r4, %[a]\n\t"
        "UMAAL	r9, lr, r5, %[a]\n\t"
        "STM	%[s], {r6, r7, r8, r9}\n\t"
        "ADD	%[s], %[s], #0x4\n\t"
        /*   * -5812631b */
        "MOV	%[a], #0x9ce5\n\t"
        "MOVT	%[a], #0xa7ed\n\t"
        "MOV	r10, #0x0\n\t"
        "LDM	%[s], {r6, r7, r8, r9}\n\t"
        "UMLAL	r6, r10, %[b], %[a]\n\t"
        "UMAAL	r7, r10, %[c], %[a]\n\t"
        "UMAAL	r8, r10, r4, %[a]\n\t"
        "UMAAL	r9, r10, r5, %[a]\n\t"
        "STM	%[s], {r6, r7, r8, r9}\n\t"
        "ADD	%[s], %[s], #0x4\n\t"
        /*   * -a2f79cd7 */
        "MOV	%[a], #0x6329\n\t"
        "MOVT	%[a], #0x5d08\n\t"
        "MOV	r11, #0x0\n\t"
        "LDM	%[s], {r6, r7, r8, r9}\n\t"
        "UMLAL	r6, r11, %[b], %[a]\n\t"
        "UMAAL	r7, r11, %[c], %[a]\n\t"
        "UMAAL	r8, r11, r4, %[a]\n\t"
        "UMAAL	r9, r11, r5, %[a]\n\t"
        "STM	%[s], {r6, r7, r8, r9}\n\t"
        "ADD	%[s], %[s], #0x4\n\t"
        /*   * -14def9df */
        "MOV	%[a], #0x621\n\t"
        "MOVT	%[a], #0xeb21\n\t"
        "MOV	r12, #0x0\n\t"
        "LDM	%[s], {r6, r7, r8, r9}\n\t"
        "UMLAL	r6, r12, %[b], %[a]\n\t"
        "UMAAL	r7, r12, %[c], %[a]\n\t"
        "UMAAL	r8, r12, r4, %[a]\n\t"
        "UMAAL	r9, r12, r5, %[a]\n\t"
        "STM	%[s], {r6, r7, r8, r9}\n\t"
        "ADD	%[s], %[s], #0x4\n\t"
        /* Add overflows at 4 * 32 */
        /* %[s] = sp+16: words 4..7, with the top 4 bits of word 7 cleared
         * before the four saved carries are added. */
        "LDM	%[s], {r6, r7, r8, r9}\n\t"
        "BFC	r9, #28, #4\n\t"
        "ADDS	r6, r6, lr\n\t"
        "ADCS	r7, r7, r10\n\t"
        "ADCS	r8, r8, r11\n\t"
        "ADC	r9, r9, r12\n\t"
        /* Subtract top at 4 * 32 */
        "SUBS	r6, r6, %[b]\n\t"
        "SBCS	r7, r7, %[c]\n\t"
        "SBCS	r8, r8, r4\n\t"
        "SBCS	r9, r9, r5\n\t"
        /* %[a] = 0 if the subtraction did not borrow, all ones if it did. */
        "SBC	%[a], %[a], %[a]\n\t"
        /* Reload words 0..3 from sp. */
        "SUB	%[s], %[s], #0x10\n\t"
        "LDM	%[s], {%[b], %[c], r4, r5}\n\t"
        /* Low 128 bits of the order, masked by the borrow and added back. */
        "MOV	r10, #0xd3ed\n\t"
        "MOVT	r10, #0x5cf5\n\t"
        "MOV	r11, #0x631a\n\t"
        "MOVT	r11, #0x5812\n\t"
        "MOV	r12, #0x9cd6\n\t"
        "MOVT	r12, #0xa2f7\n\t"
        "MOV	lr, #0xf9de\n\t"
        "MOVT	lr, #0x14de\n\t"
        "AND	r10, r10, %[a]\n\t"
        "AND	r11, r11, %[a]\n\t"
        "AND	r12, r12, %[a]\n\t"
        "AND	lr, lr, %[a]\n\t"
        "ADDS	%[b], %[b], r10\n\t"
        "ADCS	%[c], %[c], r11\n\t"
        "ADCS	r4, r4, r12\n\t"
        "ADCS	r5, r5, lr\n\t"
        "ADCS	r6, r6, #0x0\n\t"
        "ADCS	r7, r7, #0x0\n\t"
        /* Bit 28 of word 7 is bit 252 of the value; AND leaves the carry
         * flag untouched between the ADCS instructions. */
        "AND	%[a], %[a], #0x10000000\n\t"
        "ADCS	r8, r8, #0x0\n\t"
        "ADC	r9, r9, %[a]\n\t"
        /* Clear bits 28..31 of the top word. */
        "BFC	r9, #28, #4\n\t"
        /* Reload the output pointer saved at sp+68. */
        "LDR	%[s], [sp, #68]\n\t"
        /* Store result */
        "STR	%[b], [%[s]]\n\t"
        "STR	%[c], [%[s], #4]\n\t"
        "STR	r4, [%[s], #8]\n\t"
        "STR	r5, [%[s], #12]\n\t"
        "STR	r6, [%[s], #16]\n\t"
        "STR	r7, [%[s], #20]\n\t"
        "STR	r8, [%[s], #24]\n\t"
        "STR	r9, [%[s], #28]\n\t"
        /* Release the 80-byte frame. */
        "ADD	sp, sp, #0x50\n\t"
        : [s] "+r" (s), [a] "+r" (a), [b] "+r" (b), [c] "+r" (c)
        :
        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc"
    );
}

#endif /* WOLFSSL_SP_NO_UMAAL */
#endif /* HAVE_ED25519_SIGN */
#endif /* HAVE_ED25519 */

#endif /* !CURVE25519_SMALL || !ED25519_SMALL */
#endif /* HAVE_CURVE25519 || HAVE_ED25519 */
#endif /* WOLFSSL_ARMASM_INLINE */
#endif /* !__aarch64__ && __thumb__ */
#endif /* WOLFSSL_ARMASM */
