*
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */


#include "SkBitmapProcState.h"
#include "SkColorPriv.h"
#include "SkUtils.h"

#if SK_ARM_ARCH >= 6 && !defined(SK_CPU_BENDIAN)
// Forward declaration that attaches the GCC optimize("O1") function
// attribute to SI8_D16_nofilter_DX_arm, which is defined right below with
// the same parameter list.
void SI8_D16_nofilter_DX_arm(
    const SkBitmapProcState& s,
    const uint32_t* SK_RESTRICT xy,
    int count,
    uint16_t* SK_RESTRICT colors) __attribute__((optimize("O1")));

void SI8_D16_nofilter_DX_arm(const SkBitmapProcState& s,
                             const uint32_t* SK_RESTRICT xy,
                             int count, uint16_t* SK_RESTRICT colors) {
    SkASSERT(count > 0 && colors != NULL);
    SkASSERT(s.fInvType <= (SkMatrix::kTranslate_Mask | SkMatrix::kScale_Mask));
    SkASSERT(s.fDoFilter == false);
    
    const uint16_t* SK_RESTRICT table = s.fBitmap->getColorTable()->lock16BitCache();
    const uint8_t* SK_RESTRICT srcAddr = (const uint8_t*)s.fBitmap->getPixels();
    
    // buffer is y32, x16, x16, x16, x16, x16
    // bump srcAddr to the proper row, since we're told Y never changes
    SkASSERT((unsigned)xy[0] < (unsigned)s.fBitmap->height());
    srcAddr = (const uint8_t*)((const char*)srcAddr +
Beispiel #2
0
#include <avr/io.h>
#include <avr/pgmspace.h>

#include "display/st7565.h"
#include "io.h"
#include "constants.h"

#include "menu.h"

/*
 * Redraws one menu screen.
 *
 * Clears the LCD, selects the f6x8 font, then in order: prints the title
 * (when data->title is set), calls data->footer_callback (when set) and
 * calls data->render_callback with selected_item (when set). Finishes with
 * lcd_update().
 *
 * data          - menu descriptor; must not be NULL (it is dereferenced
 *                 unconditionally).
 * selected_item - passed through unchanged to data->render_callback.
 *
 * The function is compiled with optimize("O0").
 */
void __attribute__((optimize("O0"))) _menu_render_screen(menu_t *data, uint8_t selected_item){

  lcd_clear();
  FontSelector = f6x8;

  if (data->title){
    print_title(data->title);
  }

  if (data->footer_callback){
    data->footer_callback();
  }

  if (data->render_callback){
    data->render_callback(selected_item);
  }
  lcd_update();
}


void __read_input(uint8_t max_value){
Beispiel #3
0
/* Each task maintains its own interrupt status in the critical nesting
variable.  Note this is not saved as part of the task context as context
switches can only occur when uxCriticalNesting is zero. */
static UBaseType_t uxCriticalNesting = 0xaaaaaaaa;

/*
 * Setup the timer to generate the tick interrupts.
 */
static void prvSetupTimerInterrupt( void ) PRIVILEGED_FUNCTION;

/*
 * Standard FreeRTOS exception handlers.
 */
void xPortPendSVHandler( void ) __attribute__ (( naked )) PRIVILEGED_FUNCTION;
void xPortSysTickHandler( void )  __attribute__ ((optimize("3"))) PRIVILEGED_FUNCTION;
void vPortSVCHandler( void ) __attribute__ (( naked )) PRIVILEGED_FUNCTION;

/*
 * Starts the scheduler by restoring the context of the first task to run.
 */
static void prvRestoreContextOfFirstTask( void ) __attribute__(( naked )) PRIVILEGED_FUNCTION;

/*
 * C portion of the SVC handler.  The SVC handler is split between an asm entry
 * and a C wrapper for simplicity of coding and maintenance.
 */
static void prvSVCHandler( uint32_t *pulRegisters ) __attribute__(( noinline )) PRIVILEGED_FUNCTION;

/*-----------------------------------------------------------*/
Beispiel #4
0
 // Runs optimize() on each operand of the call in order, feeding the block
 // returned for one operand in as `outer` for the next. Stops right after
 // the first operand that EffectAnalyzer reports as having side effects
 // (that operand itself has already been passed to optimize()).
 void handleCall(T* curr, Block* outer = nullptr) {
   Index index = 0;
   while (index < curr->operands.size()) {
     outer = optimize(curr, curr->operands[index], outer);
     if (EffectAnalyzer(curr->operands[index]).hasSideEffects()) {
       break;
     }
     index++;
   }
 }
Beispiel #5
0
         | 0x0UL << CACHE_CTL_offICALCK                 \
         | 0x0UL << CACHE_CTL_offDCALCK                 \
         | 0x1UL << CACHE_CTL_offDCCWF                  \
         | 0x1UL << CACHE_CTL_offDCPMW)

/*
 * Interrupt priority :
 * PIT(IRQ #2): highest priority
 * Others: lowest priority
 */
#define PRI1_DEFAULT            0xFFFFFFFF
#define PRI2_DEFAULT            0xFFFFFFFF


/* This must be a leaf function, no child function */
/*
 * Separate declaration carrying the `naked` and optimize("Os") attributes
 * for the definition that follows.
 */
void _nds32_init_mem(void) __attribute__((naked, optimize("Os")));
void _nds32_init_mem(void)
{
	/* Enable DLM: write EDLM_BASE with bit 0 set to NDS32_SR_DLMB */
	__nds32__mtsr(EDLM_BASE | 0x1, NDS32_SR_DLMB);
	/* NOTE(review): presumably a barrier so the register write has taken
	 * effect before returning -- confirm against the NDS32 intrinsics docs */
	__nds32__dsb();
}

/*
 * Initialize MMU configure and cache ability.
 */
static void mmu_init(void)
{
//#ifndef __NDS32_ISA_V3M__
//	unsigned int reg;
//
Beispiel #6
0
 void visitStore(Store* curr) {
   // The pointer operand goes first; its result is then handed on, together
   // with the address of curr->ptr, when the stored value is processed.
   auto* outer = optimize(curr, curr->ptr);
   optimize(curr, curr->value, outer, &curr->ptr);
 }
Beispiel #7
0
 void visitBreak(Break* curr) {
   // The break value goes first; its result is then handed on, together
   // with the address of curr->value, when the condition is processed.
   auto* outer = optimize(curr, curr->value);
   optimize(curr, curr->condition, outer, &curr->value);
 }
Beispiel #8
0
            gui_bench_draw_results(11, bench.disk_write_raw_bps);
            gui_bench_draw_results(12, bench.disk_write_mem_bps);
            gui_bench_draw_results(13, bench.disk_write_buf_bps);
            gui_bench_draw_results(14, bench.disk_read_buf_bps);

            bench_to_draw = 0;
            break;
        default:
            bench_to_draw = 0;
            break;
    }
}

//-------------------------------------------------------------------
/*
 * Screen-write benchmark.
 *
 * Fills the buffer returned by vid_get_bitmap_fb() 64 times (once per
 * value 0..63), writing camera_screen.buffer_size bytes per pass, and
 * stores the measured rate in bench.screen_output_bps.
 *
 * The rate is s*64*100 divided by (elapsed ticks / 10). When fewer than
 * 10 ticks elapsed that divisor is zero; in that case 0 is stored instead
 * of dividing by zero.
 *
 * Compiled with optimize("O0").
 */
static void __attribute__((optimize("O0"))) bench_screen_write(void) {
    long t;
    long tenths;
    register unsigned int i, s;
    register char c;
    register char *scr;

    scr = vid_get_bitmap_fb();
    s = camera_screen.buffer_size;
    t = get_tick_count();
    for (c=0; c<64; ++c)
        for (i=0; i<s; ++i)
            scr[i] = c;
    t = get_tick_count() - t;

    /* Guard the divisor: an elapsed time below 10 ticks would make it 0. */
    tenths = t/10;
    bench.screen_output_bps = (tenths != 0) ? (s*64*100 / tenths) : 0;
}
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

/* Disable optimization to prevent compiler from un-indirecting 
 * our indirect calls. */
 
/* Holds a single pointer to a function that takes a size_t and returns
 * void *; `s` is the one file-scope instance of it. */
struct S
{
	void *(*fn)(size_t);
} s;

void *(*fs[2])(size_t);

int (__attribute__((optimize("O0"))) main)()
{
	void *(*fn)(size_t) = &malloc;
	
	int *blah = (int *) fn(200 * sizeof (int));
	
	for (int i = 0; i < 200; ++i) blah[i] = 42;
	
	void *fake = blah;

	int *recovered = (int *) fake;

	printf("It says: %d\n", recovered[0]);

	free(blah);

	void *(**fn2)(size_t) = &fn;
Beispiel #10
0
/* { dg-do run } */
/* { dg-require-effective-target avx512er } */
/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512er" } */

#include <math.h>
#include "avx512er-check.h"

#define MAX 1000
#define EPS 0.00001

/* Stores 1.0 / sqrtf (a[i]) into r[i] for the first MAX elements.
   Marked noinline and optimize (1), so it is not built with the
   dg-options optimization settings used for the rest of the file.
   NOTE(review): presumably the reference result the test compares
   against -- confirm with the caller.  */
__attribute__ ((noinline, optimize (1)))
void static
compute_rsqrt_ref (float *a, float *r)
{
  for (int i = 0; i < MAX; i++)
    r[i] = 1.0 / sqrtf (a[i]);
}

/* Same loop as compute_rsqrt_ref: stores 1.0 / sqrtf (a[i]) into r[i]
   for the first MAX elements.  Only noinline is applied, so this copy is
   compiled with the file's dg-options.
   NOTE(review): presumably the code under test -- confirm with the
   caller.  */
__attribute__ ((noinline))
void static
compute_rsqrt_exp (float *a, float *r)
{
  for (int i = 0; i < MAX; i++)
    r[i] = 1.0 / sqrtf (a[i]);
}

void static
avx512er_test (void)
{
  float in[MAX];
  float ref[MAX];
Beispiel #11
0
/* imc_reg_alloc is the main loop of the allocation algorithm. It operates
 * on a single compilation unit at a time.
 *
 * Steps, in order:
 *   1. pre_optimize(), then repeat find_basic_blocks()/build_cfg()/
 *      cfg_optimize() until cfg_optimize() reports nothing left to do.
 *   2. Repeat dominators / loops / reglist / life analysis plus optimize()
 *      until optimize() reports nothing left to do (a single pass when
 *      dont_optimize is set).
 *   3. Repeat spilling-cost computation, ordering and try_allocate();
 *      whenever try_allocate() returns a value >= 0 that value is passed
 *      to spill() and the loop runs again.
 *
 * Returns early, doing nothing, when unit is NULL or when a PASM file is
 * being processed with optimizer_level == 0; returns right after
 * pre_optimize() when a PASM file is processed with
 * optimizer_level == OPT_PRE.
 */
void
imc_reg_alloc(struct Parrot_Interp *interpreter, IMC_Unit * unit)
{
    int to_spill;
    int todo, first;

    if (!unit)
        return;
    if (!optimizer_level && pasm_file)
        return;

    init_tables(interpreter);
    allocated = 0;

#if IMC_TRACE
    fprintf(stderr, "reg_alloc.c: imc_reg_alloc\n");
    if (unit->instructions->r[1] && unit->instructions->r[1]->pcc_sub) {
        fprintf(stderr, "img_reg_alloc: pcc_sub (nargs = %d)\n",
            unit->instructions->r[1]->pcc_sub->nargs);
    }
#endif

    debug(interpreter, DEBUG_IMC, "\n------------------------\n");
    debug(interpreter, DEBUG_IMC, "processing sub %s\n", function);
    debug(interpreter, DEBUG_IMC, "------------------------\n\n");
    if (IMCC_INFO(interpreter)->verbose ||
            (IMCC_INFO(interpreter)->debug & DEBUG_IMC))
        imc_stat_init(unit);

    /* consecutive labels, if_branch, unused_labels ... */
    pre_optimize(interpreter, unit);
    if (optimizer_level == OPT_PRE && pasm_file)
        return;

    nodeStack = imcstack_new();
    unit->n_spilled = 0;

    /* CFG-level optimization: rebuild blocks and CFG until stable */
    todo = first = 1;
    while (todo) {
        find_basic_blocks(interpreter, unit, first);
        build_cfg(interpreter, unit);

        if (first && (IMCC_INFO(interpreter)->debug & DEBUG_CFG))
            dump_cfg(unit);
        first = 0;
        todo = cfg_optimize(interpreter, unit);
    }

    todo = first = 1;
    while (todo) {
        /* the first pass reuses the blocks/CFG built by the loop above */
        if (!first) {
            find_basic_blocks(interpreter, unit, 0);
            build_cfg(interpreter, unit);
        }
        first = 0;

        compute_dominators(interpreter, unit);
        find_loops(interpreter, unit);

        build_reglist(interpreter, unit);
        life_analysis(interpreter, unit);
        /* optimize, as long as there is something to do */
        if (dont_optimize)
            todo = 0;
        else {
            todo = optimize(interpreter, unit);
            if (todo)
                pre_optimize(interpreter, unit);
        }
    }
    todo = 1;
#if !DOIT_AGAIN_SAM
    build_interference_graph(interpreter, unit);
#endif
    while (todo) {
#if DOIT_AGAIN_SAM
        build_interference_graph(interpreter, unit);
#endif
        if (optimizer_level & OPT_SUB)
            allocate_wanted_regs(unit);
        compute_spilling_costs(interpreter, unit);
#ifdef DO_SIMPLIFY
        /* simplify until no changes can be made */
        while (simplify(unit)) {}
#endif
        order_spilling(unit);          /* put the remaining items on stack */

        to_spill = try_allocate(interpreter, unit);
        allocated = 1;

        if ( to_spill >= 0 ) {
            allocated = 0;
            spill(interpreter, unit, to_spill);
            /*
             * build the new cfg/reglist on the fly in spill() and
             * do life analysis there for only the involved regs
             */
#if DOIT_AGAIN_SAM
            find_basic_blocks(interpreter, unit, 0);
            build_cfg(interpreter, unit);
            build_reglist(interpreter, unit);
            /* same two-argument form as the call in the loop above */
            life_analysis(interpreter, unit);
#endif
        }
        else {
            /* the process is finished */
            todo = 0;
        }
    }
    if (optimizer_level & OPT_SUB)
        sub_optimize(interpreter, unit);
    if (IMCC_INFO(interpreter)->debug & DEBUG_IMC)
        dump_instructions(unit);
    if (IMCC_INFO(interpreter)->verbose  ||
            (IMCC_INFO(interpreter)->debug & DEBUG_IMC))
        print_stat(interpreter, unit);
    imcstack_free(nodeStack);
}
Beispiel #12
0
            NUTFATAL(tdp->td_name, __FILE__, __LINE__, "more stack space");
        }
    }
#endif
}
#endif


/*!
 * \brief Initialize system timer.
 *
 * This function is automatically called by Nut/OS during system
 * initialization. It calls the hardware dependent layer to initialize
 * the timer hardware and register a timer interrupt handler.
 */
void NutTimerInit(void)  __attribute__((optimize(0)));
void NutTimerInit(void)
{
#ifdef __NUT_EMULATION__
    gettimeofday( &timeStart, NULL );
#else
    NutRegisterTimer(NutTimerIntr);
    NutEnableTimerIrq();
//Not Used     /* Remember the CPU clock for which the loop counter is valid. */
//Not Used     nut_delay_loops_clk = NutGetCpuClock();
#if !defined(NUT_DELAYLOOPS)
#ifndef NUT_TICK_FREQ
#define NUT_TICK_FREQ   1000UL
#endif
    {
        /* Wait for the next tick. */
Beispiel #13
0
 // Simplifies a drop node. In order:
 //  1. the dropped value is run through optimize(); if nothing is left the
 //     drop itself is turned into a nop;
 //  2. a dropped tee is replaced by the plain set;
 //  3. for a dropped block whose last item has a concrete type, that last
 //     item is optimized and, if it vanishes and no break carries a value
 //     to the block, it is popped and the drop is replaced;
 //  4. for a dropped if-else with one unreachable arm, the drop is moved
 //     into the other arm.
 void visitDrop(Drop* curr) {
   // optimize the dropped value, maybe leaving nothing
   curr->value = optimize(curr->value, false);
   if (curr->value == nullptr) {
     ExpressionManipulator::nop(curr);
     return;
   }
   // a drop of a tee is a set
   if (auto* set = curr->value->dynCast<SetLocal>()) {
     assert(set->isTee());
     set->setTee(false);
     replaceCurrent(set);
     return;
   }
   // if we are dropping a block's return value, we might be able to remove it entirely
   if (auto* block = curr->value->dynCast<Block>()) {
     auto* last = block->list.back();
     if (isConcreteWasmType(last->type)) {
       assert(block->type == last->type);
       last = optimize(last, false);
       if (!last) {
         // we may be able to remove this, if there are no brs
         bool canPop = true;
         if (block->name.is()) {
           // a named block may be a break target; look for breaks to it
           BreakSeeker breakSeeker(block->name);
           Expression* temp = block;
           breakSeeker.walk(temp);
           // a break that carries a value still needs the block's result
           if (breakSeeker.found && breakSeeker.valueType != none) {
             canPop = false;
           }
         }
         if (canPop) {
           // last is null here; store it, then remove that final slot
           block->list.back() = last;
           block->list.pop_back();
           block->type = none;
           // we don't need the drop anymore, let's see what we have left in the block
           if (block->list.size() > 1) {
             replaceCurrent(block);
           } else if (block->list.size() == 1) {
             replaceCurrent(block->list[0]);
           } else {
             ExpressionManipulator::nop(curr);
           }
           return;
         }
       }
     }
   }
   // sink a drop into an arm of an if-else if the other arm ends in an unreachable; as the if is a branch, this can make that arm optimizable and enable more vacuuming
   auto* iff = curr->value->dynCast<If>();
   if (iff && iff->ifFalse && isConcreteWasmType(iff->type)) {
     // reuse the drop in both cases
     if (iff->ifTrue->type == unreachable) {
       assert(isConcreteWasmType(iff->ifFalse->type));
       curr->value = iff->ifFalse;
       iff->ifFalse = curr;
       iff->type = none;
       replaceCurrent(iff);
     } else if (iff->ifFalse->type == unreachable) {
       assert(isConcreteWasmType(iff->ifTrue->type));
       curr->value = iff->ifTrue;
       iff->ifTrue = curr;
       iff->type = none;
       replaceCurrent(iff);
     }
   }
 }
Beispiel #14
0
/**
 * Links the program into an executable by running llvm-ld.
 *
 * Builds the llvm-ld argument list (output name, debug stripping,
 * optimization and inlining switches, user linker switches, user libraries,
 * per-OS default libraries, object files), creates the output directory if
 * it does not exist, optionally prints the command line, and runs the
 * linker.
 *
 * The output name is global.params.exefile when set, otherwise the root
 * module name, otherwise "a.out". On Windows ".exe" is appended unless the
 * name already ends with it; names shorter than four characters are handled
 * (they previously made substr() throw std::out_of_range).
 *
 * @param argv0 not used by this function.
 * @return 0 on success, otherwise the non-zero status reported by
 *         llvm::sys::Program::ExecuteAndWait. fatal() is called if the
 *         output directory cannot be created.
 */
int linkExecutable(const char* argv0)
{
    Logger::println("*** Linking executable ***");

    // error string
    std::string errstr;

    // find the llvm-ld program
    llvm::sys::Path ldpath = llvm::sys::Program::FindProgramByName("llvm-ld");
    if (ldpath.isEmpty())
    {
        ldpath.set("llvm-ld");
    }

    // build arguments
    std::vector<const char*> args;

    // first the program name ??
    args.push_back("llvm-ld");

    // output filename
    std::string exestr;
    if (global.params.exefile)
    {   // explicit
        exestr = global.params.exefile;
    }
    else
    {   // inferred
        // try root module name
        if (Module::rootModule)
            exestr = Module::rootModule->toChars();
        else
            exestr = "a.out";
    }
    // append ".exe" on Windows unless already present; the length check
    // keeps the suffix comparison in range for very short names
    if (global.params.os == OSWindows &&
        (exestr.length() < 4 ||
         exestr.compare(exestr.length() - 4, 4, ".exe") != 0))
        exestr.append(".exe");

    // outopt must outlive args: args stores the pointer from c_str()
    std::string outopt = "-o=" + exestr;
    args.push_back(outopt.c_str());

    // set the global gExePath
    gExePath.set(exestr);
    assert(gExePath.isValid());

    // create path to exe
    llvm::sys::Path exedir(gExePath);
    exedir.set(gExePath.getDirname());
    if (!exedir.exists())
    {
        exedir.createDirectoryOnDisk(true, &errstr);
        if (!errstr.empty())
        {
            error("failed to create path to linking output: %s\n%s", exedir.c_str(), errstr.c_str());
            fatal();
        }
    }

    // strip debug info
    if (!global.params.symdebug)
        args.push_back("-strip-debug");

    // optimization level
    if (!optimize())
        args.push_back("-disable-opt");
    else
    {
        switch(optLevel())
        {
        case 0:
            args.push_back("-disable-opt");
            break;
        case 1:
            args.push_back("-globaldce");
            args.push_back("-disable-opt");
            args.push_back("-globaldce");
            args.push_back("-mem2reg");
            break;
        case 2:
        case 3:
        case 4:
        case 5:
            // use default optimization
            break;
        default:
            assert(0);
        }
    }

    // inlining
    if (!(global.params.useInline || doInline()))
    {
        args.push_back("-disable-inlining");
    }

    // additional linker switches
    for (int i = 0; i < global.params.linkswitches->dim; i++)
    {
        char *p = (char *)global.params.linkswitches->data[i];
        args.push_back(p);
    }

    // native please
    args.push_back("-native");


    // user libs
    for (int i = 0; i < global.params.libfiles->dim; i++)
    {
        char *p = (char *)global.params.libfiles->data[i];
        args.push_back(p);
    }

    // default libs
    switch(global.params.os) {
    case OSLinux:
    case OSMacOSX:
        args.push_back("-ldl");
        // fall through: Linux and Mac OS X also get the FreeBSD libraries
    case OSFreeBSD:
        args.push_back("-lpthread");
        args.push_back("-lm");
        break;
    case OSHaiku:
        args.push_back("-lroot");
        break;
    case OSWindows:
        // FIXME: I'd assume kernel32 etc
        break;
    }

    // object files
    for (int i = 0; i < global.params.objfiles->dim; i++)
    {
        char *p = (char *)global.params.objfiles->data[i];
        args.push_back(p);
    }

    // print link command?
    if (!quiet || global.params.verbose)
    {
        // Print it
        for (size_t i = 0; i < args.size(); i++)
            printf("%s ", args[i]);
        printf("\n");
        fflush(stdout);
    }

    // terminate args list
    args.push_back(NULL);

    // try to call linker!!!
    if (int status = llvm::sys::Program::ExecuteAndWait(ldpath, &args[0], NULL, NULL, 0,0, &errstr))
    {
        error("linking failed:\nstatus: %d", status);
        if (!errstr.empty())
            error("message: %s", errstr.c_str());
        return status;
    }

    return 0;
}
Beispiel #15
0
 // A return has one child to process: its value.
 void visitReturn(Return* curr) {
   optimize(curr, curr->value);
 }
Beispiel #16
0
 * client requests. Must be called before any client code is run.
 */
void DRD_(clientreq_init)(void)
{
   /* Hand handle_client_request to VG_(needs_client_requests). */
   VG_(needs_client_requests)(handle_client_request);
}

/**
 * DRD's handler for Valgrind client requests. The code below handles both
 * DRD's public and tool-internal client requests.
 */
#if defined(VGP_mips32_linux)
 /* There is a cse related issue in gcc for MIPS. Optimization level
    has to be lowered, so cse related optimizations are not
    included. */
 __attribute__((optimize("O1")))
#endif
static Bool handle_client_request(ThreadId vg_tid, UWord* arg, UWord* ret)
{
   UWord result = 0;
   const DrdThreadId drd_tid = DRD_(thread_get_running_tid)();

   tl_assert(vg_tid == VG_(get_running_tid()));
   tl_assert(DRD_(VgThreadIdToDrdThreadId)(vg_tid) == drd_tid);

   switch (arg[0])
   {
   case VG_USERREQ__MALLOCLIKE_BLOCK:
      if (DRD_(g_free_is_write)) {
         GenericErrInfo GEI = {
            .tid = DRD_(thread_get_running_tid)(),
Beispiel #17
0
 void visitBinary(Binary* curr) {
   // The left operand goes first; its result is then handed on, together
   // with the address of curr->left, when the right operand is processed.
   auto* outer = optimize(curr, curr->left);
   optimize(curr, curr->right, outer, &curr->left);
 }
Beispiel #18
0
/* { dg-options "-mips16" } */

/* Empty function with no attributes; built with the file's dg-options.  */
void bar (void) {}

/* Empty function that carries a per-function optimize("schedule-insns")
   attribute while the file itself is compiled with -mips16.  */
void __attribute__((optimize("schedule-insns")))
foo (void)
{
}
Beispiel #19
0
 void visitSelect(Select* curr) {
   // Children are processed as ifTrue, then ifFalse, then condition; each
   // step receives the previous step's result plus the addresses of the
   // children already handled.
   auto* outer = optimize(curr, curr->ifTrue);
   outer = optimize(curr, curr->ifFalse, outer, &curr->ifTrue);
   optimize(curr, curr->condition, outer, &curr->ifTrue, &curr->ifFalse);
 }
Beispiel #20
0
    volatile unsigned long _CFSR ;
    volatile unsigned long _HFSR ;
    volatile unsigned long _DFSR ;
    volatile unsigned long _AFSR ;
    volatile unsigned long _BFAR ;
    volatile unsigned long _MMAR ;
} FaultInformation;

/** Global instance so that it can be added to the watch expressions */
volatile FaultInformation faultInfo;

/** Decode the stack state prior to an exception occurring.  This code is
 * inspired by FreeRTOS.
 * @param hardfault_args address of the stack
 */
__attribute__((optimize("-O0"))) void hard_fault_handler_c( unsigned long *hardfault_args )
{
    /* force a reference in the local variables for debug */
    volatile FaultInformation *fault_info = &faultInfo;

    fault_info->stacked_r0 = ((unsigned long)hardfault_args[0]) ;
    fault_info->stacked_r1 = ((unsigned long)hardfault_args[1]) ;
    fault_info->stacked_r2 = ((unsigned long)hardfault_args[2]) ;
    fault_info->stacked_r3 = ((unsigned long)hardfault_args[3]) ;
    fault_info->stacked_r12 = ((unsigned long)hardfault_args[4]) ;
    fault_info->stacked_lr = ((unsigned long)hardfault_args[5]) ;
    fault_info->stacked_pc = ((unsigned long)hardfault_args[6]) ;
    fault_info->stacked_psr = ((unsigned long)hardfault_args[7]) ;

    // Configurable Fault Status Register
    // Consists of MMSR, BFSR and UFSR
Beispiel #21
0
 void visitSwitch(Switch* curr) {
   // The switch value goes first; its result is then handed on, together
   // with the address of curr->value, when the condition is processed.
   auto* outer = optimize(curr, curr->value);
   optimize(curr, curr->condition, outer, &curr->value);
 }
Beispiel #22
0
// { dg-do run }

// Returns its argument unchanged. Compiled with a per-function
// optimize("-O0") attribute (written with an extra pair of parentheses).
int *
__attribute__((optimize(("-O0"))))
fn1 (int *a)
{
  return a;
}

// Runs ten iterations; each declares a fresh pointer `a`, passes it to fn1
// and assigns the result back to `a`.
// NOTE(review): `a` is passed to fn1 without ever being initialized --
// presumably deliberate for this compiler test; confirm before changing.
void
fn2 ()
{
  for (int i = 0; i < 10; i++)
    {
      int *a;
      (a) = fn1 (a);
    }
}

// Test entry point: calls fn2 once and has no explicit return statement.
int main()
{
  fn2();
}
Beispiel #23
0
 void visitCallIndirect(CallIndirect* curr) {
   // The call target is processed before any operand.
   auto* outer = optimize(curr, curr->target);
   // Operands are only handed to handleCall() when the target is free of
   // side effects.
   if (!EffectAnalyzer(curr->target).hasSideEffects()) {
     handleCall(curr, outer);
   }
 }
Beispiel #24
0
 // A unary has one child to process: its value.
 void visitUnary(Unary* curr) {
   optimize(curr, curr->value);
 }
Beispiel #25
0
    int len=read( fd, buffer, IN_EVENT_BUF_LEN );
    int i=0;
    while (i<len){
        struct inotify_event *event = ( struct inotify_event * ) &buffer[ i ];
        if (event->mask & IN_OPEN) {
            inotify_q.push(IN_OPEN);
        }
        if (event->mask & IN_CLOSE) {
            inotify_q.push(IN_CLOSE);
        }

        i += IN_EVENT_SIZE + event->len;
    }
}

// Compiled with optimize("O0"). When ATOMIC_CAS(&paused,false,false)
// evaluates to false, this locks m_pause, issues
// ATOMIC_CAS(&pause_check,false,true), and then busy-waits until
// ATOMIC_CAS(&paused,false,false) evaluates to true. Otherwise it does
// nothing.
// NOTE(review): ATOMIC_CAS is defined elsewhere; with identical expected and
// new values it looks like it is used here as an atomic read of `paused` --
// confirm its return convention.
void __attribute__((optimize("O0"))) VPOutPluginAlsa::rewind()
{
    if (!ATOMIC_CAS(&paused,false,false) ){
        m_pause.lock();
        ATOMIC_CAS(&pause_check,false,true);
        // spin with an empty body until the condition flips
        while (!ATOMIC_CAS(&paused,false,false)) {}
    }
}

void __attribute__((optimize("O0"))) VPOutPluginAlsa::resume()
{
    if (ATOMIC_CAS(&paused,false,false) ){

        m_pause.unlock();
        while (ATOMIC_CAS(&paused,false,false)) {}
    }
Beispiel #26
0
 // A set has one child to process: its value.
 void visitSetLocal(SetLocal* curr) {
   optimize(curr, curr->value);
 }
Beispiel #27
0
/* { dg-do compile } */
/* { dg-options "-O0 -fno-omit-frame-pointer -fno-inline --save-temps" } */

/* Makes no calls; only defines and zero-initializes one local.  It has no
   attributes, so it is built with the file's dg-options.  */
void
leaf (void)
{
  int a = 0;
}

/* Calls leaf ().  Carries optimize("omit-frame-pointer") although the file
   is compiled with -fno-omit-frame-pointer (see dg-options).  */
__attribute__ ((optimize("omit-frame-pointer")))
void
non_leaf_1 (void)
{
  leaf ();
}

/* Second function with the same shape as non_leaf_1: calls leaf () and
   carries optimize("omit-frame-pointer").  The scan-assembler-times
   directive below expects its pattern exactly twice.  */
__attribute__ ((optimize("omit-frame-pointer")))
void
non_leaf_2 (void)
{
  leaf ();
}

/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */

/* { dg-final { cleanup-saved-temps } } */
Beispiel #28
0
 // A load has one child to process: its pointer.
 void visitLoad(Load* curr) {
   optimize(curr, curr->ptr);
 }
Beispiel #29
0
        uint8_t m = 0;
        uint8_t l = 0;
        uint8_t u = 91;

        while (!(pgm_read_byte(&cos_tab[m]) == key) && (l <= u)) {
                m = (l + u);
		m >>= 1;
                if(pgm_read_byte(&cos_tab[m]) > key) l = m + 1;
                if(pgm_read_byte(&cos_tab[m]) < key) u = m - 1;
        }
        return m;
}

// square root, abacus algorithm

__attribute__((optimize("O2"))) static uint16_t sqrt_int(uint32_t x)
{
    uint32_t res = 0;
    uint32_t bit = 1L << 30; // The second-to-x bit is set


    // "mask" starts at the highest power of four <= than the argument.
    while (bit > x)
    {
        bit >>= 2;
    }

    while (bit != 0)
    {
        if (x >= res + bit)
        {
Beispiel #30
0
 * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * WITH THE SOFTWARE.
 */

#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
#define NDEBUG
#endif

#include <math.h>
#include <assert.h>

#include "ekf_math.h"

/*
 * Sets every element of `out` to 0.0, walking rows 0..MAT_ROWS(out)-1 and,
 * within each row, columns 0..MAT_COLS(out)-1. Compiled with
 * optimize("-O3").
 */
__attribute__((optimize("-O3"))) void mat_init_zero(MATRIX out)
{
    int row;
    int col;

    for (row = 0; row < MAT_ROWS(out); row++) {
        for (col = 0; col < MAT_COLS(out); col++) {
            MAT_ELEMENT(out,row,col) = 0.0;
        }
    }
}

__attribute__((optimize("-O3"))) void mat_init_identity(MATRIX out)
{
    int i, j;

    for (i = 0; i < MAT_ROWS(out); i++) {