*******************************************************************************

    Timer based on the cycle counter

*******************************************************************************

void timeit_start(timeit_t t)
void timeit_stop(timeit_t t)

    Gives wall and user time - useful for parallel programming.

    Example usage:
    \begin{lstlisting}[language=c]
    timeit_t t0;
    
    // ...
    
    timeit_start(t0);
    
    // do stuff, take some time
    
    timeit_stop(t0);
    
    flint_printf("cpu = %wd ms  wall = %wd ms\n", t0->cpu, t0->wall);
    \end{lstlisting}

void start_clock(int n)
void stop_clock(int n)
double get_clock(int n)

    Gives time based on cycle counter.

    First one must ensure the processor speed in cycles per second
    is set correctly in \code{profiler.h}, in the macro definition 
    \code{#define FLINT_CLOCKSPEED}.

    One can access the cycle counter directly by \code{get_cycle_counter()}
    which returns the current cycle counter as a \code{double}.

    A sample usage of clocks is:
    \begin{lstlisting}[language=c]
    init_all_clocks();
    
    start_clock(n);
    
    // do something
    
    stop_clock(n);
    
    flint_printf("Time in seconds is %.3f\n", get_clock(n));
    \end{lstlisting}
    where \code{n} is a clock number (from 0-19 by default). The number of 
    clocks can be changed by altering \code{FLINT_NUM_CLOCKS}. One can also 
    initialise an individual clock with \code{init_clock(n)}.

*******************************************************************************

    Framework for repeatedly sampling a single target

*******************************************************************************

void prof_repeat(double *min, double *max, profile_target_t target, void *arg)

    Allows one to automatically time a given function. Here is a sample usage:

    Suppose one has a function one wishes to profile:
    \begin{lstlisting}[language=c]
    void myfunc(ulong a, ulong b);
    \end{lstlisting}
    One creates a struct for passing arguments to our function:
    \begin{lstlisting}[language=c]
    typedef struct 
    {
        ulong a, b;
    } myfunc_t;
    \end{lstlisting}
    and then writes a sample function that unpacks them and times the call:
    \begin{lstlisting}[language=c]
    void sample_myfunc(void * arg, ulong count)
    {
        myfunc_t * params = (myfunc_t *) arg;

        ulong a = params->a;
        ulong b = params->b;

        for (ulong i = 0; i < count; i++)
        {
            prof_start();
            myfunc(a, b);
            prof_stop();
        }
    }
    \end{lstlisting}
    Then one runs the profile:
    \begin{lstlisting}[language=c]
    double min, max;

    myfunc_t params;

    params.a = 3;
    params.b = 4;

    prof_repeat(&min, &max, sample_myfunc, &params);
    
    flint_printf("Min time is %.3lfs, max time is %.3lfs\n", min, max);
    \end{lstlisting}

    If either of the first two parameters to \code{prof_repeat} is
    \code{NULL}, that value is not stored.

    One may set the minimum time in microseconds for a timing run by 
    adjusting\\ \code{DURATION_THRESHOLD} and one may set a target duration 
    in microseconds by adjusting \code{DURATION_TARGET} in \code{profiler.h}.

*******************************************************************************

    Memory usage

*******************************************************************************

void get_memory_usage(meminfo_t meminfo)

    Obtains information about the memory usage of the current process.
    The meminfo object contains the slots \code{size} (virtual memory size),
    \code{peak} (peak virtual memory size), \code{rss} (resident set size),
    \code{hwm} (peak resident set size). The values are stored in kilobytes
    (1024 bytes). This function currently only works on Linux.

*******************************************************************************

    Simple profiling macros

*******************************************************************************

macro TIMEIT_REPEAT(timer, reps)
macro TIMEIT_END_REPEAT(timer, reps)

    Repeatedly runs the code between the \code{TIMEIT_REPEAT} and the
    \code{TIMEIT_END_REPEAT} markers, automatically increasing the number of
    repetitions until the elapsed time exceeds the timer resolution.
    The macro takes as input a predefined \code{timeit_t} object
    and an integer variable to hold the number of repetitions.

macro TIMEIT_START
macro TIMEIT_STOP

    Repeatedly runs the code between the \code{TIMEIT_START} and the
    \code{TIMEIT_STOP}
    markers, automatically increasing the number of repetitions until the
    elapsed time exceeds the timer resolution, and then prints the average
    elapsed cpu and wall time for a single repetition.

macro TIMEIT_ONCE_START
macro TIMEIT_ONCE_STOP

    Runs the code between the \code{TIMEIT_ONCE_START} and the
    \code{TIMEIT_ONCE_STOP}
    markers exactly once and then prints the elapsed cpu and wall time.
    This does not give a precise measurement if the elapsed time is short
    compared to the timer resolution.

macro SHOW_MEMORY_USAGE

    Retrieves memory usage information via \code{get_memory_usage}
    and prints the results.

