Gotos in restricted functions

Gotos are handy to write efficient, compact, and easy to write finite state machines. This is a little example in D language, it generates the "Look and say sequence" (from: http://rosettacode.org/wiki/Look-and-say_sequence#Fast_Imperative_Version ), and it's quite fast:

import core.stdc.stdio, std.math, std.conv, std.algorithm, std.array;
 
void showLookAndSay(bool showArrays)(in uint n) nothrow {
    if (n == 0) // No sequences to generate and show.
        return;
 
    enum Digit : char { nil = '\0', one = '1', two = '2', thr = '3' }
 
    // Allocate an approximate upper bound size for the array.
    static Digit* allocBuffer(in uint m) nothrow {
        immutable len = cast(size_t)(100 + 1.05 *
                                     exp(0.269 * m + 0.2686)) + 1;
        auto a = len.uninitializedArray!(Digit[]);
        printf("Allocated %d bytes.\n", a.length * Digit.sizeof);
        return a.ptr;
    }
 
    // Can't be expressed in the D type system:
    // a1 and a2 are immutable pointers to mutable data.
    auto a1 = allocBuffer(n % 2 ? n : n - 1);
    auto a2 = allocBuffer(n % 2 ? n - 1 : n);
    printf("\n");
 
    a1[0] = Digit.one;
    size_t len1 = 1;
    a1[len1] = Digit.nil;
 
    foreach (immutable i; 0 .. n - 1) {
        static if (showArrays)
            printf("%2u: %s\n", i + 1, a1);
        else
            printf("%2u: n. digits: %u\n", i + 1, len1);
        auto p1 = a1,
             p2 = a2;
 
        S0: final switch (*p1++) with (Digit) { // Initial state.
                case nil: goto END;
                case one: goto S1;
                case two: goto S2;
                case thr: goto S3;
            }
        S1: final switch (*p1++) with (Digit) {
                case nil: *p2++ = one; *p2++ = one; goto END;
                case one: goto S11;
                case two: *p2++ = one; *p2++ = one; goto S2;
                case thr: *p2++ = one; *p2++ = one; goto S3;
            }
        S2: final switch (*p1++) with (Digit) {
                case nil: *p2++ = one; *p2++ = two; goto END;
                case one: *p2++ = one; *p2++ = two; goto S1;
                case two: goto S22;
                case thr: *p2++ = one; *p2++ = two; goto S3;
            }
        S3: final switch (*p1++) with (Digit) {
                case nil: *p2++ = one; *p2++ = thr; goto END;
                case one: *p2++ = one; *p2++ = thr; goto S1;
                case two: *p2++ = one; *p2++ = thr; goto S2;
                case thr: goto S33;
            }
        S11: final switch (*p1++) with (Digit) {
                case nil: *p2++ = two; *p2++ = one; goto END;
                case one: *p2++ = thr; *p2++ = one; goto S0;
                case two: *p2++ = two; *p2++ = one; goto S2;
                case thr: *p2++ = two; *p2++ = one; goto S3;
            }
        S22: final switch (*p1++) with (Digit) {
                case nil: *p2++ = two; *p2++ = two; goto END;
                case one: *p2++ = two; *p2++ = two; goto S1;
                case two: *p2++ = thr; *p2++ = two; goto S0;
                case thr: *p2++ = two; *p2++ = two; goto S3;
            }
        S33: final switch (*p1++) with (Digit) {
                case nil: *p2++ = two; *p2++ = thr; goto END;
                case one: *p2++ = two; *p2++ = thr; goto S1;
                case two: *p2++ = two; *p2++ = thr; goto S2;
                case thr: *p2++ = thr; *p2++ = thr; goto S0;
            }
        END:
            immutable len2 = p2 - a2;
            a2[len2] = Digit.nil;
            a1.swap(a2);
            len1 = len2;
    }
 
    static if (showArrays)
        printf("%2u: %s\n", n, a1);
    else
        printf("%2u: n. digits: %u\n", n, len1);
}
 
void main(in string[] args) {
    immutable n = (args.length == 2) ? args[1].to!uint : 10;
    n.showLookAndSay!true;
}

If you are using GNU-C you can write a little more efficient code, with computed gotos and the builtin_expect:

void audioactive_loop(int n) {
    unsigned long len1, len2, i;
    unsigned char *s1, *p1, *s2, *p2, *pend, *paux;
    const void *labelsS123[] = {&&S1, &&S2, &&S3}; // Comment this out if you don't use GCC

    // ...
    
    len1 = 1;
    s1[0] = '1';
    s1[len1] = '\0';

    for (i = 0; i < n - 1; i++) {
        if (DOPRINT)
            printf("%d: %s\n", i+1, s1);
        else
            printf("%d: size=%d\n", i+1, len1);

        p1 = s1;
        p2 = s2;
        pend = s1 + len1;

        S0:
            if (__builtin_expect(p1 == pend, 0)) goto END;
            goto *labelsS123[*p1++ - '1'];

        S1:
            if (__builtin_expect(p1 == pend, 0)) { *p2++ = '1'; *p2++ = '1'; goto END; }
            switch (*p1++) {
                case '1': goto S11;
                case '2': *p2++ = '1'; *p2++ = '1'; goto S2;
                case '3': *p2++ = '1'; *p2++ = '1'; goto S3;
            }

    // ...

Regarding computed gotos they are handy to write simple but sufficiently efficient interpreters (and if you don't want to create a JIT or to use something like PyPy). For a little self-contained example of an interpreter you can see the solutions to the ICFP 2006 contest:

http://fileformats.archiveteam.org/wiki/Universal_Machine_(ICFP_programming_contest_2006)

This is an efficient solution in GNU-C:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef unsigned char uint8;
typedef unsigned short uint16;
typedef unsigned int uint32;
typedef unsigned long long uint64;

#define CompileTime_Assert( x ) extern char __ctassert[(x)];

uint32 const BUF_SIZE = 4*1024*1024;
uint32 const MIN_ALLOC = 8;

struct OPTYPE
{
    void * m_op;
    uint32 *__restrict m_A;
    uint32 *__restrict m_B;
};


uint32 s_buffer[BUF_SIZE*MIN_ALLOC];
uint32 *s_free = 0;

inline uint32 *allocate( uint32 _size )
{
  uint32 *ptr;
  
  if( (_size < MIN_ALLOC) && s_free )
  {
    ptr = s_free;
    s_free = (uint32*)*ptr;
    *ptr = _size;
        CompileTime_Assert( MIN_ALLOC == 8 ); // unroll
        ptr[1] = 0;
        ptr[2] = 0;
        ptr[3] = 0;
        ptr[4] = 0;
        ptr[5] = 0;
        ptr[6] = 0;
        ptr[7] = 0;
  }
  else
  {
    ptr = new uint32[_size+1];
    *ptr = _size;
    memset(ptr+1,0,_size*4);
  }
  return ptr+1;
}

inline void destroy( void * _ptr )
{
  uint32 *ptr = ((uint32*)_ptr)-1;
  if( ((uint32)(ptr-s_buffer))<(BUF_SIZE*MIN_ALLOC) )
  {
    *ptr = (uint)s_free;
    s_free = ptr;
  }
  else
  {
    delete[] ptr;
  }
}

CompileTime_Assert( sizeof(uint32) == sizeof(uint32 *) );

#define A (*ip->m_A)
#define B (*ip->m_B)
#define C (*ip->m_C)
    
class VM
{    
public:
    static void Go( uint32 *_arr0 )
    {        
        void * const labels[] = 
        {
            &&HALT,
            &&HALT,
            &&HALT,
            &&HALT,
            &&HALT,
            &&HALT,
            &&HALT,
            &&HALT,
            &&OP_08,
            &&OP_09,
            &&OP_10,
            &&OP_11,
            &&OP_12
        };

        void * const OP00[] = 
    {
      &&OP_00_0,
      &&OP_00_1,
      &&OP_00_2,
      &&OP_00_3,
      &&OP_00_4,
      &&OP_00_5,
      &&OP_00_6,
      &&OP_00_7,
    };

        void * const OP01[] = 
    {
      &&OP_01_0,
      &&OP_01_1,
      &&OP_01_2,
      &&OP_01_3,
      &&OP_01_4,
      &&OP_01_5,
      &&OP_01_6,
      &&OP_01_7,
    };

        void * const OP02[] = 
    {
      &&OP_02_0,
      &&OP_02_1,
      &&OP_02_2,
      &&OP_02_3,
      &&OP_02_4,
      &&OP_02_5,
      &&OP_02_6,
      &&OP_02_7,
    };

        void * const OP03[] = 
    {
      &&OP_03_0,
      &&OP_03_1,
      &&OP_03_2,
      &&OP_03_3,
      &&OP_03_4,
      &&OP_03_5,
      &&OP_03_6,
      &&OP_03_7,
    };

        void * const OP04[] = 
    {
      &&OP_04_0,
      &&OP_04_1,
      &&OP_04_2,
      &&OP_04_3,
      &&OP_04_4,
      &&OP_04_5,
      &&OP_04_6,
      &&OP_04_7,
    };

        void * const OP05[] = 
    {
      &&OP_05_0,
      &&OP_05_1,
      &&OP_05_2,
      &&OP_05_3,
      &&OP_05_4,
      &&OP_05_5,
      &&OP_05_6,
      &&OP_05_7,
    };

        void * const OP06[] = 
    {
      &&OP_06_0,
      &&OP_06_1,
      &&OP_06_2,
      &&OP_06_3,
      &&OP_06_4,
      &&OP_06_5,
      &&OP_06_6,
      &&OP_06_7,
    };

        void * const OP13[] = 
    {
      &&OP_13_0,
      &&OP_13_1,
      &&OP_13_2,
      &&OP_13_3,
      &&OP_13_4,
      &&OP_13_5,
      &&OP_13_6,
      &&OP_13_7,
    };

        uint32 __restrict reg[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
        
        OPTYPE * __restrict insns = new OPTYPE[(_arr0[-1]+3)&~3];
        {
            for(uint i = 0; i < _arr0[-1]; i+=4 )
            {
        insns[i+0].m_op = &&XLATE;
        insns[i+1].m_op = &&XLATE;
        insns[i+2].m_op = &&XLATE;
        insns[i+3].m_op = &&XLATE;
            }
        }
        register OPTYPE * __restrict ip = insns;
                        
// JIT compile
XLATE:
    {
      uint32 const op = _arr0[ip-insns];
      uint32 const opcode = (op>>28);
      
      switch( opcode )
      {          
        case 0:
             ip->m_op = OP00[((op>>6)&7)];
          ip->m_A = reg+ ((op>>3)&7);
             ip->m_B = reg+ (op&7);
          goto *ip->m_op;       
             
        case 1:
             ip->m_op = OP01[((op>>3)&7)];
          ip->m_A = reg+ ((op>>6)&7);
             ip->m_B = reg+ (op&7);
          goto *ip->m_op;          

        case 2:
             ip->m_op = OP02[((op>>6)&7)];
          ip->m_A = reg+ ((op>>3)&7);
             ip->m_B = reg+ (op&7);
          goto *ip->m_op;          

        case 3:
             ip->m_op = OP03[((op>>6)&7)];
          ip->m_A = reg+ ((op>>3)&7);
             ip->m_B = reg+ (op&7);
          goto *ip->m_op;       

        case 4:
             ip->m_op = OP04[((op>>6)&7)];
          ip->m_A = reg+ ((op>>3)&7);
             ip->m_B = reg+ (op&7);
          goto *ip->m_op;       

        case 5:
             ip->m_op = OP05[((op>>6)&7)];
          ip->m_A = reg+ ((op>>3)&7);
             ip->m_B = reg+ (op&7);
          goto *ip->m_op;       

        case 6:
             ip->m_op = OP06[((op>>6)&7)];
          ip->m_A = reg+ ((op>>3)&7);
             ip->m_B = reg+ (op&7);
          goto *ip->m_op;       
          
        case 13:
            ip->m_op = OP13[(op>>25)&7];        
          ip->m_A = (uint32*)(_arr0[ip-insns]&((1<<25)-1));
          goto *ip->m_op;
        
        default:
             ip->m_op = labels[opcode];
          ip->m_A = reg+ ((op>>3)&7);
             ip->m_B = reg+ (op&7);
          goto *ip->m_op;          
      }
    }
        
        // ----------------------------------------
OP_00_0:
            if( !B ) { goto *(++ip)->m_op; }
      reg[0] = A;
            goto *(++ip)->m_op;                  
OP_03_0:
      reg[0] = A + B;
            goto *(++ip)->m_op;
OP_04_0:
      reg[0] = A * B;
            goto *(++ip)->m_op;
OP_05_0:
      reg[0] = A / B;
            goto *(++ip)->m_op;
OP_06_0:
            reg[0] = ~(A & B);
            goto *(++ip)->m_op;

OP_00_1:
            if( !B ) { goto *(++ip)->m_op; }
      reg[1] = A;
            goto *(++ip)->m_op;                  
OP_03_1:
      reg[1] = A + B;
            goto *(++ip)->m_op;
OP_04_1:
      reg[1] = A * B;
            goto *(++ip)->m_op;
OP_05_1:
      reg[1] = A / B;
            goto *(++ip)->m_op;
OP_06_1:
            reg[1] = ~(A & B);
            goto *(++ip)->m_op;

OP_00_2:
            if( !B ) { goto *(++ip)->m_op; }
      reg[2] = A;
            goto *(++ip)->m_op;                  
OP_03_2:
      reg[2] = A + B;
            goto *(++ip)->m_op;
OP_04_2:
      reg[2] = A * B;
            goto *(++ip)->m_op;
OP_05_2:
      reg[2] = A / B;
            goto *(++ip)->m_op;
OP_06_2:
            reg[2] = ~(A & B);
            goto *(++ip)->m_op;

OP_00_3:
            if( !B ) { goto *(++ip)->m_op; }
      reg[3] = A;
            goto *(++ip)->m_op;                  
OP_03_3:
      reg[3] = A + B;
            goto *(++ip)->m_op;
OP_04_3:
      reg[3] = A * B;
            goto *(++ip)->m_op;
OP_05_3:
      reg[3] = A / B;
            goto *(++ip)->m_op;
OP_06_3:
            reg[3] = ~(A & B);
            goto *(++ip)->m_op;
      
OP_00_4:
            if( !B ) { goto *(++ip)->m_op; }
      reg[4] = A;
            goto *(++ip)->m_op;                  
OP_03_4:
      reg[4] = A + B;
            goto *(++ip)->m_op;
OP_04_4:
      reg[4] = A * B;
            goto *(++ip)->m_op;
OP_05_4:
      reg[4] = A / B;
            goto *(++ip)->m_op;
OP_06_4:
            reg[4] = ~(A & B);
            goto *(++ip)->m_op;

OP_00_5:
            if( !B ) { goto *(++ip)->m_op; }
      reg[5] = A;
            goto *(++ip)->m_op;                  
OP_03_5:
      reg[5] = A + B;
            goto *(++ip)->m_op;
OP_04_5:
      reg[5] = A * B;
            goto *(++ip)->m_op;
OP_05_5:
      reg[5] = A / B;
            goto *(++ip)->m_op;
OP_06_5:
            reg[5] = ~(A & B);
            goto *(++ip)->m_op;
      
OP_00_6:
            if( !B ) { goto *(++ip)->m_op; }
      reg[6] = A;
            goto *(++ip)->m_op;                  
OP_03_6:
      reg[6] = A + B;
            goto *(++ip)->m_op;
OP_04_6:
      reg[6] = A * B;
            goto *(++ip)->m_op;
OP_05_6:
      reg[6] = A / B;
            goto *(++ip)->m_op;
OP_06_6:
            reg[6] = ~(A & B);
            goto *(++ip)->m_op;

OP_00_7:
            if( !B ) { goto *(++ip)->m_op; }
      reg[7] = A;
            goto *(++ip)->m_op;                  
OP_03_7:
      reg[7] = A + B;
            goto *(++ip)->m_op;
OP_04_7:
      reg[7] = A * B;
            goto *(++ip)->m_op;
OP_05_7:
      reg[7] = A / B;
            goto *(++ip)->m_op;
OP_06_7:
            reg[7] = ~(A & B);
            goto *(++ip)->m_op;
      

OP_08:
      A = (uint32)allocate(B);
            goto *(++ip)->m_op;

OP_09:
      destroy((void*)B);
            goto *(++ip)->m_op;

OP_10:
      fputc( B, stdout );
            goto *(++ip)->m_op;

OP_11:
      B = fgetc( stdin );            
            goto *(++ip)->m_op;

// ---

OP_01_0:
            A = (reg[0] ? ((uint32 *)reg[0]) : _arr0)[B] ;
            goto *(++ip)->m_op;

OP_01_1:
            A = (reg[1] ? ((uint32 *)reg[1]) : _arr0)[B] ;
            goto *(++ip)->m_op;

OP_01_2:
            A = (reg[2] ? ((uint32 *)reg[2]) : _arr0)[B] ;
            goto *(++ip)->m_op;

OP_01_3:
            A = (reg[3] ? ((uint32 *)reg[3]) : _arr0)[B] ;
            goto *(++ip)->m_op;

OP_01_4:
            A = (reg[4] ? ((uint32 *)reg[4]) : _arr0)[B] ;
            goto *(++ip)->m_op;

OP_01_5:
            A = (reg[5] ? ((uint32 *)reg[5]) : _arr0)[B] ;
            goto *(++ip)->m_op;

OP_01_6:
            A = (reg[6] ? ((uint32 *)reg[6]) : _arr0)[B] ;
            goto *(++ip)->m_op;

OP_01_7:
            A = (reg[7] ? ((uint32 *)reg[7]) : _arr0)[B] ;
            goto *(++ip)->m_op;

// ---

OP_02_SMC:
      _arr0[A] = B;
      insns[A].m_op = &&XLATE;
      goto *(++ip)->m_op;

OP_02_0:
      if( !reg[0] ) { goto OP_02_SMC; }
      ((uint32 *__restrict)reg[0])[A] = B;
      goto *(++ip)->m_op;

OP_02_1:
      if( !reg[1] ) { goto OP_02_SMC; }
      ((uint32 *__restrict)reg[1])[A] = B;
      goto *(++ip)->m_op;
      
OP_02_2:
      if( !reg[2] ) { goto OP_02_SMC; }
      ((uint32 *__restrict)reg[2])[A] = B;
      goto *(++ip)->m_op;
      
OP_02_3:
      if( !reg[3] ) { goto OP_02_SMC; }
      ((uint32 *__restrict)reg[3])[A] = B;
      goto *(++ip)->m_op;
      
OP_02_4:
      if( !reg[4] ) { goto OP_02_SMC; }
      ((uint32 *__restrict)reg[4])[A] = B;
      goto *(++ip)->m_op;
      
OP_02_5:
      if( !reg[5] ) { goto OP_02_SMC; }
      ((uint32 *__restrict)reg[5])[A] = B;
      goto *(++ip)->m_op;
      
OP_02_6:
      if( !reg[6] ) { goto OP_02_SMC; }
      ((uint32 *__restrict)reg[6])[A] = B;
      goto *(++ip)->m_op;
      
OP_02_7:
      if( !reg[7] ) { goto OP_02_SMC; }
      ((uint32 *__restrict)reg[7])[A] = B;
      goto *(++ip)->m_op;

// ---

OP_12:
             if( !A ) 
      {
        ip = insns+B;
               goto *ip->m_op;
      }

      {
        uint32 const *source = (uint32 const*)A;
        uint32 const dest = B;
                uint32 const size = source[-1];
                if( size > _arr0[-1] )
                {
                    destroy(_arr0);
                    delete[] insns;
                    _arr0 = allocate(size);
                    insns = new OPTYPE[(size+3)&~3];
                }
                memcpy( _arr0, source, size*4 );
                for(uint i = 0; i < size; i+=4 )
                {
                    insns[i+0].m_op = &&XLATE;
                    insns[i+1].m_op = &&XLATE;
                    insns[i+2].m_op = &&XLATE;
                    insns[i+3].m_op = &&XLATE;
                }
        ip = insns+dest;
        goto XLATE;
      }

// ---

OP_13_0:
            reg[0] = (uint32)ip->m_A;
            goto *(++ip)->m_op;

OP_13_1:
            reg[1] = (uint32)ip->m_A;
            goto *(++ip)->m_op;

OP_13_2:
            reg[2] = (uint32)ip->m_A;
            goto *(++ip)->m_op;

OP_13_3:
            reg[3] = (uint32)ip->m_A;
            goto *(++ip)->m_op;

OP_13_4:
            reg[4] = (uint32)ip->m_A;
            goto *(++ip)->m_op;

OP_13_5:
            reg[5] = (uint32)ip->m_A;
            goto *(++ip)->m_op;

OP_13_6:
            reg[6] = (uint32)ip->m_A;
            goto *(++ip)->m_op;

OP_13_7:
            reg[7] = (uint32)ip->m_A;
            goto *(++ip)->m_op;

HALT:
        return;
    }

    
};

int main( int argc, char const ** _argv )
{
  for(uint32 i = 0; i < (BUF_SIZE-1); i++)
  {
    s_buffer[i*MIN_ALLOC] = (uint)(&s_buffer[((i+1)*MIN_ALLOC)]);
  }
  s_buffer[(BUF_SIZE-1)*MIN_ALLOC] = 0;
  s_free = s_buffer;
  
    if( argc == 2 )
    {
        FILE *file = fopen( _argv[1], "r" );
        if( file )
        {
            fseek( file, 0, SEEK_END );
            uint32 fsize = ftell( file );
            rewind( file );
            
            uint32 size = (fsize+3)/4;
            uint32 *data = new uint32[size+1];
            data[0] = size;
            data++;
            fread( data, 1, fsize, file );

            // endian-reverse big-endian data
            for( uint32 i = 0; i < size; i++ )
            {
                data[i] = ( data[i] >> 24 )
                |            (( data[i] >> 8 ) & 0x0000FF00)
                |            (( data[i] << 8 ) & 0x00FF0000)
                |            ( data[i] << 24 );
            }
            
            fclose( file );
            VM::Go( data );
      
            // Leak data[] and let the OS clean it up after us
        }
        else
        {
            printf( "Couldn't open file %s for reading.\n", _argv[1] );
        }
    }
    else
    {
        printf("Usage: icfp filename.um\n");
    }
    
      return 0;
};