/* Copyright (c) 2010-2011 mbed.org, MIT License
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of this software
* and associated documentation files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all copies or
* substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
* BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

#if defined(TARGET_KL25Z) | defined(TARGET_KL46Z) | defined(TARGET_K20D5M) | defined(TARGET_K64F)

#include <stdarg.h>
#include "USBHAL.h"

// Critical section controls.  This module uses a bunch of static variables,
// and much of the code that accesses the statics can be called from either
// normal application context or IRQ context.  Whenever a shared variable is
// accessed from code that can run in an application context, we have to
// protect against interrupts by entering a critical section.  These macros
// enable and disable the USB IRQ if we're running in application context.
// (They do nothing if we're already in interrupt context, because the
// hardware interrupt controller won't generate another of the same IRQ
// that we're already handling.  We could still be interrupted by a different,
// higher-priority IRQ, but our shared variables are only shared within this
// module, so they won't be affected by other interrupt handlers.)
// 'inIRQ' is true only while the USB interrupt handler is running (it is
// set and cleared by _usbisr()).
static bool inIRQ;

// Number of critical sections currently open outside of USB IRQ context.
// Critical sections can nest (for example, internalReset() calls
// realiseEndpoint() from inside its own critical section), so the USB IRQ
// must only be re-enabled when the OUTERMOST section is closed.
static volatile unsigned int critNesting;

// Enter a critical section.  The nesting count is bumped before the IRQ
// is disabled, so that a section opened in between by other code sees a
// non-zero count and leaves the IRQ disabled when it exits.  The macro
// carries its own trailing semicolon because it is used as a bare
// statement: ENTER_CRITICAL_SECTION { ... } EXIT_CRITICAL_SECTION
#define ENTER_CRITICAL_SECTION \
    do { \
        if (!inIRQ) { \
            critNesting = critNesting + 1; \
            NVIC_DisableIRQ(USB0_IRQn); \
        } \
    } while (0);

// Exit a critical section.  The IRQ is re-enabled only when the nesting
// count drops back to zero.  (An unbalanced exit with the count already
// at zero simply enables the IRQ, as the non-nesting version did.)
#define EXIT_CRITICAL_SECTION \
    do { \
        if (!inIRQ) { \
            if (critNesting != 0) \
                critNesting = critNesting - 1; \
            if (critNesting == 0) \
                NVIC_EnableIRQ(USB0_IRQn); \
        } \
    } while (0);

//#define DEBUG_WITH_PRINTF
// debug printf; does a regular printf() in debug mode, nothing in
// normal mode.  Note that many of our routines are called in ISR
// context, so printf should really never be used here.  But in
// practice we can get away with it enough that it can be helpful
// as a limited debugging tool.
#ifdef DEBUG_WITH_PRINTF
// Debug mode: forward everything to printf().  The macro is fully
// variadic so that it also works when called with only a format string
// (a separate 'fmt' parameter would leave a dangling comma in that case).
#define printd(...) printf(__VA_ARGS__)
#else
// Normal mode: expands to nothing, so the arguments are never evaluated.
#define printd(...)
#endif

// Makeshift debug instrumentation.  This is a safer and better
// alternative to printf() that gathers event information in a 
// circular buffer for later use outside of interrupt context, such
// as printf() display at intervals in the main program loop.  
//
// Timing is critical to USB, so debug instrumentation is inherently 
// problematic in that it can affect the timing and thereby change 
// the behavior of what we're trying to debug.  Small timing changes
// can create new errors that wouldn't be there otherwise, or even
// accidentally fix the bug we're trying to find (e.g., by changing
// the timing enough to avoid a race condition).  To minimize these 
// effects, we use a small buffer and very terse event codes - 
// generally one character per event.  That makes for a cryptic 
// debug log, but it results in almost zero timing effects, allowing
// us to see a more faithful version of the subject program.
//
// Note that the buffer size isn't critical to timing, because any
// printf()-type display should always occur in regular (non-ISR)
// context and thus won't have any significant effect on interrupt
// timing or latency.  The buffer can be expanded if longer logs
// would be helpful.  However, it is important to keep the individual
// event messages short (a character or two in most cases), because
// it takes time to move them into the buffer.  
//#define DEBUG_WITH_EVENTS
#ifdef DEBUG_WITH_EVENTS
// Circular event buffer.  The index arithmetic wraps by masking with
// (nevents-1), which is why the size has to be a power of 2.
const int nevents = 64;  // MUST BE A POWER OF 2
char events[nevents];
// Write and read positions within events[].  The buffer is empty when
// the two are equal.
char ewrite = 0, eread = 0;
// Append one event character to the circular buffer.  When the buffer
// is full, the oldest unread character is discarded to make room.
void HAL_DEBUG_EVENT(char c)
{
    const int wrapMask = nevents - 1;

    // store the character and advance the write position
    events[ewrite] = c;
    ewrite = (ewrite + 1) & wrapMask;

    // if the write position caught up with the read position, drop the
    // oldest entry by advancing the read position as well
    if (ewrite == eread)
        eread = (eread + 1) & wrapMask;
}
// Log a two-character event, in argument order.
void HAL_DEBUG_EVENT(char a, char b)
{
    HAL_DEBUG_EVENT(a);
    HAL_DEBUG_EVENT(b);
}
// Log a three-character event, in argument order.
void HAL_DEBUG_EVENT(char a, char b, char c)
{
    HAL_DEBUG_EVENT(a);
    HAL_DEBUG_EVENT(b);
    HAL_DEBUG_EVENT(c);
}
// Log every character of a null-terminated string (the terminator
// itself is not logged).
void HAL_DEBUG_EVENT(const char *s)
{
    for ( ; *s != '\0' ; ++s)
        HAL_DEBUG_EVENT(*s);
}
// Log an event character followed by a decimal number.  Up to four
// digits are written (thousands, hundreds, tens, units); leading zeros
// are suppressed, and any digits above the thousands place are dropped.
// Intended for non-negative values.
void HAL_DEBUG_EVENTI(char c, int i) {
    HAL_DEBUG_EVENT(c);

    // Each threshold must be inclusive: with a strict '>' comparison the
    // values 10, 100 and 1000 would lose their leading '1' digit.
    if (i >= 1000) HAL_DEBUG_EVENT((char)(((i / 1000) % 10) + '0'));
    if (i >= 100) HAL_DEBUG_EVENT((char)(((i / 100) % 10) + '0'));
    if (i >= 10) HAL_DEBUG_EVENT((char)(((i / 10) % 10) + '0'));
    HAL_DEBUG_EVENT((char)((i % 10) + '0'));
}
// Log a printf-style formatted message.  The text is formatted into a
// 64-byte stack buffer and then copied into the event log; messages that
// don't fit are truncated (vsnprintf never writes past the buffer and
// always null-terminates it).
void HAL_DEBUG_EVENTF(const char *fmt, ...) {
    char buf[64];
    va_list va;
    va_start(va, fmt);
    vsnprintf(buf, sizeof(buf), fmt, va);
    va_end(va);
    HAL_DEBUG_EVENT(buf);
}
// Print and consume the pending contents of the event log.  The line
// starts with the caller's prefix, or "ev: " if the prefix is null.
void HAL_DEBUG_PRINTEVENTS(const char *prefix)
{
    // line label
    if (prefix == 0)
        printf("ev: ");
    else
        printf("%s ", prefix);

    // Drain the circular buffer into a local copy inside the critical
    // section; the printf() of the copy is done after the section ends.
    char snapshot[nevents];
    int count = 0;
    ENTER_CRITICAL_SECTION
    {
        while (eread != ewrite)
        {
            snapshot[count++] = events[eread];
            eread = (eread+1) & (nevents - 1);
        }
    }
    EXIT_CRITICAL_SECTION
    printf("%.*s\r\n", count, snapshot);
}
#else
// Event logging disabled: every logging call compiles to a no-op
// expression, so call sites need no #ifdefs of their own.
#define HAL_DEBUG_EVENT(...)   ((void)0)
// This name matches the real function defined when DEBUG_WITH_EVENTS is on.
#define HAL_DEBUG_EVENTF(...)  ((void)0)
// Legacy misspelling of HAL_DEBUG_EVENTF, kept so existing uses still compile.
#define HAL_DEBUG_EVENTf(...)  ((void)0)
#define HAL_DEBUG_EVENTI(...)  ((void)0)
void HAL_DEBUG_PRINTEVENTS(const char *prefix) { (void)prefix; }
#endif


// Static singleton instance pointer.  The constructor stores 'this' here,
// and the static interrupt entry point _usbisr() calls usbisr() through it.
USBHAL * USBHAL::instance;


// Convert physical endpoint number to its bit in the endpoint bit
// vectors (Data1, epComplete, epRealised).  The shift is done on an
// unsigned 32-bit value so that endpoint 31 doesn't shift a 1 into the
// sign bit of a signed int.
#define EP(endpoint) ((uint32_t)1 << (endpoint))

// Map a physical endpoint number to its logical endpoint number.
// Physical endpoints come in RX,TX pairs, two per logical endpoint,
// so the logical number is the physical number with the low bit
// (the direction bit) shifted out.
#define PHY_TO_LOG(endpoint)    ((endpoint) >> 1)

// Direction tests for a physical endpoint.  IN and OUT are named from
// the host's point of view, so on the device side IN means TX and OUT
// means RX.  Within each pair the even-numbered physical endpoint is
// OUT/RX and the odd-numbered one is IN/TX, so the low bit alone
// decides the direction.
#define IN_EP(endpoint)     (((endpoint) & 1U) != 0)
#define OUT_EP(endpoint)    (((endpoint) & 1U) == 0)

// Status flag bits in the 'info' byte of a BDT entry.  The layout is
// fixed by the SIE hardware.
#define BD_OWN_MASK        0x80         // bit 7: OWN - hardware SIE owns the BDT (TX/RX in progress)
#define BD_DATA01_MASK     0x40         // bit 6: DATA01 - DATA0/DATA1 bit for current TX/RX on endpoint
#define BD_KEEP_MASK       0x20         // bit 5: KEEP - hardware keeps BDT ownership after token completes
#define BD_NINC_MASK       0x10         // bit 4: NO INCREMENT - buffer location is a FIFO, so use same address for all bytes
#define BD_DTS_MASK        0x08         // bit 3: DATA TOGGLE SENSING - hardware SIE checks for DATA0/DATA1 match during RX/TX
#define BD_STALL_MASK      0x04         // bit 2: STALL - SIE issues STALL handshake in reply to any host access to endpoint

// Endpoint direction, as seen from the DEVICE side
#define RX    0
#define TX    1

// Buffer parity.  The hardware offers double buffering with two BDT
// entries (EVEN and ODD) per physical endpoint.  This implementation
// turns the double buffering off, so only the EVEN entries are used.
#define EVEN  0
#define ODD   1

// BDT index for a logical endpoint, direction (RX/TX) and buffer parity
// (EVEN/ODD).  Each logical endpoint has four consecutive BDT entries:
// RX-even, RX-odd, TX-even, TX-odd.
#define EP_BDT_IDX(logep, dir, odd) ((4 * (logep)) + (2 * (dir)) + (odd))

// BDT index for a physical endpoint and buffer parity.  Each physical
// endpoint has two consecutive BDT entries: even, odd.
#define PEP_BDT_IDX(phyep, odd)  ((2 * (phyep)) + (odd))

// Token types reported in the BDT 'info' flags.  TOK_PID() extracts the
// 4-bit token PID from bits 2..5 of the 'info' byte of BDT entry 'idx';
// the result is compared against the token codes below.
#define TOK_PID(idx)   ((bdt[idx].info >> 2) & 0x0F)
#define SETUP_TOKEN    0x0D
#define IN_TOKEN       0x09
#define OUT_TOKEN      0x01

// Buffer Descriptor Table (BDT) entry.  This is the hardware-defined
// memory structure for the shared memory block controlling an endpoint.
// Each entry is 8 bytes: a 32-bit descriptor word (info, reserved byte,
// byte count) followed by the 32-bit buffer address.
typedef struct BDT {
    uint8_t   info;       // BD[0:7] - status flags (BD_xxx_MASK) and token PID
    uint8_t   dummy;      // RSVD: BD[8:15]
    uint16_t  byte_count; // BD[16:31] - packet size in bytes
    uint32_t  address;    // Addr - address of the transfer buffer
} BDT;


// The BDT itself.  There are:
//    * 16 bidirectional logical endpoints -> 32 physical endpoints
//    * 2 BDT entries per endpoint (EVEN/ODD) -> 64 BDT entries
// The table is aligned on a 512-byte boundary; resetSIE() programs bits
// 8 and up of its address into the BDTPAGE1..3 registers.
__attribute__((__aligned__(512))) BDT bdt[NUMBER_OF_PHYSICAL_ENDPOINTS * 2];

// Transfer buffers.  We allocate the transfer buffers and point the
// SIE hardware to them via the BDT.  We disable hardware SIE's
// double-buffering (EVEN/ODD) scheme, so we only allocate one buffer
// per physical endpoint.  The array is indexed by physical endpoint
// number; buffers are allocated with new[] in realiseEndpoint() and
// freed in the USBHAL destructor.
uint8_t *endpoint_buffer[NUMBER_OF_PHYSICAL_ENDPOINTS];

// Allocated size of each endpoint buffer, in bytes, indexed by physical
// endpoint number.  Set in realiseEndpoint() when the buffer is allocated.
size_t epMaxPacket[NUMBER_OF_PHYSICAL_ENDPOINTS];


// SET ADDRESS mode tracking.  The address assignment has to be done in a
// specific order and with specific timing defined by the USB setup protocol 
// standards.  To get the sequencing right, we set a flag when we get the
// address message, and then set the address in the SIE when we're at the 
// right subsequent packet step in the protocol exchange.  These variables
// are just a place to stash the information between the time we receive the
// data and the time we're ready to update the SIE register.
//
// Both are written by setAddress() and cleared by internalReset().
static uint8_t set_addr = 0;    // non-zero while an address assignment is pending
static uint8_t addr = 0;        // the pending address value

// Endpoint DATA0/DATA1 bits, packed as a bit vector.  Each endpoint's
// bit is at (1 << endpoint number).  These track the current bit value
// on the endpoint.  For TX endpoints, this is the bit for the LAST
// packet we sent (so the next packet will be the inverse).  For RX
// endpoints, this is the bit value we expect for the NEXT packet.
// (Yes, it's inconsistent.)
//
// The initial value 0x55555555 has every even-numbered bit set, i.e.,
// the bit for each OUT/RX physical endpoint.
static volatile uint32_t Data1  = 0x55555555;

// Endpoint read/write completion flags, packed as a bit vector.  Each 
// endpoint's bit is at (1 << endpoint number).  A 1 bit signifies that
// the last read or write has completed (and hasn't had its result 
// consumed yet).  The bit is cleared again when the result is consumed
// in endpointReadResult() or endpointWriteResult().
static volatile uint32_t epComplete = 0;

// Endpoint Realised flags.  We set these flags (arranged in the usual 
// endpoint bit vector format) when endpoints are realised, so that
// read/write operations will know if it's okay to proceed.  The
// control endpoint (EP0) is always realised in both directions, hence
// the initial value 0x03 (the bits for physical endpoints 0 and 1).
static volatile uint32_t epRealised = 0x03;

// Get the current USB frame number from the SIE.  The value is
// assembled from the FRMNUML (low byte) and FRMNUMH (upper bits)
// registers and masked to 11 bits.
static uint32_t frameNumber() 
{
    uint32_t frame = USB0->FRMNUML;
    frame |= (USB0->FRMNUMH << 8);
    return frame & 0x07FF;
}

// Stub: ignores both arguments and always returns 0.
uint32_t USBHAL::endpointReadcore(uint8_t endpoint, uint8_t *buffer) 
{
    return 0;
}

// Enabled interrupts at startup or reset:
//   TOKDNE - token done
//   STALL  - stall (USB_INTEN_STALLEN_MASK)
//   SOFTOK - start-of-frame token
//   ERROR  - error
//   SLEEP  - sleep (inactivity on bus)
//   RST    - bus reset
//
// Note that we don't enable RESUME (resume from suspend mode), per 
// the hardware reference manual ("When not in suspend mode this 
// interrupt must be disabled").  We also don't enable ATTACH, which
// is only meaningful in host mode.
//
// The expansion is parenthesized so that the macro behaves as a single
// value when it's used inside a larger expression.
#define BUS_RESET_INTERRUPTS \
    (USB_INTEN_TOKDNEEN_MASK \
    | USB_INTEN_STALLEN_MASK \
    | USB_INTEN_SOFTOKEN_MASK \
    | USB_INTEN_ERROREN_MASK \
    | USB_INTEN_SLEEPEN_MASK \
    | USB_INTEN_USBRSTEN_MASK)

// Do a low-level reset on the USB hardware module.  This lets the 
// device software initiate a hard reset.
//
// After the module reset completes, this clears the 'info' and
// 'byte_count' fields of every BDT entry, points the SIE at the BDT,
// clears the interrupt status flags, and enables the standard set of
// interrupts (BUS_RESET_INTERRUPTS).
static void resetSIE(void)
{
    // set the reset bit in the transceiver control register,
    // then wait for it to clear
    USB0->USBTRC0 |= USB_USBTRC0_USBRESET_MASK;
    while (USB0->USBTRC0 & USB_USBTRC0_USBRESET_MASK);
    
    // clear BDT entries (the 'address' fields are left untouched)
    for (int i = 0 ; i < sizeof(bdt)/sizeof(bdt[0]) ; ++i)
    {
        bdt[i].info = 0;
        bdt[i].byte_count = 0;
    }

    // Set BDT Base Register.  Bits 8 and up of the table address are
    // split across the three BDTPAGE registers, one byte each.
    USB0->BDTPAGE1 = (uint8_t)((uint32_t)bdt>>8);
    USB0->BDTPAGE2 = (uint8_t)((uint32_t)bdt>>16);
    USB0->BDTPAGE3 = (uint8_t)((uint32_t)bdt>>24);

    // Clear interrupt flag
    USB0->ISTAT = 0xff;

    // Enable the initial set of interrupts
    USB0->INTEN = BUS_RESET_INTERRUPTS;

    // Disable weak pull downs, and turn off suspend mode
    USB0->USBCTRL = 0;

    // set the "reserved" bit in the transceiver control register
    // (hw ref: "software must set this bit to 1")
    USB0->USBTRC0 |= 0x40;
}

// Constructor.  Sets up the endpoint callback table, selects and enables
// the USB clock, installs the interrupt handler with this object as the
// singleton instance, and resets the USB module.  The USB IRQ is kept
// disabled until the handler has been installed.
USBHAL::USBHAL(void) 
{
    // Disable IRQ
    NVIC_DisableIRQ(USB0_IRQn);

#if defined(TARGET_K64F)
    // K64F only: write 0 to the MPU control/error status register
    MPU->CESR=0;
#endif
    // fill in callback array - entries are in physical endpoint order
    // starting from logical endpoint 1 (OUT first, then IN); the
    // control endpoint (EP0) has no entry here
    epCallback[0] = &USBHAL::EP1_OUT_callback;
    epCallback[1] = &USBHAL::EP1_IN_callback;
    epCallback[2] = &USBHAL::EP2_OUT_callback;
    epCallback[3] = &USBHAL::EP2_IN_callback;
    epCallback[4] = &USBHAL::EP3_OUT_callback;
    epCallback[5] = &USBHAL::EP3_IN_callback;
    epCallback[6] = &USBHAL::EP4_OUT_callback;
    epCallback[7] = &USBHAL::EP4_IN_callback;
    epCallback[8] = &USBHAL::EP5_OUT_callback;
    epCallback[9] = &USBHAL::EP5_IN_callback;
    epCallback[10] = &USBHAL::EP6_OUT_callback;
    epCallback[11] = &USBHAL::EP6_IN_callback;
    epCallback[12] = &USBHAL::EP7_OUT_callback;
    epCallback[13] = &USBHAL::EP7_IN_callback;
    epCallback[14] = &USBHAL::EP8_OUT_callback;
    epCallback[15] = &USBHAL::EP8_IN_callback;
    epCallback[16] = &USBHAL::EP9_OUT_callback;
    epCallback[17] = &USBHAL::EP9_IN_callback;
    epCallback[18] = &USBHAL::EP10_OUT_callback;
    epCallback[19] = &USBHAL::EP10_IN_callback;
    epCallback[20] = &USBHAL::EP11_OUT_callback;
    epCallback[21] = &USBHAL::EP11_IN_callback;
    epCallback[22] = &USBHAL::EP12_OUT_callback;
    epCallback[23] = &USBHAL::EP12_IN_callback;
    epCallback[24] = &USBHAL::EP13_OUT_callback;
    epCallback[25] = &USBHAL::EP13_IN_callback;
    epCallback[26] = &USBHAL::EP14_OUT_callback;
    epCallback[27] = &USBHAL::EP14_IN_callback;
    epCallback[28] = &USBHAL::EP15_OUT_callback;
    epCallback[29] = &USBHAL::EP15_IN_callback;

    // choose usb src as PLL
    SIM->SOPT2 |= (SIM_SOPT2_USBSRC_MASK | SIM_SOPT2_PLLFLLSEL_MASK);

    // enable OTG clock
    SIM->SCGC4 |= SIM_SCGC4_USBOTG_MASK;

    // Attach IRQ.  'instance' has to be set before the IRQ is enabled,
    // because the static handler _usbisr() dispatches through it.
    instance = this;
    NVIC_SetVector(USB0_IRQn, (uint32_t)&_usbisr);
    NVIC_EnableIRQ(USB0_IRQn);

    // USB Module Configuration
    // Reset USB Module
    resetSIE();
}

// Destructor.  Releases every endpoint transfer buffer that was
// allocated by realiseEndpoint() and resets its bookkeeping.
USBHAL::~USBHAL(void) 
{
    for (int ep = 0 ; ep < NUMBER_OF_PHYSICAL_ENDPOINTS ; ++ep) 
    {
        // endpoints that never got a buffer are left as they are
        if (endpoint_buffer[ep] == NULL)
            continue;

        // free the buffer and mark the slot as unallocated
        delete [] endpoint_buffer[ep];
        endpoint_buffer[ep] = NULL;
        epMaxPacket[ep] = 0;
    }
}

// Connect to the bus: turn on the USB module (USBENSOFEN bit in CTL),
// then set the DPPULLUPNONOTG bit in CONTROL to enable the pull-up.
void USBHAL::connect(void) 
{
    // enable USB
    USB0->CTL |= USB_CTL_USBENSOFEN_MASK;
    
    // Pull up enable
    USB0->CONTROL |= USB_CONTROL_DPPULLUPNONOTG_MASK;
}

// Disconnect from the bus: the inverse of connect().  Turns off the USB
// module (USBENSOFEN bit in CTL), then clears the DPPULLUPNONOTG bit in
// CONTROL to disable the pull-up.
void USBHAL::disconnect(void) 
{
    // disable USB
    USB0->CTL &= ~USB_CTL_USBENSOFEN_MASK;
    
    // Pull up disable
    USB0->CONTROL &= ~USB_CONTROL_DPPULLUPNONOTG_MASK;
}

// Device-initiated hard reset: reset the USB hardware module, then run
// the same internal reset work that is done for a host bus reset.
void USBHAL::hardReset(void)
{
    // reset the SIE module
    resetSIE();
    
    // do the internal reset work
    internalReset();
}

// No-op in this implementation.
void USBHAL::configureDevice(void) 
{
    // not needed
}

// No-op in this implementation.
void USBHAL::unconfigureDevice(void) 
{
    // not needed
}

// Record a new bus address assigned by the host.  The address is only
// stashed here (in 'addr', with 'set_addr' flagging it as pending); the
// SIE address register is not touched by this routine.
void USBHAL::setAddress(uint8_t address) 
{
    // we don't set the address now otherwise the usb controller does not ack
    // we set a flag instead
    // see usbisr when an IN token is received
    set_addr = 1;
    addr = address;
 }

// Realise (set up) a physical endpoint: allocate its transfer buffer,
// enable it in the endpoint register, point the BDT at the buffer, and
// for OUT (RX) endpoints queue the first read.
//
//   endpoint  - physical endpoint number
//   maxPacket - requested maximum packet size in bytes; limited to the
//               hardware maximum (64, or 1023 for isochronous endpoints)
//   flags     - endpoint flags; only ISOCHRONOUS is examined here
//
// Returns false if the endpoint number is out of range, true otherwise.
bool USBHAL::realiseEndpoint(uint8_t endpoint, uint32_t maxPacket, uint32_t flags) 
{
    // validate the endpoint number
    if (endpoint >= NUMBER_OF_PHYSICAL_ENDPOINTS)
        return false;

    // get the logical endpoint
    uint32_t log_endpoint = PHY_TO_LOG(endpoint);
    
    // Assume this is a bulk or interrupt endpoint.  For these, the hardware maximum
    // packet size is 64 bytes, and we use packet handshaking.
    uint32_t hwMaxPacket = 64;
    uint32_t handshake_flag = USB_ENDPT_EPHSHK_MASK;
    
    // If it's to be an isochronous endpoint, the hardware maximum packet size
    // increases to 1023 bytes, and we don't use handshaking.
    if (flags & ISOCHRONOUS) 
    {
        hwMaxPacket = 1023;
        handshake_flag = 0;
    }

    // limit the requested max packet size to the hardware limit
    if (maxPacket > hwMaxPacket)
        maxPacket = hwMaxPacket;
        
    ENTER_CRITICAL_SECTION
    {
        // if the endpoint buffer hasn't been allocated yet or was previously
        // allocated at a smaller size, allocate a new buffer        
        uint8_t *buf = endpoint_buffer[endpoint];
        if (buf == NULL || epMaxPacket[endpoint] < maxPacket)
        {
            // free any previous buffer
            if (buf != 0)
                delete [] buf;
    
            // allocate at the new size
            endpoint_buffer[endpoint] = buf = new uint8_t[maxPacket];
            
            // set the new max packet size
            epMaxPacket[endpoint] = maxPacket;
        }
        
        // set the endpoint register flags and BDT entry
        if (IN_EP(endpoint)) 
        {
            // IN endpt -> device to host (TX)
            USB0->ENDPOINT[log_endpoint].ENDPT |= handshake_flag | USB_ENDPT_EPTXEN_MASK;  // en TX (IN) tran
            bdt[EP_BDT_IDX(log_endpoint, TX, EVEN)].address = (uint32_t) buf;
            bdt[EP_BDT_IDX(log_endpoint, TX, ODD )].address = 0;
        }
        else 
        {
            // OUT endpt -> host to device (RX)
            USB0->ENDPOINT[log_endpoint].ENDPT |= handshake_flag | USB_ENDPT_EPRXEN_MASK;  // en RX (OUT) tran.
            bdt[EP_BDT_IDX(log_endpoint, RX, EVEN)].address = (uint32_t) buf;
            bdt[EP_BDT_IDX(log_endpoint, RX, ODD )].address = 0;
            
            // set up the first read
            bdt[EP_BDT_IDX(log_endpoint, RX, EVEN)].byte_count = maxPacket;
            bdt[EP_BDT_IDX(log_endpoint, RX, EVEN)].info       = BD_OWN_MASK | BD_DTS_MASK;
            bdt[EP_BDT_IDX(log_endpoint, RX, ODD )].info       = 0;
        }
    
        // Set DATA1 on the endpoint.  For RX endpoints, we just queued up our first
        // read, which will always be a DATA0 packet, so the next read will use DATA1.
        // For TX endpoints, we always flip the bit *before* sending the packet, so
        // (counterintuitively) we need to set the DATA1 bit now to send DATA0 in the
        // next packet.  So in either case, we want DATA1 initially.
        //
        // The shifts use an unsigned long operand: the endpoint number can be
        // as high as 31, and shifting a signed int 1 by 31 is not well defined.
        Data1 |= (1UL << endpoint);
        
        // mark the endpoint as realised
        epRealised |= (1UL << endpoint);
    }
    EXIT_CRITICAL_SECTION
    
    // success
    return true;
}

// Read the SETUP packet received on EP0 into the caller's buffer.  This
// goes through endpointReadResult() on EP0OUT; the byte count and the
// status result are discarded.
void USBHAL::EP0setup(uint8_t *buffer) 
{
    uint32_t sz;
    endpointReadResult(EP0OUT, buffer, &sz);
}

// Start reading the data stage of a SETUP transaction on EP0.  If the
// SIE doesn't already own the EP0 OUT BDT entry (bdt[0]), this hands the
// entry to the SIE for a packet of up to MAX_PACKET_SIZE_EP0 bytes.
// Does nothing if the SIE already owns the entry.
void USBHAL::EP0readStage(void) 
{
    if (!(bdt[0].info & BD_OWN_MASK))
    {
        Data1 &= ~1UL;  // set DATA0
        bdt[0].byte_count = MAX_PACKET_SIZE_EP0;
        bdt[0].info = (BD_DTS_MASK | BD_OWN_MASK);
    }
}

// Read an OUT packet on EP0.  If the SIE doesn't already own the EP0 OUT
// BDT entry (bdt[0]), this hands the entry to the SIE for a packet of up
// to MAX_PACKET_SIZE_EP0 bytes.  Does nothing if the SIE already owns
// the entry.  (The body is the same as EP0readStage().)
void USBHAL::EP0read(void) 
{
    if (!(bdt[0].info & BD_OWN_MASK))
    {
        // clear the EP0 OUT bit in Data1 (DATA0)
        Data1 &= ~1UL;
        bdt[0].byte_count = MAX_PACKET_SIZE_EP0;
        bdt[0].info = (BD_DTS_MASK | BD_OWN_MASK);
    }
}

uint32_t USBHAL::EP0getReadResult(uint8_t *buffer) 
{
    uint32_t sz;
    if (endpointReadResult(EP0OUT, buffer, &sz) == EP_COMPLETED) {
        return sz;
    }
    else {
        return 0;
    }
}

// Send a packet on the control endpoint.  This is endpointWrite() on
// EP0IN; the status result is discarded.
void USBHAL::EP0write(const volatile uint8_t *buffer, uint32_t size) 
{
    endpointWrite(EP0IN, buffer, size);
}

// No-op in this implementation.
void USBHAL::EP0getWriteResult(void) 
{
}

// Stall the control endpoint, via stallEndpoint() on EP0OUT.
void USBHAL::EP0stall(void) 
{
    stallEndpoint(EP0OUT);
}

// Start a read on an endpoint.  Both arguments are ignored and the
// result is always EP_PENDING; see the comment in the body for why no
// work is needed here.
EP_STATUS USBHAL::endpointRead(uint8_t endpoint, uint32_t maximumSize) 
{
    // We always start a new read when we fetch the result of the
    // previous read, so we don't have to do anything here.  Simply
    // indicate that the read is pending so that the caller can proceed
    // to check the results.
    return EP_PENDING;
}

// Fetch the result of a read on an OUT endpoint, and queue the next read.
//
//   endpoint  - physical endpoint number; must be an OUT (even-numbered) endpoint
//   buffer    - receives a copy of the packet data
//   bytesRead - receives the packet size in bytes
//
// Returns:
//   EP_INVALID   - bad endpoint number or direction, or endpoint not realised
//   EP_PENDING   - no packet is available yet
//   EP_COMPLETED - the packet was copied to 'buffer' and the BDT entry was
//                  handed back to the SIE to receive the next packet
//
// 'buffer' and 'bytesRead' are only written when the result is EP_COMPLETED.
EP_STATUS USBHAL::endpointReadResult(uint8_t endpoint, uint8_t *buffer, uint32_t *bytesRead) 
{
    // validate the endpoint number and direction (whether the endpoint
    // is realised is checked further down, inside the critical section)
    if (endpoint >= NUMBER_OF_PHYSICAL_ENDPOINTS || !OUT_EP(endpoint))
        return EP_INVALID;

    // get the logical endpoint
    uint32_t log_endpoint = PHY_TO_LOG(endpoint);
    
    // get the mode - it's isochronous if it doesn't have the handshake flag
    bool iso = (USB0->ENDPOINT[log_endpoint].ENDPT & USB_ENDPT_EPHSHK_MASK) == 0;
    
    // get the BDT index
    int idx = EP_BDT_IDX(log_endpoint, RX, 0);
        
    // Check to see if the endpoint is ready to read
    if (log_endpoint == 0)
    {
        // control endpoint - just make sure we own the BDT
        if (bdt[idx].info & BD_OWN_MASK)
            return EP_PENDING;
    }
    else
    {
        // If it's not isochronous, check to see if we've received data, and
        // return PENDING if not.  Isochronous endpoints don't use the TOKDNE 
        // interrupt (they use SOF instead), so the 'complete' flag doesn't
        // apply if it's an iso endpoint.
        if (!iso && !(epComplete & EP(endpoint)))
            return EP_PENDING;
    }

    EP_STATUS result = EP_INVALID;    
    ENTER_CRITICAL_SECTION
    {
        // proceed only if the endpoint has been realised
        if (epRealised & EP(endpoint))
        {
            // note if we have a SETUP token
            bool setup = (log_endpoint == 0 && TOK_PID(idx) == SETUP_TOKEN);
        
            // get the received data buffer and size
            uint8_t *ep_buf = endpoint_buffer[endpoint];
            uint32_t sz = bdt[idx].byte_count;
        
            // copy the data from the hardware receive buffer to the caller's buffer
            *bytesRead = sz;
            for (uint32_t n = 0 ; n < sz ; n++)
                buffer[n] = ep_buf[n];
            
            // Figure the DATA0/DATA1 bit for the next packet received on this
            // endpoint.  The bit normally toggles on each packet, but it's
            // special for SETUP packets on endpoint 0.  The next OUT packet
            // after a SETUP packet with no data stage is always DATA0, even
            // if the SETUP packet was also DATA0.
            //
            // (buffer[6] is byte 6 of the SETUP packet - presumably the low
            // byte of wLength, so zero means "no data stage"; note that byte 7
            // isn't examined - TODO confirm)
            if (setup && (sz >= 7 && buffer[6] == 0)) {
                // SETUP with no data stage -> next packet is always DATA0
                Data1 &= ~1UL;
            }
            else {
                // otherwise just toggle the last bit (assuming it matches our
                // internal state - if not, we must be out of sync, so presumably
                // *not* toggling our state will get us back in sync)
                if (((Data1 >> endpoint) & 1) == ((bdt[idx].info >> 6) & 1))
                    Data1 ^= (1 << endpoint);
            }
        
            // set up the BDT entry to receive the next packet, and hand it to the SIE
            // (bit 6 of 'info' is the DATA0/DATA1 bit expected for that packet)
            bdt[idx].byte_count = epMaxPacket[endpoint];
            bdt[idx].info = BD_DTS_MASK | BD_OWN_MASK | (((Data1 >> endpoint) & 1) << 6);

            // clear the SUSPEND TOKEN BUSY flag to allow token processing to continue
            USB0->CTL &= ~USB_CTL_TXSUSPENDTOKENBUSY_MASK;
        
            // clear the 'completed' flag - we're now awaiting the next packet
            epComplete &= ~EP(endpoint);
            
            // the read is now complete
            result = EP_COMPLETED;
        }
    }
    EXIT_CRITICAL_SECTION
        
    return result;
}

// Start a write (IN transfer) on an endpoint.
//
//   endpoint - physical endpoint number; must be an IN (odd-numbered) endpoint
//   data     - bytes to send
//   size     - number of bytes to send; must fit in the endpoint's buffer
//
// Returns EP_PENDING if the packet was copied to the endpoint buffer and
// handed to the SIE.  Returns EP_INVALID if the endpoint number or
// direction is wrong, the endpoint isn't realised, the SIE still owns
// the BDT entry from an earlier write, or 'size' is larger than the
// endpoint's allocated buffer.
EP_STATUS USBHAL::endpointWrite(uint8_t endpoint, const volatile uint8_t *data, uint32_t size) 
{
    // validate the endpoint number and direction
    if (endpoint >= NUMBER_OF_PHYSICAL_ENDPOINTS || !IN_EP(endpoint))
        return EP_INVALID;

    // get the BDT index
    int idx = EP_BDT_IDX(PHY_TO_LOG(endpoint), TX, 0);

    // This endpoint's bit in the endpoint bit vectors.  The shift is done
    // on an unsigned long because the endpoint number can be as high as 31.
    const uint32_t epBit = (1UL << endpoint);
    
    EP_STATUS result = EP_INVALID;
    ENTER_CRITICAL_SECTION
    {
        // Proceed only if the endpoint has been realised, we own the BDT,
        // and the data fits in the endpoint buffer.  Without the size check,
        // an oversized write would run past the end of the buffer that
        // realiseEndpoint() allocated.
        if ((epRealised & epBit) 
            && !(bdt[idx].info & BD_OWN_MASK)
            && size <= epMaxPacket[endpoint])
        {
            // get the endpoint buffer
            uint8_t *ep_buf = endpoint_buffer[endpoint];
        
            // copy the data to the hardware buffer
            bdt[idx].byte_count = size;
            for (uint32_t n = 0 ; n < size ; n++)
                ep_buf[n] = data[n];
            
            // toggle DATA0/DATA1 before sending
            Data1 ^= epBit;
    
            // hand the BDT to the SIE to do the send
            bdt[idx].info = BD_OWN_MASK | BD_DTS_MASK | (((Data1 >> endpoint) & 1) << 6);

            // write is now pending in the hardware
            result = EP_PENDING;
        }
    }
    EXIT_CRITICAL_SECTION
    
    return result;
}

// Check the result of the last write on an endpoint.
//
// Returns EP_INVALID if the endpoint number is out of range or the
// endpoint isn't realised, EP_COMPLETED if the write has completed (this
// consumes the completion flag), and EP_PENDING otherwise.
EP_STATUS USBHAL::endpointWriteResult(uint8_t endpoint) 
{
    // Validate the endpoint number, as endpointWrite() does.  An
    // out-of-range number can't be turned into a bit in the flag vectors.
    if (endpoint >= NUMBER_OF_PHYSICAL_ENDPOINTS)
        return EP_INVALID;

    // this endpoint's bit in the endpoint bit vectors
    const uint32_t epBit = (1UL << endpoint);

    // assume write is still pending
    EP_STATUS result = EP_PENDING;
    
    ENTER_CRITICAL_SECTION
    {
        // If the endpoint isn't realised, the result is 'invalid'.  Otherwise,
        // check the 'completed' flag: if set, the write is completed.
        if (!(epRealised & epBit))
        {
            // endpoint isn't realised - there's no write result to report
            result = EP_INVALID;
        }
        else if (epComplete & epBit) 
        {
            // the result is COMPLETED
            result = EP_COMPLETED;

            // clear the 'completed' flag - this is consumed by fetching the result
            epComplete &= ~epBit;
        }
    }
    EXIT_CRITICAL_SECTION
    
    // return the result
    return result;
}

void USBHAL::stallEndpoint(uint8_t endpoint) 
{
    ENTER_CRITICAL_SECTION
    {
        if (epRealised & EP(endpoint))
            USB0->ENDPOINT[PHY_TO_LOG(endpoint)].ENDPT |= USB_ENDPT_EPSTALL_MASK;
    }
    EXIT_CRITICAL_SECTION
}

void USBHAL::unstallEndpoint(uint8_t endpoint) 
{
    ENTER_CRITICAL_SECTION
    {
        if (epRealised & EP(endpoint))
        {
            // clear the stall bit in the endpoint register
            USB0->ENDPOINT[PHY_TO_LOG(endpoint)].ENDPT &= ~USB_ENDPT_EPSTALL_MASK;
            
            // take ownership of the BDT entry
            int idx = PEP_BDT_IDX(endpoint, 0);
            bdt[idx].info &= ~(BD_OWN_MASK | BD_STALL_MASK | BD_DATA01_MASK);
            
            // if this is an RX endpoint, start a new read
            if (OUT_EP(endpoint))
            {
                bdt[idx].byte_count = epMaxPacket[endpoint];
                bdt[idx].info = BD_OWN_MASK | BD_DTS_MASK;
            }
    
            // Reset Data1 for the endpoint - we need to set the bit to 1 for 
            // either TX or RX, by the same logic as in realiseEndpoint()
            Data1 |= (1 << endpoint);
            
            // clear the 'completed' bit for the endpoint
            epComplete &= ~(1 << endpoint);
        }
    }
    EXIT_CRITICAL_SECTION
}

// Clear a stall on the control endpoint (EP0) and reset its transfer
// state in both directions.
//
//   force - if true, do the reset unconditionally; if false, do it only
//           when the STALL bit is currently set in the EP0 endpoint register
void USBHAL_KL25Z_unstall_EP0(bool force)
{
    ENTER_CRITICAL_SECTION
    {
        if (force || (USB0->ENDPOINT[0].ENDPT & USB_ENDPT_EPSTALL_MASK))
        {
            // clear the stall bit in the endpoint register
            USB0->ENDPOINT[0].ENDPT &= ~USB_ENDPT_EPSTALL_MASK;
        
            // take ownership of the RX and TX BDTs
            bdt[EP_BDT_IDX(0, RX, EVEN)].info &= ~(BD_OWN_MASK | BD_STALL_MASK | BD_DATA01_MASK);
            bdt[EP_BDT_IDX(0, TX, EVEN)].info &= ~(BD_OWN_MASK | BD_STALL_MASK | BD_DATA01_MASK);
            bdt[EP_BDT_IDX(0, RX, EVEN)].byte_count = MAX_PACKET_SIZE_EP0;
            bdt[EP_BDT_IDX(0, TX, EVEN)].byte_count = MAX_PACKET_SIZE_EP0;

            // start a new read on EP0OUT
            bdt[EP_BDT_IDX(0, RX, EVEN)].info = BD_OWN_MASK | BD_DTS_MASK;

            // reset the DATA0/1 bit to 1 on EP0IN and EP0OUT, by the same 
            // logic as in realiseEndpoint()            
            Data1 |= 0x03;
        }
    }
    EXIT_CRITICAL_SECTION
}

// Report whether an endpoint is currently stalled, i.e., whether the
// STALL bit is set in the endpoint register of its logical endpoint.
bool USBHAL::getEndpointStallState(uint8_t endpoint) 
{
    const uint32_t log_endpoint = PHY_TO_LOG(endpoint);
    const uint8_t stallBit = (USB0->ENDPOINT[log_endpoint].ENDPT & USB_ENDPT_EPSTALL_MASK);
    return stallBit != 0;
}

// Not implemented: currently a no-op.
void USBHAL::remoteWakeup(void) 
{
    // [TODO]
}

// Internal reset handler.  Called when we get a Bus Reset signal
// from the host, and when we initiate a reset of the SIE hardware
// from the device side.
//
// This returns the bus address, endpoint registers, BDT entries and the
// module's state variables to their initial values, notifies the upper
// layers through suspendStateChanged(), sleepStateChanged() and
// busReset(), and finally re-realises the control endpoint.
void USBHAL::internalReset(void)
{
    ENTER_CRITICAL_SECTION
    {
        int i;
        
        // set the default bus address
        USB0->ADDR = 0x00;
        addr = 0;
        set_addr = 0;
        
        // disable all endpoints
        epRealised = 0x00;
        for (i = 0 ; i < 16 ; i++)
            USB0->ENDPOINT[i].ENDPT = 0x00;
    
        // take control of all BDTs away from the SIE
        for (i = 0 ; i < sizeof(bdt)/sizeof(bdt[0]) ; ++i) 
        {
            bdt[i].info = 0;
            bdt[i].byte_count = 0;
        }
            
        // reset DATA0/1 state
        Data1 = 0x55555555;
    
        // reset endpoint completion status
        epComplete = 0;
    
        // reset EVEN/ODD state (and keep it permanently on EVEN -
        // this disables the hardware double-buffering system)
        USB0->CTL |= USB_CTL_ODDRST_MASK;
        
        // reset error status and enable all error interrupts
        USB0->ERRSTAT = 0xFF;
        USB0->ERREN = 0xFF;
        
        // enable our standard complement of interrupts
        USB0->INTEN = BUS_RESET_INTERRUPTS;
        
        // we're not suspended
        suspendStateChanged(0);
        
        // we're not sleeping
        sleepStateChanged(0);
    
        // notify upper layers of the bus reset, to reset the protocol state
        busReset();
        
        // realise the control endpoint (EP0) in both directions
        // (this has to come after epRealised is cleared above, since
        // realiseEndpoint() sets the endpoints' bits again)
        realiseEndpoint(EP0OUT, MAX_PACKET_SIZE_EP0, 0);
        realiseEndpoint(EP0IN, MAX_PACKET_SIZE_EP0, 0);
    }
    EXIT_CRITICAL_SECTION
}

// Static interrupt entry point, installed as the USB0 IRQ vector by the
// constructor.  Forwards to the singleton instance's usbisr(), with the
// 'inIRQ' flag set for the duration so that the critical section macros
// know they're running in USB interrupt context.
void USBHAL::_usbisr(void) 
{
    inIRQ = true;
    instance->usbisr();
    inIRQ = false;
}

// USB interrupt handler body.  Takes one snapshot of the ISTAT register
// and services the events flagged in it:
//
//  - Bus reset and token-done are each handled exclusively: after
//    servicing one of them, the handler returns without looking at
//    the other flags in the snapshot.
//  - SOF, stall, sleep, resume, and error are independent checks, so
//    any combination of them is serviced in a single pass.
//
// Each handler acknowledges its event by writing that event's mask
// back to ISTAT.
void USBHAL::usbisr(void) 
{
    // get the interrupt status - this tells us which event(s)
    // triggered this interrupt
    uint8_t istat = USB0->ISTAT;
       
    // reset interrupt
    if (istat & USB_ISTAT_USBRST_MASK) 
    {
        // do the internal reset work
        internalReset();
        
        // resume token processing if it was suspended
        USB0->CTL &= ~USB_CTL_TXSUSPENDTOKENBUSY_MASK;
        
        // clear the interrupt status
        USB0->ISTAT = USB_ISTAT_USBRST_MASK;
        
        // return immediately, ignoring any other status flags
        return;
    }
    
    // token interrupt
    if (istat & USB_ISTAT_TOKDNE_MASK) 
    {
        // get the endpoint information from the status register:
        // bits 7..4 are the endpoint number, bit 3 is the direction,
        // and bit 2 is the EVEN/ODD buffer selector
        uint32_t stat = USB0->STAT;
        uint32_t num  = (stat >> 4) & 0x0F;
        uint32_t dir  = (stat >> 3) & 0x01;
        // physical endpoint index: two per endpoint number, with the
        // direction bit in the low-order position
        int endpoint = (num << 1) | dir;
        uint32_t ev_odd = (stat >> 2) & 0x01;
        
        // check which endpoint we're working with
        if (num == 0)
        {
            // Endpoint 0 requires special handling
            uint32_t idx = EP_BDT_IDX(num, dir, ev_odd);
            int pid = TOK_PID(idx);
            if (pid == SETUP_TOKEN)
            {
                // SETUP packet - next IN (TX) packet must be DATA1 (confusingly,
                // this means we must clear the Data1 bit, since we flip the bit
                // before each send)
                Data1 &= ~0x02;
                
                // Forcibly take ownership of the EP0IN BDT in case we have
                // unfinished previous transmissions.  The protocol state machine
                // assumes that we don't, so it's probably an error if this code
                // actually does anything, but just in case...
                bdt[EP_BDT_IDX(0, TX, EVEN)].info &= ~BD_OWN_MASK;

                // handle the EP0 SETUP event in the generic protocol layer
                EP0setupCallback();
            } 
            else if (pid == OUT_TOKEN)
            {
                // OUT packet on EP0
                EP0out();
            }
            else if (pid == IN_TOKEN)
            {
                // IN packet on EP0
                EP0in();
                
                // Special case: if the 'set address' flag is set, it means that the
                // host just sent us our bus address.  We must put this into effect
                // in the hardware SIE immediately after sending the reply.  We just
                // did that above, so this is the time.
                if (set_addr) {
                    USB0->ADDR = addr & 0x7F;
                    set_addr = 0;
                }
            }
        }
        else
        {
            // For all other endpoints, note the read/write completion in the flags
            epComplete |= EP(endpoint);
            
            // call the endpoint token callback; if that handles the token, it consumes
            // the 'completed' status, so clear that flag again  (the callback table
            // is indexed by endpoint - 2; 'endpoint' is at least 2 here because
            // num != 0, so the index is never negative)
            if ((instance->*(epCallback[endpoint - 2]))()) {
                epComplete &= ~EP(endpoint);
            }
        }

        // resume token processing if suspended
        USB0->CTL &= ~USB_CTL_TXSUSPENDTOKENBUSY_MASK;

        // clear the TOKDNE interrupt status bit
        USB0->ISTAT = USB_ISTAT_TOKDNE_MASK;

        // Return without servicing any other flags captured in 'istat'.
        // NOTE(review): this presumably relies on the still-pending ISTAT
        // bits re-triggering the interrupt so that they're handled on a
        // later pass - confirm against the hardware reference manual.
        return;
    }

    // SOF interrupt
    if (istat & USB_ISTAT_SOFTOK_MASK) 
    {
        // Read frame number and signal the SOF event to the callback
        SOF(frameNumber());
        USB0->ISTAT = USB_ISTAT_SOFTOK_MASK;
    }

    // stall interrupt
    if (istat & USB_ISTAT_STALL_MASK)
    {
        // if the control endpoint (EP 0) is stalled, unstall it
        USBHAL_KL25Z_unstall_EP0(false);
        
        // clear the busy-suspend bit to resume token processing
        USB0->CTL &= ~USB_CTL_TXSUSPENDTOKENBUSY_MASK;
        
        // clear the interrupt status bit for STALL
        USB0->ISTAT = USB_ISTAT_STALL_MASK;
    }

    // Sleep interrupt.  This indicates that the USB bus has been
    // idle for at least 3ms (no frames transacted).  This has
    // several possible causes:
    //
    //  - The USB cable was unplugged
    //  - The host was powered off
    //  - The host has stopped communicating due to a software fault
    //  - The host has stopped communicating deliberately (e.g., due
    //    to user action, or due to a protocol error)
    //
    // A "sleep" event on the SIE is not to be confused with the
    // sleep/suspend power state on the PC.  The sleep event here
    // simply means that the SIE isn't seeing token traffic on the
    // required schedule.
    //
    // Note that the sleep event is the closest thing the KL25Z USB 
    // module has to a disconnect event.  There's no way to detect 
    // if we're physically connected to a host, so all we can really
    // know is that we're not transacting tokens.  USB requires token
    // exchange every 1ms, so if there's no token exchange for a few
    // milliseconds, the connection must be broken at some level.
    if (istat & USB_ISTAT_SLEEP_MASK) 
    {
        // tell the upper layers about the change
        sleepStateChanged(1);

        // resume token processing
        USB0->CTL &= ~USB_CTL_TXSUSPENDTOKENBUSY_MASK;

        // reset the interrupt bit
        USB0->ISTAT = USB_ISTAT_SLEEP_MASK;
    }

    // Resume from suspend mode.
    //
    // NB: Don't confuse "suspend" with "sleep".  Suspend mode refers 
    // to a hardware low-power mode initiated by the device.  "Sleep"
    // means only that the USB connection has been idle (no tokens
    // transacted) for more than 3ms.  A sleep signal means that the
    // connection with the host was broken, either physically or 
    // logically; it doesn't of itself have anything to do with suspend
    // mode, and in particular it doesn't mean that the host has
    // commanded us to enter suspend mode or told us that the host
    // is entering a low-power state.  The higher-level device
    // implementation might choose to enter suspend mode on the device
    // in response to a lost connection, but the USB/HAL layers don't
    // take any such action on their own.  Note that suspend mode can
    // only end with explicit intervention by the host, in the form of
    // a USB RESUME signal, so the host has to be aware that we're
    // doing this sort of power management.
    if (istat & USB_ISTAT_RESUME_MASK) 
    {
        // note the change
        suspendStateChanged(0);

        // remove suspend mode flags (this also turns the RESUME
        // interrupt enable back off)
        USB0->USBCTRL &= ~USB_USBCTRL_SUSP_MASK;
        USB0->USBTRC0 &= ~USB_USBTRC0_USBRESMEN_MASK;
        USB0->INTEN &= ~USB_INTEN_RESUMEEN_MASK;
        
        // clear the interrupt status
        USB0->ISTAT = USB_ISTAT_RESUME_MASK;
    }

    // error interrupt
    if (istat & USB_ISTAT_ERROR_MASK) 
    {
        // reset all error status bits, and clear the SUSPEND flag to allow
        // token processing to continue
        USB0->ERRSTAT = 0xFF;
        USB0->CTL &= ~USB_CTL_TXSUSPENDTOKENBUSY_MASK;
        USB0->ISTAT = USB_ISTAT_ERROR_MASK;
    }
}

#endif
