/*
** File: userif.c  
** Project: Linux Driver Framework
** Purpose: Implements mechanism by which OS-independent driver code can register
**          an interface accessible by user-mode programs.
**
** (C) Copyright Alpha Data 2010-2012
**
** Notes:
**
** 1. fork() causes a VMA's open() method to be called with a new VMA.
**
** 2. close() on a device handle does not cause any existing mmap'ed regions
**    to be munmapped.
**
** 3. mremap() can make holes in an mmap'ed region. This will not make the
**    system vulnerable to crashing.
**
** 4. mremap() can be used to extend an mmap'ed region beyond the end of a BAR
**    in a device that the VMA references. In order to prevent this resulting
**    in a crash vulnerability, the nopage() VMA method should always return
**    VM_FAULT_SIGBUS, thus preventing invalid physical memory being accessed.
**
** 5. The fopsRelease() function is not called until the last outstanding
**    operation on that handle is completed AND any references to the
**    (struct file*) are removed.
**
**    Outstanding operations on a handle include user-space calls such as
**    ioctl(), read(), write(), mmap() etc.
**
**    A vm_area_struct that was created by an mmap() call through the
**    (struct file*) counts as a reference to the (struct file*), as does
**    duplicating a file descriptor, e.g. using dup().
**
**    The implication of this is that (unlike Windows and the IRP_MJ_CLEANUP
**    IRP), no special cancellation of blocking client requests is required
**    inside fopsRelease(), because it won't be called until all client
**    requests are finished. On the other hand, if there is a nonblocking
**    client request, it needs to be cancelled inside fopsRelease().
*/

#include "df.h"
#include "dflinux.h"
#include <ioctl_dflinux.h>

/*
** Debug message levels used by this file. Each is passed as the first
** argument of dfDebugPrint() to classify the message being emitted.
*/
#define DEBUGLEVEL_ERROR  (0) /* Level of debug messages relating to driver logic errors */
#define DEBUGLEVEL_CANCEL (1) /* Level of debug messages relating to cancellation / cleanup */
#define DEBUGLEVEL_OPEN   (2) /* Level of debug messages relating to open / close operations */
#define DEBUGLEVEL_IO     (5) /* Level of debug messages relating to other I/O operations */

/*
** State of a client object's non-blocking request slot, stored in
** DfClientObject::system.nonblock.state and accessed under the client
** object's lock. deliverRequestNonBlocking() moves it from Idle to Active;
** it is moved back to Idle when the request is collected, when the handle is
** released, or when the handler reports an error.
*/
typedef enum _NonBlockState {
  NonBlockStateIdle = 0,     /* No non-blocking operation in progress */
  NonBlockStateActive = 1    /* A non-blocking operation is in progress */
} NonBlockState;

/*
** Module parameter "MaxNumMinor" (default 16), registered with S_IRUGO
** permissions. Its meaning is given by the MODULE_PARM_DESC() text below; it
** is not referenced by any other code visible in this part of the file.
*/
static uint MaxNumMinor = 16;
module_param(MaxNumMinor, uint, S_IRUGO);
MODULE_PARM_DESC(
  MaxNumMinor,
  "Number of device nodes in the filesystem that the driver attempts to reserve. Default is 16.");

/*
** ---------------------------------------------------------------------------
** Functions for managing client request lists
** ---------------------------------------------------------------------------
*/

/*
** Puts the framework-private ("system") part of a client request into its
** initial state: bound to pClObj, not completed, no cancellation pending, and
** with no cancel callback, context or wait queue attached.
**
** The 'status' and 'node' members are not touched here.
*/
static inline void
requestInit(
  DfClientRequest* pReq,
  DfClientObject* pClObj)
{
  /* Client object that issued the request */
  pReq->system.pClObj = pClObj;
  pReq->system.pContext = NULL;

  /* Lifecycle flags: neither completed nor pending cancellation */
  pReq->system.bCompleted = FALSE;
  pReq->system.bCancel = FALSE;

  /* Nothing to invoke on cancellation and nobody waiting yet */
  pReq->system.pCancelCallback = NULL;
  pReq->system.pWaitQueue = NULL;
}

/*
** Tries to move a request into the "cancellation pending" state.
**
** THE CALLER MUST HOLD THE CLIENT OBJECT LOCK.
**
** Outcomes:
**   - Request already completed: *ppCancelCallback = NULL, returns FALSE
**     (there is nothing to wait for).
**   - Request not completed, somebody else already marked it for
**     cancellation: *ppCancelCallback = NULL, returns TRUE.
**   - Request not completed and not yet marked: marks it, and if a cancel
**     callback is registered, detaches it (clearing the pointer and removing
**     the request from the list it is linked into) and hands it back through
**     *ppCancelCallback; otherwise *ppCancelCallback = NULL. Returns TRUE.
**
** A TRUE return means the caller must wait uninterruptibly for completion.
** The callback, if any, is only returned here; invoking it is left to the
** caller. pClObj is not used by this function.
*/
static boolean_t
cancelRequestUnsafe(
  DfClientObject* pClObj,
  DfClientRequest* pReq,
  DfCancelCallback** ppCancelCallback)
{
  DfCancelCallback* pDetached = NULL;

  /* Completed requests cannot be cancelled any more */
  if (pReq->system.bCompleted) {
    dfDebugPrint(DEBUGLEVEL_CANCEL, ("cancelRequestUnsafe: Request %p already completed,\n", pReq));
    *ppCancelCallback = NULL;
    return FALSE;
  }

  if (!pReq->system.bCancel) {
    /* We are the first to cancel: claim the request and detach its callback */
    dfDebugPrint(DEBUGLEVEL_CANCEL, ("cancelRequestUnsafe: Cancelling request %p\n", pReq));
    pReq->system.bCancel = TRUE;
    pDetached = pReq->system.pCancelCallback;
    if (NULL != pDetached) {
      pReq->system.pCancelCallback = NULL;
      dfListRemove(&pReq->system.node);
    }
  } else {
    /* Another party owns the cancellation; nothing for us to invoke */
    dfDebugPrint(DEBUGLEVEL_CANCEL, ("cancelRequestUnsafe: Request %p already pending cancellation\n", pReq));
  }

  *ppCancelCallback = pDetached;
  return TRUE;
}

/*
** Waits for a deferred request to be completed via dfRequestComplete().
**
**   pClObj - client object whose lock guards the request's state.
**   pReq   - request to wait for.
**
** Returns pReq->system.status if the request had already completed on entry,
** or if the task resumed with no signal pending. Returns -ECANCELED if a
** signal is pending after the interruptible sleep; in that case cancellation
** is attempted through cancelRequestUnsafe() and, unless the request had
** already completed, the task sleeps uninterruptibly before returning.
**
** NOTE(review): each sleep is a single schedule() with no re-check of
** bCompleted afterwards, so this appears to rely on the task only being woken
** by dfRequestComplete() or by a signal - confirm.
*/
static DfIoStatus
waitRequest(
  DfClientObject* pClObj,
  DfClientRequest* pReq)
{
  DfSpinLockFlags f;
  DfIoStatus status;
  DfCancelCallback* pCancelCallback;
  wait_queue_head_t waitQueue;
  wait_queue_t wait;
  boolean_t bMustWait;

  spin_lock_irqsave(&pClObj->system.lock, f);
  if (pReq->system.bCompleted) {
    /* Completed before we got here; no need to sleep */
    spin_unlock_irqrestore(&pClObj->system.lock, f);
    status = pReq->system.status;
  } else {
    /*
    ** Publish an on-stack wait queue through the request and queue ourselves
    ** on it while still holding the lock; dfRequestComplete() takes the same
    ** lock before it sets bCompleted and wakes pWaitQueue.
    */
    pReq->system.pWaitQueue = &waitQueue;
    init_waitqueue_head(&waitQueue);
    init_wait(&wait);
    prepare_to_wait(&waitQueue, &wait, TASK_INTERRUPTIBLE);
    spin_unlock_irqrestore(&pClObj->system.lock, f);
    schedule();
    if (signal_pending(current)) {
      /* Interrupted by a signal: try to cancel the request */
      dfDebugPrint(DEBUGLEVEL_CANCEL, ("waitRequest: attempting to cancel, pRequest=%p\n", pReq));
      spin_lock_irqsave(&pClObj->system.lock, f);
      bMustWait = cancelRequestUnsafe(pClObj, pReq, &pCancelCallback);
      if (bMustWait) {
        /*
        ** Request is still outstanding. The task state is changed before the
        ** lock is dropped, and the cancel callback is invoked with the lock
        ** released, before sleeping.
        */
        set_current_state(TASK_UNINTERRUPTIBLE);
        spin_unlock_irqrestore(&pClObj->system.lock, f);
        if (NULL != pCancelCallback) {
          dfDebugPrint(DEBUGLEVEL_CANCEL, ("waitRequest: invoking cancel callback, pRequest=%p\n", pReq));
          pCancelCallback(pReq, DfIoStatusCancelled);
        }
        schedule();
      } else {
        /* Request completed in the meantime; nothing to wait for */
        spin_unlock_irqrestore(&pClObj->system.lock, f);
      }
      /* Reported as cancelled on this path, whatever status the request holds */
      status = -ECANCELED;
    } else {
      status = pReq->system.status;
    }
    remove_wait_queue(&waitQueue, &wait);
  }

  return status;
}

/*
** Delivers an IOCTL to a handler and blocks until it has finished.
**
**   pDevObj      - device object; its request pool supplies the request.
**   pClObj       - client object (open handle) issuing the IOCTL.
**   pHandler     - handler method to invoke.
**   reqCode      - request code passed through to the handler.
**   pIoctlStruct - user-space address of the IOCTL structure.
**   sizeIn       - number of bytes to copy in from user space.
**   sizeOut      - number of bytes to copy back to user space on success.
**
** Returns the handler's status (or, if the handler deferred, the status
** produced by waitRequest()), -ENOMEM if no request could be allocated,
** -EINVAL if either size exceeds the local buffer, or -EFAULT if a user-space
** copy faults.
**
** The local buffer is zero-initialised so that, if the handler writes fewer
** than sizeOut bytes, the bytes copied back to user space never contain
** stale kernel stack contents.
*/
static DfIoStatus
deliverRequestBlocking(
  DfDeviceObject* pDevObj,
  DfClientObject* pClObj,
  DfOnIoctlMethod* pHandler,
  unsigned int reqCode,
  void* pIoctlStruct,
  unsigned int sizeIn,
  unsigned int sizeOut)
{
  struct {
    uint8_t data[DFLINUX_MAX_IOCTL_SIZE];
  } ioctlStruct = { { 0 } };
  DfClientRequest* pRequest;
  DfIoStatus status;

  /* Both copies below use the local buffer, so neither size may exceed it */
  if (sizeIn > sizeof(ioctlStruct) || sizeOut > sizeof(ioctlStruct)) {
    dfDebugPrint(DEBUGLEVEL_ERROR, ("*** deliverRequestBlocking: ioctl struct too large, sizeIn=%u sizeOut=%u\n", sizeIn, sizeOut));
    return -EINVAL;
  }

  pRequest = dfPoolAlloc(&pDevObj->system.request.pool, DfClientRequest);
  if (NULL == pRequest) {
    return -ENOMEM;
  }
  requestInit(pRequest, pClObj);

  if (copy_from_user(&ioctlStruct, pIoctlStruct, sizeIn)) {
    dfDebugPrint(DEBUGLEVEL_IO, ("deliverRequestBlocking: fault copying ioctl struct from user space, pRequest=%p pIoctl=%p sizeIn=%u\n", pRequest, pIoctlStruct, sizeIn));
    status = -EFAULT;
    goto out;
  }

  status = pHandler(pDevObj, pClObj, pRequest, reqCode, &ioctlStruct, sizeIn, sizeOut);
  if (status == DfIoStatusDeferred) {
    /* Handler will complete the request later; sleep until it does */
    status = waitRequest(pClObj, pRequest);
  }

  /* Output is only returned to user space when the request succeeded */
  if (DfIoStatusSuccess == status && 0 != sizeOut) {
    if (copy_to_user(pIoctlStruct, &ioctlStruct, sizeOut)) {
      dfDebugPrint(DEBUGLEVEL_IO, ("deliverRequestBlocking: fault copying ioctl struct to user space, pRequest=%p pIoctl=%p sizeOut=%u\n", pRequest, pIoctlStruct, sizeOut));
      status = -EFAULT;
    }
  }

out:
  dfPoolFree(&pDevObj->system.request.pool, pRequest);
  return status;
}

/*
** Starts an IOCTL as the client object's single non-blocking request.
**
**   pDevObj      - device object; its non-blocking pool supplies the request.
**   pClObj       - client object (open handle) issuing the IOCTL.
**   pHandler     - handler method to invoke.
**   reqCode      - request code passed through to the handler.
**   pIoctlStruct - user-space address of the IOCTL structure.
**   sizeIn       - number of bytes to copy in from user space.
**   sizeOut      - passed through to the handler; nothing is copied out here.
**
** Returns -ENOMEM if no request could be allocated, -EFAULT if the copy from
** user space faults, -EBUSY if a non-blocking request is already active on
** this client object, otherwise the handler's status. If that status is an
** error, the slot is returned to idle and the request is freed; on any other
** status the request stays attached to the client object as nonblock.pHead.
*/
static DfIoStatus
deliverRequestNonBlocking(
  DfDeviceObject* pDevObj,
  DfClientObject* pClObj,
  DfOnIoctlMethod* pHandler,
  unsigned int reqCode,
  void* pIoctlStruct,
  unsigned int sizeIn,
  unsigned int sizeOut)
{
  DfLinuxClientRequestNonBlock* pNonBlock;
  DfSpinLockFlags flags;
  DfIoStatus result;
  boolean_t bBusy;

  pNonBlock = dfPoolAlloc(&pDevObj->system.request.poolNonBlock, DfLinuxClientRequestNonBlock);
  if (NULL == pNonBlock) {
    return -ENOMEM;
  }
  requestInit(&pNonBlock->clientRequest, pClObj);

  if (copy_from_user(&pNonBlock->ioctlStruct, pIoctlStruct, sizeIn)) {
    dfDebugPrint(DEBUGLEVEL_IO, ("deliverRequestNonBlocking: fault copying ioctl struct from user space, pIoctl=%p sizeIn=%u\n", pIoctlStruct, sizeIn));
    dfPoolFree(&pDevObj->system.request.poolNonBlock, pNonBlock);
    return -EFAULT;
  }

  /* Claim the non-blocking slot, unless it is already in use */
  spin_lock_irqsave(&pClObj->system.lock, flags);
  bBusy = (NonBlockStateIdle != pClObj->system.nonblock.state) ? TRUE : FALSE;
  if (!bBusy) {
    pClObj->system.nonblock.state = NonBlockStateActive;
    pClObj->system.nonblock.pHead = pNonBlock;
    /* Completion wakes the poll wait queue of the client object */
    pNonBlock->clientRequest.system.pWaitQueue = &pClObj->system.nonblock.pollWaitQueue;
  }
  spin_unlock_irqrestore(&pClObj->system.lock, flags);

  if (bBusy) {
    dfPoolFree(&pDevObj->system.request.poolNonBlock, pNonBlock);
    return -EBUSY;
  }

  /* The handler is invoked with the lock released */
  result = pHandler(pDevObj, pClObj, &pNonBlock->clientRequest, reqCode, &pNonBlock->ioctlStruct, sizeIn, sizeOut);
  if (!DfIoStatusIsError(result)) {
    return result;
  }

  /* Handler refused the request: give the slot back and discard the request */
  spin_lock_irqsave(&pClObj->system.lock, flags);
  pClObj->system.nonblock.state = NonBlockStateIdle;
  pClObj->system.nonblock.pHead = NULL;
  spin_unlock_irqrestore(&pClObj->system.lock, flags);
  dfPoolFree(&pDevObj->system.request.poolNonBlock, pNonBlock);
  return result;
}

/*
** Removes the cancel callback from a request that has not yet completed.
**
** Returns TRUE if the callback was cleared (the request is also removed from
** the list it is linked into). Returns FALSE if cancellation is already
** pending, in which case nothing is changed.
*/
boolean_t
dfRequestClearCallback(
  DfClientRequest* pClientRequest)
{
  DfClientObject* pOwner;
  DfSpinLockFlags flags;
  boolean_t bCleared;

  dfAssert(!pClientRequest->system.bCompleted);

  pOwner = pClientRequest->system.pClObj;

  spin_lock_irqsave(&pOwner->system.lock, flags);
  /* Once bCancel is set it is too late: the cancel routine is running or has run */
  bCleared = pClientRequest->system.bCancel ? FALSE : TRUE;
  if (bCleared) {
    dfListRemove(&pClientRequest->system.node);
    pClientRequest->system.pCancelCallback = NULL;
  }
  spin_unlock_irqrestore(&pOwner->system.lock, flags);

  return bCleared;
}

/*
** Marks a request as completed with the given status and wakes the wait
** queue attached to it, if any.
**
** The request must not already be completed and must have no cancel callback
** registered (both are asserted). The status store, the completion flag and
** the wake-up are all performed while holding the owning client object's
** lock.
*/
void
dfRequestComplete(
  DfClientRequest* pClientRequest,
  DfIoStatus status)
{
  DfClientObject* pOwner;
  DfSpinLockFlags flags;

  dfAssert(!pClientRequest->system.bCompleted);
  dfAssert(pClientRequest->system.pCancelCallback == NULL);

  pOwner = pClientRequest->system.pClObj;

  spin_lock_irqsave(&pOwner->system.lock, flags);
  pClientRequest->system.status = status;
  pClientRequest->system.bCompleted = TRUE;
  if (NULL != pClientRequest->system.pWaitQueue) {
    wake_up(pClientRequest->system.pWaitQueue);
  }
  spin_unlock_irqrestore(&pOwner->system.lock, flags);
}

/*
** Registers a cancel callback on a request that has not yet completed and
** currently has no callback (both are asserted).
**
** Returns TRUE if the callback was installed; the request is then appended
** to the owning client object's request list. Returns FALSE if cancellation
** is already pending, in which case nothing is changed.
*/
boolean_t
dfRequestSetCallback(
  DfClientRequest* pClientRequest,
  DfCancelCallback* pCallback)
{
  DfClientObject* pOwner;
  DfSpinLockFlags flags;
  boolean_t bInstalled;

  dfAssert(!pClientRequest->system.bCompleted);
  dfAssert(pClientRequest->system.pCancelCallback == NULL);

  pOwner = pClientRequest->system.pClObj;

  spin_lock_irqsave(&pOwner->system.lock, flags);
  /* A request that is already being cancelled cannot take a new callback */
  bInstalled = pClientRequest->system.bCancel ? FALSE : TRUE;
  if (bInstalled) {
    pClientRequest->system.pCancelCallback = pCallback;
    dfListAddToTail(&pOwner->system.requests.list, &pClientRequest->system.node);
  }
  spin_unlock_irqrestore(&pOwner->system.lock, flags);

  return bInstalled;
}

/*
** Initialization hook for the user interface layer. It is currently a no-op.
*/
void
dfLinuxUserIfInit(
  void)
{
  /* Intentionally empty */
}

/*
** ---------------------------------------------------------------------------
** Linux driver framework special IOCTL handlers
** ---------------------------------------------------------------------------
*/

/*
** Special IOCTL: looks up one of the device's mmappable regions by tag and
** returns the address and length that user space needs for mmap().
**
**   pDevObj - device object whose region list is searched.
**   pClObj  - client object (not used here).
**   pIoctl  - user-space address of the IOCTL structure; the input part is
**             read from it and the output part is written back to it.
**
** Returns DfLinuxMmapSuccess on success, DfLinuxMmapInvalidTag if no region
** has the requested tag, or -EFAULT if a user-space copy faults.
*/
static int
ioctlGetMmapAddress(
  DfDeviceObject* pDevObj,
  DfClientObject* pClObj,
  void* pIoctl)
{
  DfLinuxIoctlGetMmapAddress params;
  DfMmappableRegion* pMatch;
  unsigned int tag;

  if (copy_from_user(&params.in, pIoctl, sizeof(params.in))) {
    dfDebugPrint(DEBUGLEVEL_IO, ("ioctlGetMmapAddress: fault copying ioctl struct from user space, pIoctl=%p size=%u\n",
      pIoctl, (unsigned int)sizeof(params.in)));
    return -EFAULT;
  }

  tag = params.in.regionTag;

  dfDebugPrint(DEBUGLEVEL_IO, ("ioctlGetMmapAddress: regionTag=%u(0x%x)\n", tag, tag));

  /* Walk the device's singly-linked list of regions looking for the tag */
  for (pMatch = pDevObj->system.pMmappableRegions; NULL != pMatch; pMatch = pMatch->system.pNext) {
    if (pMatch->system.regionTag == params.in.regionTag) {
      break;
    }
  }
  if (NULL == pMatch) {
    return DfLinuxMmapInvalidTag;
  }

  /* Address is the byte address of the first page frame plus the offset within that page */
  params.out.address = (uint64_t)pMatch->system.pfnStart * PAGE_SIZE + pMatch->system.pageOffset;
  params.out.length = pMatch->system.kernelSize;
  dfDebugPrint(DEBUGLEVEL_IO, ("ioctlGetMmapAddress: returning mmap info, address=0x%08lx_%08lx size=0x%08lx_%08lx\n",
    dfSplitUint64(params.out.address), dfSplitUint64(params.out.length)));

  if (copy_to_user(pIoctl, &params.out, sizeof(params.out))) {
    dfDebugPrint(DEBUGLEVEL_IO, ("ioctlGetMmapAddress: fault copying ioctl struct to user space, pIoctl=%p size=%u\n",
      pIoctl, (unsigned int)sizeof(params.out)));
    return -EFAULT;
  }

  return DfLinuxMmapSuccess;
}

/*
** Special IOCTL: cancels every request currently linked into the client
** object's request list (requests are put on that list by
** dfRequestSetCallback(), i.e. they are the ones with a cancel callback).
**
**   pDevObj - device object (not used here).
**   pClObj  - client object whose requests are cancelled.
**
** Always returns DfLinuxCancelSuccess.
**
** Works in two phases. First, under the lock, the whole list is moved to a
** local list and every request on it is marked bCancel = TRUE. Second, the
** requests are taken off the local list one at a time under the lock, and
** each cancel callback is invoked with the lock released.
*/
static int
ioctlCancelRequests(
  DfDeviceObject* pDevObj,
  DfClientObject* pClObj)
{
  DfList complete;
  DfListNode* pHead;
  DfListNode* pNode;
  DfClientRequest* pCurrent;
  DfCancelCallback* pCancelCallback;
  DfSpinLockFlags f;

  dfDebugPrint(DEBUGLEVEL_CANCEL, ("ioctlCancelRequests: entered\n"));

  /*
  ** Confiscate all current client requests but keep the list intact. Clears the
  ** list head pointer in the client object.
  */
  f = dfSpinLockGet(&pClObj->system.lock);
  dfListConfiscate(&complete, &pClObj->system.requests.list);
  pNode = dfListGetHead(&complete);
  while (NULL != pNode) {
    pCurrent = DF_CONTAINER_OF(pNode, DfClientRequest, system.node);
    /* With bCancel set, cancelRequestUnsafe() and dfRequestClearCallback() leave this request's list node alone */
    pCurrent->system.bCancel = TRUE; /* Mark the request as having cancellation pending */
    pNode = dfListGetNext(&complete, pNode);
  }
  dfSpinLockPut(&pClObj->system.lock, f);

  /* Now iterate through the confiscated list and cancel the client requests */
  while (1) {
    f = dfSpinLockGet(&pClObj->system.lock);
    pHead = dfListGetHead(&complete);
    if (pHead == NULL) {
      /* Local list drained; all callbacks have been invoked */
      dfSpinLockPut(&pClObj->system.lock, f);
      break;
    }
    /* Unlink the request and take ownership of its callback while still locked */
    dfListRemove(pHead);
    pCurrent = DF_CONTAINER_OF(pHead, DfClientRequest, system.node);
    pCancelCallback = pCurrent->system.pCancelCallback;
    pCurrent->system.pCancelCallback = NULL;
    dfSpinLockPut(&pClObj->system.lock, f);      

    /* The callback itself runs without the lock held */
    dfDebugPrint(DEBUGLEVEL_CANCEL, ("ioctlCancelRequests: cancelling request %p\n", pCurrent));
    dfAssert(NULL != pCancelCallback);
    pCancelCallback(pCurrent, DfIoStatusCancelled);
  }

  return DfLinuxCancelSuccess;
}

/*
** Special IOCTL: collects the result of the client object's non-blocking
** request, optionally waiting for it to complete.
**
**   pDevObj   - device object; its non-blocking pool receives the request back.
**   pClObj    - client object whose non-blocking request is being collected.
**   pIoctlOut - user-space address that receives the output data.
**   sizeOut   - number of bytes to copy to user space on success.
**   bBlock    - TRUE to sleep (interruptibly) until the request completes,
**               FALSE to return immediately if it has not.
**
** Returns:
**   -EPIPE       if there is no active non-blocking request;
**   -EINPROGRESS if the request is not complete and bBlock is FALSE;
**   -ECANCELED   if a signal is pending after sleeping (the request is left
**                in place and can be collected later);
**   -EFAULT      if copying the output to user space faults;
**   otherwise the completion status of the request.
**
** After every sleep the state, the head request and its completion flag are
** re-read under the lock. A wake-up therefore never by itself causes the
** request to be treated as completed, and a request that another thread
** collected (or replaced) while we slept is never touched through a stale
** pointer.
*/
static int
ioctlFinishNonBlock(
  DfDeviceObject* pDevObj,
  DfClientObject* pClObj,
  void* pIoctlOut,
  unsigned int sizeOut,
  boolean_t bBlock)
{
  DfIoStatus status;
  DfLinuxClientRequestNonBlock* pReq;
  DfSpinLockFlags f;
  wait_queue_t wait;

  f = dfSpinLockGet(&pClObj->system.lock);
  for (;;) {
    if (NonBlockStateActive != pClObj->system.nonblock.state) {
      /* Nothing to collect (never started, or already collected) */
      dfSpinLockPut(&pClObj->system.lock, f);
      return -EPIPE;
    }
    pReq = pClObj->system.nonblock.pHead;
    dfAssert(NULL != pReq);
    if (pReq->clientRequest.system.bCompleted) {
      break; /* Leave the loop with the lock still held */
    }
    if (!bBlock) {
      dfSpinLockPut(&pClObj->system.lock, f);
      return -EINPROGRESS;
    }

    /*
    ** Queue ourselves before dropping the lock; dfRequestComplete() wakes
    ** the queue while holding the same lock, so a wake-up cannot be missed.
    */
    init_wait(&wait);
    prepare_to_wait(&pClObj->system.nonblock.pollWaitQueue, &wait, TASK_INTERRUPTIBLE);
    dfSpinLockPut(&pClObj->system.lock, f);
    schedule();
    remove_wait_queue(&pClObj->system.nonblock.pollWaitQueue, &wait);
    if (signal_pending(current)) {
      return -ECANCELED;
    }

    /* Woken without a signal: re-validate everything under the lock */
    f = dfSpinLockGet(&pClObj->system.lock);
  }

  /* Completed: detach the request from the client object while still locked */
  pClObj->system.nonblock.state = NonBlockStateIdle;
  pClObj->system.nonblock.pHead = NULL;
  dfSpinLockPut(&pClObj->system.lock, f);

  /* We now own the request exclusively; copy the results out and free it */
  status = pReq->clientRequest.system.status;
  if (status == DfIoStatusSuccess && 0 != sizeOut) {
    if (copy_to_user(pIoctlOut, &pReq->ioctlStruct, sizeOut)) {
      dfDebugPrint(DEBUGLEVEL_IO, ("ioctlFinishNonBlocking: fault copying ioctl struct to user space, pIoctlOut=%p sizeOut=%u\n",
                       pIoctlOut, sizeOut));
      status = -EFAULT;
    }
  }
  dfPoolFree(&pDevObj->system.request.poolNonBlock, pReq);
  return status;
}

/*
** Dispatches one of the framework's own ("special") IOCTL codes to its
** handler.
**
**   pDevObj, pClObj - device and client objects, passed through.
**   code            - special IOCTL code.
**   pIoctl          - user-space address of the IOCTL structure.
**   sizeOut         - output size, used by the FINISH / FINISHWAIT codes.
**
** Returns the handler's result, or -EINVAL for an unrecognised code.
*/
static int
specialIoctl(
  DfDeviceObject* pDevObj,
  DfClientObject* pClObj,
  unsigned int code,
  void* pIoctl,
  unsigned int sizeOut)
{
  int result;

  dfDebugPrint(DEBUGLEVEL_IO, ("specialIoctl: entered, code=%u(0x%x) sizeOut=%u pIoctl=%p\n", code, code, sizeOut, pIoctl));

  switch (code) {
  case DFLINUX_IOCTLCODE_GETMMAPADDRESS:
    result = ioctlGetMmapAddress(pDevObj, pClObj, pIoctl);
    break;

  case DFLINUX_IOCTLCODE_CANCEL:
    result = ioctlCancelRequests(pDevObj, pClObj);
    break;

  case DFLINUX_IOCTLCODE_FINISH:
    /* Collect the non-blocking result without waiting */
    result = ioctlFinishNonBlock(pDevObj, pClObj, pIoctl, sizeOut, FALSE);
    break;

  case DFLINUX_IOCTLCODE_FINISHWAIT:
    /* Collect the non-blocking result, waiting for completion if necessary */
    result = ioctlFinishNonBlock(pDevObj, pClObj, pIoctl, sizeOut, TRUE);
    break;

  default:
    result = -EINVAL;
    break;
  }

  return result;
}

/*
** ---------------------------------------------------------------------------
** file_operations methods
**
** These functions are called for every DfInterface object, regardless of
** class. In general, after identifying the DfInterface object associated with
** the 'struct file' object, they call the methods associated with the
** DfInterface object in order to achieve the desired class-specific behavior.
** ---------------------------------------------------------------------------
*/

/*
** open() file operation.
**
** Finds the DfInterface that embeds the inode's cdev, creates a client
** object for the new handle, stores it in pFile->private_data and, if the
** interface has an OnOpen method, calls it (waiting if it defers).
**
** A handle opened for reading and writing is "active"; a handle opened for
** reading only is "passive"; any other mode is rejected with -EACCES.
**
** Returns the resulting status: -EACCES, -ENOMEM, an error from the OnOpen
** method / waitRequest(), or the non-error status otherwise. On an error
** status the client object is freed before returning.
*/
static int
fopsOpen(
  struct inode* pInode,
  struct file* pFile)
{
  DfIoStatus status = DfIoStatusSuccess;
  struct cdev* pCdev;
  DfInterface* pInterface;
  DfDeviceObject* pDevObj;
  DfClientObject* pClient = NULL;
  DfClientRequest* pOpenReq = NULL;
  DfOnOpenMethod* pOnOpen;
  boolean_t bReadable, bWritable, bPassive;

  dfDebugPrint(DEBUGLEVEL_OPEN, ("fopsOpen: entered, pInode=%p pFile=%p i_mode=0x%lx f_mode=0x%lx\n",
    pInode, pFile, (unsigned long)pInode->i_mode, (unsigned long)pFile->f_mode));

  pCdev = pInode->i_cdev;
  pInterface = DF_CONTAINER_OF(pCdev, DfInterface, system.cdev);
  pDevObj = pInterface->system.pDevObj;

  dfDebugPrint(DEBUGLEVEL_OPEN, ("fopsOpen: mode=0x%x perm=0x%x\n", pFile->f_mode, pInode->i_mode));

  /* Read access is mandatory; write access decides active vs. passive */
  bReadable = ((pFile->f_mode & FMODE_READ) == FMODE_READ) ? TRUE : FALSE;
  bWritable = ((pFile->f_mode & FMODE_WRITE) == FMODE_WRITE) ? TRUE : FALSE;
  if (!bReadable) {
    status = -EACCES;
    goto done;
  }
  bPassive = bWritable ? FALSE : TRUE;

  /* Create and initialize the per-handle client object */
  pClient = (DfClientObject*)dfMalloc(sizeof(*pClient));
  if (NULL == pClient) {
    status = -ENOMEM;
    goto done;
  }
  dfSpinLockInit(&pClient->system.lock);
  pClient->system.pFile = pFile;
  pClient->system.pDevObj = pDevObj;
  pClient->system.pInterface = pInterface;
  pClient->system.bPassive = bPassive;
  pClient->system.requests.count = 0;
  dfListInit(&pClient->system.requests.list);
  pClient->system.nonblock.state = NonBlockStateIdle;
  pClient->system.nonblock.pHead = NULL;
  init_waitqueue_head(&pClient->system.nonblock.pollWaitQueue);
  pClient->system.pContext = NULL;
  pFile->private_data = (void*)pClient;

  /* Give the interface's OnOpen method, if any, a chance to accept or reject */
  pOnOpen = pInterface->system.methods.pOnOpen;
  if (NULL != pOnOpen) {
    pOpenReq = dfPoolAlloc(&pDevObj->system.request.pool, DfClientRequest);
    if (NULL == pOpenReq) {
      status = -ENOMEM;
      goto done;
    }
    requestInit(pOpenReq, pClient);
    status = pOnOpen(pDevObj, pClient, pOpenReq, pInterface, bPassive ? FALSE : TRUE);
    if (DfIoStatusDeferred == status) {
      status = waitRequest(pClient, pOpenReq);
    }
    if (DfIoStatusIsError(status)) {
      goto done;
    }
  }

  nonseekable_open(pInode, pFile);

done:
  /* The request object is only needed for the duration of the open */
  if (NULL != pOpenReq) {
    dfPoolFree(&pDevObj->system.request.pool, pOpenReq);
  }
  /* A failed open must not leave a client object behind */
  if (DfIoStatusIsError(status) && NULL != pClient) {
    dfFree(pClient);
  }
  return status;
}

/*
** release() file operation.
**
** If a non-blocking request is still attached to the client object, it is
** cancelled (or, if it cannot be cancelled, waited for uninterruptibly) and
** freed. The interface's OnCleanup and OnClose methods are then called, if
** present, and the client object is freed.
**
** Always returns 0 ('err' is never modified).
*/
static int
fopsRelease(
  struct inode* pInode,
  struct file* pFile)
{
  DfDeviceObject* pDevObj;
  DfClientObject* pClObj;
  DfInterface* pInterface;
  DfOnCleanupMethod* pOnCleanup;
  DfOnCloseMethod* pOnClose;
  DfSpinLockFlags f;
  DfLinuxClientRequestNonBlock* pReq = NULL;
  DfCancelCallback* pCancelCallback = NULL;
  boolean_t bMustWait;
  wait_queue_head_t waitQueue;
  wait_queue_t wait;
  int err = 0;
  
  dfDebugPrint(DEBUGLEVEL_OPEN, ("fopsRelease: Entered, pInode=%p pFile=%p\n", pInode, pFile));

  pClObj = (DfClientObject*)pFile->private_data;
  dfAssert(NULL != pClObj);

  pInterface = pClObj->system.pInterface;
  pDevObj = pClObj->system.pDevObj;

  /* If there is a non-blocking request still in progress, stop it */
  f = dfSpinLockGet(&pClObj->system.lock);
  switch (pClObj->system.nonblock.state) {
  case NonBlockStateActive:
    pReq = pClObj->system.nonblock.pHead;
    dfAssert(NULL != pReq);
    /*
    ** Redirect the request's completion wake-up from the poll wait queue to
    ** a private on-stack queue that we are already queued on. This is done
    ** under the lock that dfRequestComplete() takes before waking.
    */
    init_waitqueue_head(&waitQueue);
    init_wait(&wait);
    add_wait_queue(&waitQueue, &wait);
    pReq->clientRequest.system.pWaitQueue = &waitQueue;
    dfDebugPrint(DEBUGLEVEL_CANCEL, ("fopsRelease: attempting to cancel, pRequest=%p\n", pReq));
    bMustWait = cancelRequestUnsafe(pClObj, &pReq->clientRequest, &pCancelCallback);
    if (bMustWait) {
      /*
      ** Request not yet completed. The task state is changed before the lock
      ** is dropped, and the cancel callback (if any) is invoked with the lock
      ** released, before sleeping.
      */
      set_current_state(TASK_UNINTERRUPTIBLE);
      dfSpinLockPut(&pClObj->system.lock, f);
      if (NULL != pCancelCallback) {
        dfDebugPrint(DEBUGLEVEL_CANCEL, ("fopsRelease: Invoking cancel callback, pRequest=%p\n", pReq));
        pCancelCallback(&pReq->clientRequest, DfIoStatusCancelled);
      }
      schedule();
    } else {
      /* Request had already completed; nothing to wait for */
      dfSpinLockPut(&pClObj->system.lock, f);
      dfDebugPrint(DEBUGLEVEL_CANCEL, ("fopsRelease: not waiting, pRequest=%p\n", pReq));
    }
    remove_wait_queue(&waitQueue, &wait);
    /* These fields are reset without taking the lock */
    dfPoolFree(&pDevObj->system.request.poolNonBlock, pReq);
    pClObj->system.nonblock.pHead = NULL;
    pClObj->system.nonblock.state = NonBlockStateIdle;
    break;

  case NonBlockStateIdle:
    dfSpinLockPut(&pClObj->system.lock, f);
    /* Nothing to do */
    break;

  default:
    /* Should never get here */
    dfSpinLockPut(&pClObj->system.lock, f);
    dfAssert(FALSE);
    break;
  }
  
  /* Interface-specific teardown: cleanup first, then close */
  pOnCleanup = pInterface->system.methods.pOnCleanup;
  if (NULL != pOnCleanup) {
    pOnCleanup(pDevObj, pClObj);
  }

  pOnClose = pInterface->system.methods.pOnClose;
  if (NULL != pOnClose) {
    pOnClose(pDevObj, pClObj);
  }

  /* There should be no non-blocking operations now */
  dfAssert(pClObj->system.nonblock.pHead == NULL);
  dfAssert(pClObj->system.nonblock.state == NonBlockStateIdle);

  dfFree(pClObj);

  return err;
}

#if _IOC_SIZEBITS < 12
# error _IOC_SIZEBITS is less than 12
#endif

/*
** ioctl() file operation (native pointer size).
**
** Decoding of 'code':
**   - _IOC_SIZE(code): bits 0..5 are the input size in bytes, bits 6..11 the
**     output size in bytes.
**   - _IOC_NR(code): if any DFLINUX_IOCTL_BASE bit is set, the request is one
**     of the framework's special IOCTLs and goes to specialIoctl(). Otherwise
**     bit 0x40 selects non-blocking delivery and bits 0..5 are the request
**     code handed to the interface's OnIoctl method.
**
** Returns the result of the special IOCTL or of the delivered request,
** -EINVAL if a non-blocking request specifies a non-zero output size, or
** DfIoStatusInvalidCode if the interface has no OnIoctl method.
**
** pInode is only used for the debug message.
*/
static int
fopsIoctl(
  struct inode* pInode,
  struct file* pFile,
  unsigned int code,
  unsigned long arg)
{
  DfClientObject* pClObj;
  DfDeviceObject* pDevObj;
  DfInterface* pInterface;
  DfOnIoctlMethod* pHandler;
  DfIoStatus status = DfIoStatusInvalidCode;
  unsigned int iocSize, sizeIn, sizeOut, reqCode;
  boolean_t bNonBlock;

  dfDebugPrint(DEBUGLEVEL_IO, ("fopsIoctl: entered, pInode=%p pFile=%p code=%u(%x) arg=%lu(0x%lx)\n",
		   pInode, pFile, code, code, arg, arg));

  pClObj = (DfClientObject*)pFile->private_data;
  dfAssert(NULL != pClObj);
  pDevObj = pClObj->system.pDevObj;
  pInterface = pClObj->system.pInterface;

  /* The IOCTL structure in and out sizes are encoded in the IOC size field of 'code' */
  iocSize = _IOC_SIZE(code);
  sizeIn = iocSize & 0x3fU;
  sizeOut = (iocSize >> 6) & 0x3fU;
  reqCode = _IOC_NR(code);
  if (reqCode & DFLINUX_IOCTL_BASE) {
    return specialIoctl(pDevObj, pClObj, reqCode, (void*)(uintptr_t)arg, sizeOut);
  }
  bNonBlock = (reqCode & 0x40U) ? TRUE : FALSE;
  reqCode &= 0x3fU;

  /*
  ** 'code' comes straight from user space, so a malformed value must be
  ** rejected with an error rather than checked with an assertion. A
  ** non-blocking request returns its output through the FINISH IOCTLs, so it
  ** must not carry an output size itself.
  */
  if (bNonBlock && 0 != sizeOut) {
    dfDebugPrint(DEBUGLEVEL_IO, ("fopsIoctl: rejecting non-blocking request with sizeOut=%u, reqCode=%u\n", sizeOut, reqCode));
    return -EINVAL;
  }

  dfDebugPrint(DEBUGLEVEL_IO, ("fopsIoctl: sizeIn=%u sizeOut=%u reqCode=%u bNonBlock=%s\n",
    sizeIn, sizeOut, reqCode, bNonBlock ? "TRUE" : "FALSE"));

  pHandler = pInterface->system.methods.pOnIoctl;
  if (NULL != pHandler) {
    if (bNonBlock) {
      status = deliverRequestNonBlocking(pDevObj, pClObj, pHandler, reqCode, (void*)(uintptr_t)arg, sizeIn, sizeOut);
    } else {
      status = deliverRequestBlocking(pDevObj, pClObj, pHandler, reqCode, (void*)(uintptr_t)arg, sizeIn, sizeOut);
    }
  }

  return status;
}

/*
** unlocked_ioctl() file operation: obtains the inode behind pFile (the
** accessor depends on the kernel version) and forwards to fopsIoctl().
*/
static long
fopsUnlockedIoctl(
  struct file* pFile,
  unsigned int code,
  unsigned long arg)
{
  struct inode* pInode;

  dfDebugPrint(DEBUGLEVEL_IO, ("fopsUnlockedIoctl: entered, pFile=%p code=%u(%x) arg=%lu(0x%lx)\n", pFile, code, code, arg, arg));

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0))
  pInode = file_inode(pFile);
#else
  pInode = pFile->f_dentry->d_inode;
#endif

  return fopsIoctl(pInode, pFile, code, arg);
}

#if DF_NEED_THUNK
/*
** compat_ioctl() file operation, compiled only when DF_NEED_THUNK is set.
**
** Same decoding of 'code' as the native ioctl path (input size in bits 0..5
** and output size in bits 6..11 of _IOC_SIZE; special IOCTLs selected by
** DFLINUX_IOCTL_BASE; bit 0x40 of _IOC_NR selects non-blocking delivery),
** but 'arg' is converted with compat_ptr() and requests are delivered to the
** interface's OnIoctlThunk method.
**
** Returns the result of the special IOCTL or of the delivered request,
** -EINVAL if a non-blocking request specifies a non-zero output size, or
** DfIoStatusInvalidCode if the interface has no OnIoctlThunk method.
*/
static long
fopsCompatIoctl(
  struct file* pFile,
  unsigned int code,
  unsigned long arg)
{
  DfClientObject* pClObj;
  DfDeviceObject* pDevObj;
  DfInterface* pInterface;
  void* pArg;
  DfOnIoctlMethod* pHandler;
  DfIoStatus status = DfIoStatusInvalidCode;
  unsigned int iocSize, sizeIn, sizeOut, reqCode;
  boolean_t bNonBlock;

  dfDebugPrint(DEBUGLEVEL_IO, ("fopsCompatIoctl: entered, pFile=%p code=%u(%x) arg=%lu(0x%lx)\n",
		   pFile, code, code, arg, arg));

  pClObj = (DfClientObject*)pFile->private_data;
  dfAssert(NULL != pClObj);
  pDevObj = pClObj->system.pDevObj;
  pInterface = pClObj->system.pInterface;
  dfAssert(NULL != pInterface);

  pArg = compat_ptr(arg);

  /* The IOCTL structure in and out sizes are encoded in the IOC size field of 'code' */
  iocSize = _IOC_SIZE(code);
  sizeIn = iocSize & 0x3fU;
  sizeOut = (iocSize >> 6) & 0x3fU;
  reqCode = _IOC_NR(code);
  if (reqCode & DFLINUX_IOCTL_BASE) {
    return specialIoctl(pDevObj, pClObj, reqCode, pArg, sizeOut);
  }
  bNonBlock = (reqCode & 0x40U) ? TRUE : FALSE;
  reqCode &= 0x3fU;

  /*
  ** 'code' comes straight from user space, so a malformed value must be
  ** rejected with an error rather than checked with an assertion. A
  ** non-blocking request returns its output through the FINISH IOCTLs, so it
  ** must not carry an output size itself.
  */
  if (bNonBlock && 0 != sizeOut) {
    dfDebugPrint(DEBUGLEVEL_IO, ("fopsCompatIoctl: rejecting non-blocking request with sizeOut=%u, reqCode=%u\n", sizeOut, reqCode));
    return -EINVAL;
  }

  dfDebugPrint(DEBUGLEVEL_IO, ("fopsCompatIoctl: sizeIn=%u sizeOut=%u reqCode=%u bNonBlock=%s\n",
    sizeIn, sizeOut, reqCode, bNonBlock ? "TRUE" : "FALSE"));

  pHandler = pInterface->system.methods.pOnIoctlThunk;
  if (NULL != pHandler) {
    if (bNonBlock) {
      status = deliverRequestNonBlocking(pDevObj, pClObj, pHandler, reqCode, pArg, sizeIn, sizeOut);
    } else {
      status = deliverRequestBlocking(pDevObj, pClObj, pHandler, reqCode, pArg, sizeIn, sizeOut);
    }
  }

  return status;
}
#endif

/*
** VMA close() method. Only logs the call and asserts that the VMA has an
** associated file; no state is released here.
*/
static void
vmaClose(
  struct vm_area_struct* pVma)
{
  dfDebugPrint(DEBUGLEVEL_IO, ("vmaClose: entered, pVma=%p pVma->vm_private_data=%p\n", pVma, pVma->vm_private_data));

  dfAssert(pVma->vm_file != NULL);
}

/*
** Page-fault handler for VMAs created by fopsMmap().
**
** We define this function to prevent mremap from being able to extend an existing mapping:
** whichever variant is compiled, it unconditionally reports SIGBUS and never
** supplies a page.
**
** NO_PAGE_STRUCT_VM_FAULT selects between the fault() form (vmaFault) and the
** nopage() form (vmaNoPage) of the method.
*/
#if NO_PAGE_STRUCT_VM_FAULT
static int
vmaFault(
  struct vm_area_struct* pVma,
  struct vm_fault* pVmf)
{
  dfDebugPrint(DEBUGLEVEL_IO, ("vmaFault: entered, pVma=%p pVma->vm_private_data=%p\n", pVma, pVma->vm_private_data));

  dfAssert(pVma->vm_file != NULL);
  dfAssert(pVma->vm_private_data != NULL);

  /* Never resolve the fault */
  return VM_FAULT_SIGBUS;
}
#else
static struct page*
vmaNoPage(
  struct vm_area_struct* pVma,
  unsigned long address,
  int* pType)
{
  dfDebugPrint(DEBUGLEVEL_IO, ("vmaNoPage: entered, pVma=%p address=0x%lx pVma->vm_private_data=%p\n",
		   pVma, address, pVma->vm_private_data));

  dfAssert(pVma->vm_file != NULL);
  dfAssert(pVma->vm_private_data != NULL);

  /* Never resolve the fault; pType is left untouched */
  return NOPAGE_SIGBUS;
}
#endif

/*
** VMA open() method. Only logs the call and asserts that the VMA has an
** associated file; no state is acquired here.
*/
static void
vmaOpen(
  struct vm_area_struct* pVma)
{
  dfDebugPrint(DEBUGLEVEL_IO, ("vmaOpen: entered, pVma=%p pVma->vm_private_data=%p\n", pVma, pVma->vm_private_data));

  dfAssert(pVma->vm_file != NULL);
}

/*
** VMA operations installed by fopsMmap() on every mapping it creates. The
** fault handler member is .fault or .nopage depending on
** NO_PAGE_STRUCT_VM_FAULT, matching whichever handler is compiled.
*/
static struct vm_operations_struct vmOps = {
  .open = vmaOpen,
  .close = vmaClose,
#if NO_PAGE_STRUCT_VM_FAULT
  .fault = vmaFault
#else
  .nopage = vmaNoPage
#endif
};

/*
** mmap() file operation.
**
** The page offset of the VMA (vm_pgoff) is interpreted as a page frame
** number and must fall inside one of the device's mmappable regions; the
** whole requested range must lie within that region. The mapping is made
** non-cached. Regions flagged bIsMemory are mapped with remap_pfn_range();
** all others are treated as memory-mapped I/O and mapped with
** io_remap_pfn_range().
**
** Returns 0 on success, -EPERM for a passively opened handle, -EINVAL if the
** range is empty, matches no region or extends past the end of its region,
** or -EAGAIN if the remap call fails.
*/
static int
fopsMmap(
  struct file* pFile,
  struct vm_area_struct* pVma)
{
  DfDeviceObject* pDevObj;
  DfClientObject* pClObj;
  DfMmappableRegion* pRegion;
  unsigned long size, numPage;

  dfDebugPrint(DEBUGLEVEL_IO, ("fopsMmap: entered, pFile=%p pVma=%p\n", pFile, pVma));

  pClObj = (DfClientObject*)pFile->private_data;
  dfAssert(NULL != pClObj);
  pDevObj = pClObj->system.pDevObj;

  if (pClObj->system.bPassive) {
    return -EPERM; /* Don't let passively opened handles do mmap() */
  }

  size = pVma->vm_end - pVma->vm_start;
  numPage = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
  if (0 == numPage) {
    return -EINVAL; /* sanity check */
  }

  /* Find the region that contains the first requested page frame */
  pRegion = pDevObj->system.pMmappableRegions;
  while (NULL != pRegion) {
    if (pVma->vm_pgoff >= pRegion->system.pfnStart && pVma->vm_pgoff <= pRegion->system.pfnEnd) {
      break;
    }
    pRegion = pRegion->system.pNext;
  }
  if (NULL == pRegion) {
    return -EINVAL;
  }
  if (pVma->vm_pgoff + numPage - 1 > pRegion->system.pfnEnd) {
    return -EINVAL; /* attempting to map past end of region */
  }

  pVma->vm_page_prot = pgprot_noncached(pVma->vm_page_prot);
#ifdef VM_RESERVED
  pVma->vm_flags |= VM_RESERVED; /* Don't swap physical pages */
#endif
#ifdef VM_DONTEXPAND
  pVma->vm_flags |= VM_DONTEXPAND; /* Don't allow region to be expanded (i.e. via mremap() in user mode) */
#endif
  if (pRegion->system.bIsMemory) {
    dfDebugPrint(DEBUGLEVEL_IO, ("fopsMmap: vm_flags=0x%lX\n", (unsigned long)pVma->vm_flags));
    if (remap_pfn_range(pVma, pVma->vm_start, pVma->vm_pgoff, size, pVma->vm_page_prot)) {
      /* Name the call that actually failed on this path */
      dfDebugPrint(DEBUGLEVEL_ERROR, ("*** fopsMmap: remap_pfn_range failed, pVma=%p pVma->vm_start=%p pfn=%lu(0x%lx), size=%lu(0x%lx)\n",
        pVma, (void*)(uintptr_t)pVma->vm_start, (unsigned long)pVma->vm_pgoff, (unsigned long)pVma->vm_pgoff,
        (unsigned long)size, (unsigned long)size));
      return -EAGAIN;
    }
  } else {
    pVma->vm_flags |= VM_IO; /* Memory-mapped I/O mapping */
#ifdef VM_PFNMAP
    pVma->vm_flags |= VM_PFNMAP; /* Pure PFN-based mapping, although we don't use "nopfn" method from vm_ops at the moment */
#endif
#ifdef VM_DONTDUMP
    pVma->vm_flags |= VM_DONTDUMP; /* Since this is memory-mapped I/O, don't include in core dump */
#endif
    dfDebugPrint(DEBUGLEVEL_IO, ("fopsMmap: vm_flags=0x%lX\n", (unsigned long)pVma->vm_flags));
    if (io_remap_pfn_range(pVma, pVma->vm_start, pVma->vm_pgoff, size, pVma->vm_page_prot)) {
      dfDebugPrint(DEBUGLEVEL_ERROR, ("*** fopsMmap: io_remap_pfn_range failed, pVma=%p pVma->vm_start=%p pfn=%lu(0x%lx), size=%lu(0x%lx)\n",
        pVma, (void*)(uintptr_t)pVma->vm_start, (unsigned long)pVma->vm_pgoff, (unsigned long)pVma->vm_pgoff,
        (unsigned long)size, (unsigned long)size));
      return -EAGAIN;
    }
  }

  /* Install the VMA methods only once the mapping has been established */
  pVma->vm_ops = &vmOps;

  return 0;
}

/*
** poll() / select() handler for a device handle.
**
** Registers the caller on the client's nonblocking-request wait queue, then
** reports POLLPRI if the request at the head of the client's nonblocking
** request list has been marked as completed. In every other case the
** returned event mask is zero.
**
** pFile  - File object; its private_data field holds the DfClientObject.
** pTable - Poll table passed through to poll_wait().
*/
static unsigned int
fopsPoll(
  struct file* pFile,
  struct poll_table_struct* pTable)
{
  DfClientObject* pClObj;
  DfLinuxClientRequestNonBlock* pReq;
  DfSpinLockFlags lockFlags;
  unsigned int events = 0;

  dfDebugPrint(DEBUGLEVEL_IO, ("fopsPoll: entered, pFile=%p pTable=%p\n", pFile, pTable));

  pClObj = (DfClientObject*)pFile->private_data;
  dfAssert(NULL != pClObj);

  poll_wait(pFile, &pClObj->system.nonblock.pollWaitQueue, pTable);

  /* The nonblocking state and request list are examined with the client lock held */
  lockFlags = dfSpinLockGet(&pClObj->system.lock);
  switch (pClObj->system.nonblock.state) {
  case NonBlockStateIdle:
    /* No nonblocking request outstanding, so nothing to report */
    break;

  case NonBlockStateActive:
    pReq = pClObj->system.nonblock.pHead;
    dfAssert(NULL != pReq);
    if (pReq->clientRequest.system.bCompleted) {
      /* Head request has finished */
      events |= POLLPRI;
    }
    break;

  default:
    /* Unknown state; indicates a driver logic error */
    dfAssert(FALSE);
    break;
  }
  dfSpinLockPut(&pClObj->system.lock, lockFlags);

  return events;
}

/*
** File operations table installed on every interface's character device
** (see cdev_init() call in dfInterfaceRegister).
*/
static struct file_operations g_fops = {
  .owner = THIS_MODULE,

  /* Handle lifetime */
  .open = fopsOpen,
  .release = fopsRelease,

  /* I/O control; which entry points exist depends on the kernel and on whether thunking is required */
#if defined(HAVE_UNLOCKED_IOCTL)
  .unlocked_ioctl = fopsUnlockedIoctl,
#else
  .ioctl = fopsIoctl,
#endif
#if DF_NEED_THUNK
  .compat_ioctl = fopsCompatIoctl,
#endif

  /* Memory mapping, polling and seeking */
  .mmap = fopsMmap,
  .poll = fopsPoll,
  .llseek = no_llseek
};

/*
** -----------------------------------------------------------------
** Functions exported by Linux driver framework
** -----------------------------------------------------------------
*/

/*
** Creates an interface class: allocates the DfInterfaceClass structure,
** creates a device class named after pClassName, and reserves a range of
** MaxNumMinor character device numbers for the interfaces of the class.
**
** pDrvObj    - Driver object; used only in the debug message here.
** pClassName - Name for the class and for the device number region.
** ppClass    - On success, receives a pointer to the new class. Not written
**              on failure.
**
** Returns TRUE on success, or FALSE if any step failed, in which case
** everything acquired up to that point has been released again.
*/
boolean_t
dfInterfaceClassRegister(
  DfDriverObject* pDrvObj,
  const char* pClassName,
  DfInterfaceClass** ppClass)
{
  DfInterfaceClass* pNewClass;
  dev_t firstDevNum;

  pNewClass = (DfInterfaceClass*)dfMalloc(sizeof(*pNewClass));
  if (NULL == pNewClass) {
    dfDebugPrint(DEBUGLEVEL_ERROR, ("*** dfInterfaceClassRegister: failed to allocate struct, pDrvObj=%p\n", pDrvObj));
    return FALSE;
  }

  /* The class is created using the private copy of the name held in the structure */
  dfStrCpy(pNewClass->system.name, DF_ARRAY_LENGTH(pNewClass->system.name), pClassName);
#if USE_CLASS_SIMPLE
  pNewClass->system.pClass = class_simple_create(THIS_MODULE, pNewClass->system.name);
#else
  pNewClass->system.pClass = class_create(THIS_MODULE, pNewClass->system.name);
#endif
  if (IS_ERR(pNewClass->system.pClass)) {
    goto freeStruct;
  }

  /* Reserve MaxNumMinor device numbers, with minor numbers starting from 0 */
  if (alloc_chrdev_region(&firstDevNum, 0, MaxNumMinor, pClassName) < 0) {
    goto destroyClass;
  }

  pNewClass->system.numInterface = 0;
  pNewClass->system.devNum = firstDevNum;
  *ppClass = pNewClass;
  return TRUE;

  /* Failure paths: unwind in reverse order of acquisition */
destroyClass:
#if USE_CLASS_SIMPLE
  class_simple_destroy(pNewClass->system.pClass);
#else
  class_destroy(pNewClass->system.pClass);
#endif
freeStruct:
  dfFree(pNewClass);
  return FALSE;
}

/*
** Destroys an interface class created by dfInterfaceClassRegister: releases
** the class's range of device numbers, destroys the device class and frees
** the DfInterfaceClass structure.
**
** The class is asserted to have no interfaces remaining
** (pClass->system.numInterface == 0).
**
** pClass - Class to destroy; must not be used by the caller afterwards.
*/
DF_ATTR_PASSIVE_CONTEXT
void
dfInterfaceClassUnregister(
  DfInterfaceClass* pClass)
{
  dfAssertPassiveContext();

  dfAssert(NULL != pClass);
  dfAssert(NULL != pClass && 0 == pClass->system.numInterface);

  /* Hand back the MaxNumMinor device numbers reserved when the class was registered */
  unregister_chrdev_region(pClass->system.devNum, MaxNumMinor);

  /* Tear down the device class using the API that matches how it was created */
#if !USE_CLASS_SIMPLE
  class_destroy(pClass->system.pClass);
#else
  class_simple_destroy(pClass->system.pClass);
#endif

  dfFree(pClass);
}

/*
** Initializes an interface and attaches it to a device object and to an
** interface class.
**
** The interface's methods are copied from pMethods, the interface is pushed
** onto the head of the device object's interface list, its cdev is
** initialized with the framework's file operations, and it is given the
** class's current interface count as its index (the count is then
** incremented). The interface is left disabled; no character device is
** added here.
**
** pDevObj    - Device object that owns the interface.
** pInterface - Interface structure to initialize.
** pClass     - Class to which the interface belongs.
** pMethods   - Callbacks to copy into the interface.
**
** Always returns TRUE.
*/
DF_ATTR_PASSIVE_CONTEXT
boolean_t
dfInterfaceRegister(
  DfDeviceObject* pDevObj,
  DfInterface* pInterface,
  DfInterfaceClass* pClass,
  const DfInterfaceMethods* pMethods)
{
  int slot;

  dfAssertPassiveContext();

  /* Take a copy of the client-supplied callbacks */
  pInterface->system.methods.pOnOpen = pMethods->pOnOpen;
  pInterface->system.methods.pOnClose = pMethods->pOnClose;
  pInterface->system.methods.pOnCleanup = pMethods->pOnCleanup;
  pInterface->system.methods.pOnIoctl = pMethods->pOnIoctl;
#if DF_NEED_THUNK
  pInterface->system.methods.pOnIoctlThunk = pMethods->pOnIoctlThunk;
#endif

  /* Initial state: no context, not yet enabled */
  pInterface->system.pContext = NULL;
  pInterface->system.bEnabled = FALSE;
  pInterface->system.pClass = pClass;
  pInterface->system.pDevObj = pDevObj;

  /* Push onto the head of the device object's interface list */
  pInterface->system.pNext = pDevObj->system.interfaceList.pHead;
  pDevObj->system.interfaceList.pHead = pInterface;

  /* Prepare the character device; the owner is set after cdev_init() */
  cdev_init(&pInterface->system.cdev, &g_fops);
  pInterface->system.cdev.owner = THIS_MODULE;

  /* The interface's index is the class's interface count before this registration */
  slot = pClass->system.numInterface;
  pClass->system.numInterface = slot + 1;
  pInterface->system.index = slot;

  return TRUE;
}

/*
** Detaches an interface from its class and from a device object's list of
** interfaces.
**
** The interface must already be disabled (asserted). The class's interface
** count is decremented unconditionally, and the interface is then unlinked
** from pDevObj's singly-linked interface list if it is present there.
**
** pDevObj    - Device object whose interface list is searched.
** pInterface - Interface to remove.
**
** Returns TRUE if the interface was found in the list and unlinked, or
** FALSE if it was not in the list.
*/
DF_ATTR_PASSIVE_CONTEXT
boolean_t
dfInterfaceUnregister(
  DfDeviceObject* pDevObj,
  DfInterface* pInterface)
{
  DfInterfaceClass* pClass;
  DfInterface** pp;
  DfInterface* p;

  dfAssertPassiveContext();

  dfAssert(!pInterface->system.bEnabled);
  dfAssert(NULL != pInterface->system.pClass);

  pClass = pInterface->system.pClass;
  pClass->system.numInterface--;

  /*
  ** Walk the list using a pointer to the link that refers to the current
  ** element, so that the head and interior elements are unlinked the same way.
  */
  pp = &pDevObj->system.interfaceList.pHead;
  while (1) {
    p = *pp;
    if (NULL == p) {
      /* Reached end of list without finding the interface */
      break;
    }
    if (p == pInterface) {
      *pp = p->system.pNext;
      break;
    }
    /* Advance to the next link; without this step the loop never terminates
    ** when the interface is not the first element of the list */
    pp = &p->system.pNext;
  }

  return (NULL == p) ? FALSE : TRUE;
}

/*
** Enables or disables an interface, i.e. makes its character device (and
** associated device node) available to, or removes it from, user mode.
**
** The device number used is the class's base device number offset by the
** interface's index.
**
** pDevObj    - Device object that owns the interface; its PCI device is
**              passed as the parent of the created device.
** pInterface - Interface to enable or disable.
** bEnable    - TRUE to enable, FALSE to disable.
**
** Returns TRUE on success. When enabling, returns FALSE if the interface's
** index is not below MaxNumMinor, if cdev_add() fails, or if the device
** cannot be created; in those cases the interface remains disabled.
** Disabling always returns TRUE.
*/
DF_ATTR_PASSIVE_CONTEXT
boolean_t
dfInterfaceEnable(
  DfDeviceObject* pDevObj,
  DfInterface* pInterface,
  boolean_t bEnable)
{
  DfInterfaceClass* pClass;
  dev_t devNum;
  int result;

  dfAssertPassiveContext();

  pClass = pInterface->system.pClass;
  devNum = MKDEV(MAJOR(pClass->system.devNum), MINOR(pClass->system.devNum) + pInterface->system.index);
  if (bEnable) {
    dfAssert(!pInterface->system.bEnabled);
    /* Only MaxNumMinor device numbers were reserved for the class */
    if (pInterface->system.index >= MaxNumMinor) {
      dfDebugPrint(DEBUGLEVEL_ERROR, ("*** dfInterfaceEnable: can't enable; minor numbers exhausted, index=%u\n", pInterface->system.index));
      return FALSE;
    }
    result = cdev_add(&pInterface->system.cdev, devNum, 1);
    if (result < 0) {
      return FALSE;
    }
#if USE_CLASS_SIMPLE
    pInterface->system.pDevice = class_simple_device_add(pClass->system.pClass, devNum, &pDevObj->system.device.pPci->dev, "%s%d", pClass->system.name, pInterface->system.index);
#else
    pInterface->system.pDevice = device_create(pClass->system.pClass,
                                               &pDevObj->system.device.pPci->dev,
                                               devNum,
# if DEVICE_CREATE_DRVDATA
                                               (void*)pDevObj, /* drvdata */
# endif
                                               "%s%d",
                                               pClass->system.name,
                                               pInterface->system.index);
#endif
    /*
    ** The device creation functions report failure with an ERR_PTR() encoded
    ** error rather than NULL, so a NULL check alone would treat a failure as
    ** success. Check for both, and don't leave an error-encoded pointer behind.
    */
    if (NULL == pInterface->system.pDevice || IS_ERR(pInterface->system.pDevice)) {
      pInterface->system.pDevice = NULL;
      cdev_del(&pInterface->system.cdev);
      return FALSE;
    }
    pInterface->system.bEnabled = TRUE;
    return TRUE;
  } else {
    dfAssert(pInterface->system.bEnabled);
    pInterface->system.bEnabled = FALSE;
    /* Undo in the reverse order of enabling: remove the device first, then the cdev */
#if USE_CLASS_SIMPLE
    class_simple_device_remove(devNum);
#else
    device_destroy(pClass->system.pClass, devNum);
#endif
    cdev_del(&pInterface->system.cdev);
    return TRUE;
  }
}

