#include "l4.h"
#include "assert.h"

#include <linux/config.h>

#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/kernel.h>
#include <linux/bootmem.h>
#include <linux/utsname.h>
#include <linux/fs.h>
#include <linux/root_dev.h>
#include <linux/initrd.h>
#include <linux/highmem.h>
#include <linux/file.h>
#include <linux/module.h>

#include <asm/page.h>
#include <asm/uaccess.h>
#include <asm/syscalls.h>
#include <asm/signal.h>
#include <asm/signal_l4.h>
#include <asm/tlbflush.h>
#include <asm/setup.h>

#include <iguana/thread.h>
#include <iguana/asynch.h>
#include <iguana/cap.h>
#include <iguana/memsection.h>

#include <naming/naming.h>


#undef OLD_PREEMPTION

#include INC_SYSTEM2(exception.h)

#define STR(x)   #x
#define XSTR(x)  STR(x)

/* Our per cpu irq enable/disable. Where should this go? */
/* One unsigned long per CPU, initialised to 0 and exported as a per-CPU symbol. */
DEFINE_PER_CPU(unsigned long, _l4_irq_state) = { 0 };
EXPORT_PER_CPU_SYMBOL(_l4_irq_state);
/* Shorthand for the given CPU's instance of the rcu_tasklet per-CPU variable. */
#define RCU_tasklet(cpu) (per_cpu(rcu_tasklet, cpu))

/* NOTE(review): not written in the visible part of this file; presumably
   the bounds of the physical memory handed to Linux -- confirm. */
L4_Word_t start_phys_mem, end_phys_mem;
/* Thread created by main() and started at start_kernel(). */
L4_ThreadId_t main_thread;
/* Set by main() to the thread that goes on to run interrupt_loop();
   syscall_loop() compares IPC senders against it to detect preemption. */
L4_ThreadId_t timer_thread;

/* Changed during early boot? (We hope?) */
pgd_t swapper_pg_dir[1024];

/*
 * Fill in system_utsname.machine with "L4/Linux <system> <machine>",
 * where the two names are the stringified __SYSTEM__ and __MACHINE__
 * build macros.
 *
 * The write is bounded by the size of the destination field, and the
 * text is passed as a "%s" argument rather than as the format string so
 * that a '%' in either macro expansion cannot be misinterpreted.
 */
void __init
setup_machine_name(void)
{
	snprintf(system_utsname.machine, sizeof(system_utsname.machine),
		 "%s", "L4/Linux " XSTR(__SYSTEM__) " " XSTR(__MACHINE__));
}

/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 *
 * error_code:
 *	bit 0 == 0 means no page found, 1 means protection fault
 *	bit 1 == 0 means read, 1 means write
 *	bit 2 == 0 means kernel, 1 means user-mode
 *
 * Adjusted for l4/linux: do_page_fault was changed to l4_do_page_fault,
 * which simply:
 *	- calls handle_mm_fault if there is a valid vma,
 *	- forces a signal if the page fault was raised by a user,
 *	  calls generate_fake_interrupt to force the user into the
 *	  kernel (it still is in an ipc waiting for the reply to its
 *	  page fault) and returns (-1) to signal the error condition.
 *	- returns (-1) if the page fault happens within the kernel
 *	  context and leaves the rest to the calling function (should
 *	  be a uaccess function which in turn will return an EFAULT to
 *	  its calling function)
 *
 */
/* XXX Note, this function does not currently handle faults from
 * vmalloc'd/vmap'd memory. That should probably be in a separate
 * function anyway.
 */
/*
 * Resolve a page fault at `address` for the current task.
 *
 * address: faulting virtual address.
 * access:  access-type bits as tested below: 0x4 is treated as a load,
 *          0x1 as an instruction fetch, anything else as a write;
 *          (access & 0x2) is what is passed on to handle_mm_fault().
 *
 * Returns 0 when the fault was handled (or the signal page mapping was
 * queued in the reply message), and -1 on every failure path; on the
 * SIGSEGV and SIGBUS paths a signal is forced on `current` first.
 *
 * NOTE(review): `regs` is not declared anywhere in this function, yet it
 * is passed to user_mode().  This only builds if user_mode() is a macro
 * that does not evaluate its argument -- confirm against the arch header.
 */
int
l4_do_page_fault(unsigned long address, long access)
{
	struct vm_area_struct * vma;
	struct mm_struct *mm = current->mm;
	int fault, si_code = SEGV_MAPERR;
	siginfo_t info;

	/* If we're in an interrupt context, or have no user context,
	   we must not take the fault.  */
	if (!mm) /* || in_interrupt()) */
		goto bad_area_nosemaphore;

	/* mmap_sem is held for reading from here until one of the
	   up_read() calls below. */
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	/* address lies below the vma found: only acceptable if that vma
	   is a grows-down area that can be expanded to cover it. */
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (expand_stack(vma, address))
		goto bad_area;

	/* Ok, we have a good vm_area for this memory access, so
	   we can handle it.  */
 good_area:
	/* From here on a failure is a permission problem, not a missing
	   mapping. */
	si_code = SEGV_ACCERR;
	if (/* LOAD */ access & 0x4) {
		/* Allow reads even for write/execute-only mappings */
		if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
			goto bad_area;
	} else if (/* FETCH */ access & 0x1) {
		if (!(vma->vm_flags & VM_EXEC))
			goto bad_area;
	} else {
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	}

 survive:
	/* If for any reason at all we couldn't handle the fault,
	   make sure we exit gracefully rather than endlessly redo
	   the fault.  */

	/* The last argument is non-zero only when the 0x2 bit is set in
	   access. */
	fault = handle_mm_fault(mm, vma, address, access & 0x2);
	up_read(&mm->mmap_sem);

	switch (fault) {
	      case VM_FAULT_MINOR:
		current->min_flt++;
		break;
	      case VM_FAULT_MAJOR:
		current->maj_flt++;
		break;
	      case VM_FAULT_SIGBUS:
		goto do_sigbus;
	      case VM_FAULT_OOM:
		goto out_of_memory;
	      default:
		BUG();
	}
	return 0;

	/* Something tried to access memory that isn't in our memory map.
	   Fix it, but check if it's kernel or user first.  */
 bad_area:
	up_read(&mm->mmap_sem);
	/* Check if it is at TASK_SIG_BASE */
	if (user_mode(regs) && ((address & PAGE_MASK) == TASK_SIG_BASE) &&
			(access & 0x1/* Execute */))
	{
		L4_MapItem_t map;
		extern char __user_exregs_page[];
		extern void __wombat_user_sig_fault(void);

		/* Fault page into wombat first with execute rights */
		__wombat_user_sig_fault();

		/* Build a map item sending the page-sized fpage at
		   __user_exregs_page to TASK_SIG_BASE and place it in the
		   reply message.  The "+ 0x1" on the raw fpage presumably
		   sets the execute right -- confirm. */
		map = L4_MapItem ( (L4_Fpage_t) (
					L4_FpageLog2 ((L4_Word_t)&__user_exregs_page,
						PAGE_SHIFT).raw + 0x1 ),
				TASK_SIG_BASE );
		L4_MsgPut (&current_regs()->msg, 0, 0, (L4_Word_t *) 0, 2, &map);
		return 0;
	}

	/* Entered directly (without mmap_sem held) when there is no mm. */
 bad_area_nosemaphore:
	if (user_mode(regs))
		goto do_sigsegv;
	return -1;

/* We only enter here if from kernel access to user memory */
 no_context:
	return -1;

	/* We ran out of memory, or some other thing happened to us that
	   made us unable to handle the page fault gracefully.  */
 out_of_memory:
	printk("OUT OF MEMORY!\n");
	/* pid 1 is never killed: yield, retake mmap_sem and retry the
	   fault. */
	if (current->pid == 1) {
		yield();
		down_read(&mm->mmap_sem);
		goto survive;
	}
	printk(KERN_ALERT "VM: killing process %s(%d)\n",
	       current->comm, current->pid);
	if (!user_mode(regs))
		goto no_context;
	do_exit(SIGKILL);

 do_sigbus:
	/* Send a sigbus, regardless of whether we were in kernel
	   or user mode.  */
	info.si_signo = SIGBUS;
	info.si_errno = 0;
	info.si_code = BUS_ADRERR;
	info.si_addr = (void *) address;
	force_sig_info(SIGBUS, &info, current);
	if (!user_mode(regs))
		goto no_context;
	return -1;

 do_sigsegv:
#if 0
	printk("SIGSEGV(%d,%lx): @ %p  ip=%p\n", current->pid,
			current->thread_info->user_tid.raw,
			(void*)address,
			(void*)L4_MsgWord (&current_regs()->msg, 1));
//	L4_KDB_Enter("SIGSEGV");
#endif
	/* si_code is SEGV_MAPERR unless a vma was found, in which case
	   it was switched to SEGV_ACCERR at good_area. */
	info.si_signo = SIGSEGV;
	info.si_errno = 0;
	info.si_code = si_code;
	info.si_addr = (void *) address;
	force_sig_info(SIGSEGV, &info, current);
	return -1;
}


/*
 * Protocol identifiers that syscall_loop() compares against
 * (L4_Label(tag) >> 4).  Each is a small negative number whose upper
 * 20 bits are cleared by the shift pair, leaving 12 significant bits
 * when unsigned long is 32 bits wide.
 * NOTE(review): with a 64-bit unsigned long the result keeps 44 bits --
 * confirm that this still matches the shifted label there.
 */
#define L4_PAGEFAULT		((-2ul<<20)>>20)
#define L4_PREEMPTION		((-3ul<<20)>>20)
#define L4_ARCH_EXCEPTION	((-5ul<<20)>>20)

/*
 * IPC service loop for the user thread belonging to the current Linux
 * task.  Never returns.
 *
 * Each pass checks the tag of the IPC that was just received, stores the
 * message, dispatches on the protocol field of the label (page fault,
 * architecture exception / system call, anything else raises SIGILL),
 * runs pending work, and finally replies to the user thread and waits
 * for the next message.  Messages from timer_thread are treated as
 * preemption requests; messages from any other thread are reported and
 * ignored.
 *
 * When syscall loop is called, there is unsaved state in the
 * utcb, so it is unsafe to call anything that will generate
 * an IPC until after the L4_MsgStore. (eg printk)
 */
void NORET_TYPE
syscall_loop (void)
{
	L4_Word_t attributes[4] = {0,0,0,0};
	/* Non-zero while the message being processed is a system call. */
	L4_Word_t syscall = 0;
	L4_MemoryControl (-2, attributes);

	for(;;) {
		local_irq_enable();
		set_user_ipc_received(current_thread_info());

		/* At this point we have received an IPC */
		if (L4_IpcFailed(current_thread_info()->tag)) {
			L4_Word_t ec = L4_ErrorCode();
			printk("%s:%d:%lx Ipcfailed: error code = %lx\n",
					current->comm, current->pid,
					current_thread_info()->user_tid.raw, ec);
			force_sig(SIGILL, current);
			/* No system call is in progress for this (failed)
			   message.  Clear the flag so a value left over from
			   the previous pass cannot reach work_pending(),
			   the restart check or syscall_exit(); this matches
			   the send-error path at the bottom of the loop. */
			syscall = 0;
			goto handle_signal;
		}

		L4_MsgStore (current_thread_info()->tag,
				&current_regs()->msg); /* Get the tag */

	restart_syscall:
#if 0
		printk ("syscall_loop: got msg from %p %lx, (0x%p)\n", 
			(void *) current_thread_info()->user_tid.raw,
			(long) L4_Label (current_thread_info()->tag),
			(void *) L4_MsgWord (&current_regs()->msg, 0));
#endif
		/*
		 * Dispatch IPC according to protocol.
		 */
		syscall = 0;
		switch (L4_Label(current_thread_info()->tag) >> 4) {
		case L4_PAGEFAULT:
			{
				L4_Word_t addr;
				/* A pagefault occurred. Dispatch to the pager */
				addr = L4_MsgWord (&current_regs()->msg, 0);
				set_fault_status_true(current_thread_info());
				/* Page table lookup; the low four bits of the
				   label carry the access type. */
				l4_do_page_fault(addr, L4_Label(current_thread_info()->tag) & 0xf);
				set_fault_status_false(current_thread_info());
			}
			break;
#if OLD_PREEMPTION
		case L4_PREEMPTION:
			{
				int r;
				//printk("preemption %lx\n", current_thread_info()->user_tid.raw);

				r = L4_Set_Timeslice (current_thread_info()->user_tid,
							L4_TimePeriod(10000),	/* Timeslice  XXX -
										   get this properly*/
							L4_TimePeriod(0));	/* Quantum */
				assert (r != 0);
			}
			break;
#endif
		/* Trampoline */
		/* Direct syscall */
		case L4_ARCH_EXCEPTION:
			if (L4_ARCH_IS_SYSCALL(current_thread_info()->tag))
			{
				L4_Word_t abi, result;
				long sys_num = l4_arch_lookup_syscall(current_regs(), &abi);

				if (sys_num > 0)
				{
					assert(l4_syscall_table[sys_num].fn);

					syscall = 1;
					syscall_entry();

					result = l4_arch_abi_call(current_regs(), sys_num, abi);
#if 0
					printk("Got a syscall: (%3ld, %2ld) from %3d (%s)  -> %ld\n", 
						sys_num, abi, current->pid, current->comm, (signed long)result);
#endif
				} else {
					printk("%s:%d:%lx unknown syscall\n",
							current->comm, current->pid,
							current_thread_info()->user_tid.raw);
					force_sig(SIGILL, current);
				}
			}
			else if (L4_ARCH_IS_EXCEPTION(current_thread_info()->tag))
			{
				l4_arch_handle_exception(current_regs());

			} else {
				printk("%s:%d:%lx broken message received\n",
						current->comm, current->pid,
						current_thread_info()->user_tid.raw);
				force_sig(SIGILL, current);
			}
			break;
		default:
			printk ("%s:%d:%lx unknown ipc request, "
				"(%p, %p, %p)\n",
				current->comm, current->pid,
				current_thread_info()->user_tid.raw,
				(void *) current_thread_info()->tag.raw,
				(void *) L4_MsgWord (&current_regs()->msg, 0), 
				(void *) L4_MsgWord (&current_regs()->msg, 1));

			force_sig(SIGILL, current);
			break;
		}
handle_signal:
		work_pending(syscall);

		if (syscall && l4_arch_restart_syscall(current_regs()))
		{
			printk("%s:%d:%lx restart syscall\n",
					current->comm, current->pid,
					current_thread_info()->user_tid.raw);
			goto restart_syscall;
		}

		local_irq_enable();

		syscall_exit(syscall);
#if OLD_PREEMPTION
		if (reply_user_ipc(current_thread_info())) {
			L4_MsgLoad(&current_regs()->msg);
			/* Reply to caller and wait for next IPC */
			current_thread_info()->tag = L4_Call(
					current_thread_info()->user_tid);
		} else {
			current_thread_info()->tag =
				L4_Receive(current_thread_info()->user_tid);
		}
#else	/* fast user-preemption - improves Linux scheduler */
		{
			L4_MsgTag_t tag;
			L4_ThreadId_t from;

			if (reply_user_ipc(current_thread_info())) {
				L4_MsgLoad(&current_regs()->msg);
				/* Reply to caller and wait for next IPC */
				tag = L4_ReplyWait(current_thread_info()->user_tid,
						&from);
			} else {
retry:
				if (user_need_restart(current_thread_info()))
				{
					L4_Start_SpIpFlags(current_thread_info()->user_tid,
							current_thread_info()->request.u.restart.user_sp,
							current_thread_info()->request.u.restart.user_ip,
							current_thread_info()->request.u.restart.user_flags);
					clear_need_restart(current_thread_info());
				}
				/* Open Wait */
				tag = L4_Wait (&from);
			}

			/* Only a message from our own user thread is accepted
			   as the next request. */
			if (from.raw == current_thread_info()->user_tid.raw)
			{
				current_thread_info()->tag = tag;
			}
			else if (from.raw == timer_thread.raw)
			{
				/* Preemption */
				work_pending_preempt();    /* Handle any signals / reschedules */
				goto retry;
			}
			else
			{
				/* XXX
				 * Here we have to lookup the linux thread from
				 * the L4 thread_id, which may be expensive.
				 * Probably someone did a ThreadSwitch to this thread.
				 * We cannot kill it because an unprivileged linux
				 * thread could use this to kill root processes.
				 */
				printk("illegal ipc from %lx, expected %lx\n", from.raw,
						current_thread_info()->user_tid.raw);
				L4_KDB_Enter("ill");
				goto retry;
			}
		}
#endif

		/* Errors with bit 0 of the error code clear are handled
		   here; all other failures are caught at the top of the
		   next pass. */
		if (L4_IpcFailed(current_thread_info()->tag) &&
				(L4_ErrorCode() & 1) == 0) {
			L4_Word_t ec = L4_ErrorCode();

			if (ec == L4_ErrNoMem) {
				/* Unmap the whole address space with full
				   rights, then wait again. */
				L4_Fpage_t fpage = L4_CompleteAddressSpace;

				L4_Set_Rights(&fpage, L4_FullyAccessible);
				L4_UnmapFpage(fpage);
				goto retry;
			}

			printk("reply_wait: (%p) IPC error (%p) %lx\n",
				(void *) L4_Myself().raw, 
				(void *) current_thread_info()->user_tid.raw, ec);

			force_sig(SIGILL, current);
			syscall = 0;
			goto handle_signal;
		}
	}
	BUG();
}

#include <linux/mman.h>

/* FIXME - copied from mips below */
/* Alignment mask used for colour-aligned mappings; with PAGE_SIZE - 1
   this amounts to plain page alignment. */
unsigned long shm_align_mask = PAGE_SIZE - 1;	/* Sane caches */
/* Round addr up to the next multiple of (shm_align_mask + 1), then add
   the byte offset of pgoff masked by shm_align_mask. */
#define COLOUR_ALIGN(addr,pgoff)				\
	((((addr) + shm_align_mask) & ~shm_align_mask) +	\
	 (((pgoff) << PAGE_SHIFT) & shm_align_mask))

/*
 * Choose a start address for a new mapping of `len` bytes in the
 * current task's address space.
 *
 * filp:  backing file, or NULL.
 * addr:  caller's hint; mandatory when MAP_FIXED is set in flags.
 * len:   length of the mapping in bytes.
 * pgoff: page offset into the backing object, used for colour alignment.
 * flags: mmap flags; only MAP_FIXED and MAP_SHARED are inspected.
 *
 * Returns the chosen address, or -EINVAL / -ENOMEM converted to
 * unsigned long on failure.
 */
unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
	unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct vm_area_struct *next;
	int colour;

	if (flags & MAP_FIXED) {
		/*
		 * A fixed shared mapping is refused when its address
		 * would break the cache aliasing constraints; any other
		 * fixed address is taken as given.
		 */
		if ((flags & MAP_SHARED) && (addr & shm_align_mask))
			return -EINVAL;
		return addr;
	}

	if (len > TASK_SIZE)
		return -ENOMEM;

	/* File-backed and shared mappings get colour alignment. */
	colour = (filp || (flags & MAP_SHARED)) ? 1 : 0;

	/* Try the caller's hint first. */
	if (addr) {
		addr = colour ? COLOUR_ALIGN(addr, pgoff) : PAGE_ALIGN(addr);
		next = find_vma(current->mm, addr);
		if (addr <= TASK_SIZE - len &&
		    (!next || next->vm_start >= addr + len))
			return addr;
	}

	/* Otherwise walk the vma list upwards from the default base. */
	addr = TASK_UNMAPPED_BASE;
	addr = colour ? COLOUR_ALIGN(addr, pgoff) : PAGE_ALIGN(addr);

	next = find_vma(current->mm, addr);
	while (1) {
		/* Invariant: !next || addr < next->vm_end. */
		if (addr > TASK_SIZE - len)
			return -ENOMEM;
		if (!next || next->vm_start >= addr + len)
			return addr;

		/* The gap is too small: continue after this vma. */
		addr = next->vm_end;
		if (colour)
			addr = COLOUR_ALIGN(addr, pgoff);
		next = next->vm_next;
	}
}

pte_t *
pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
	if (pte)
		clear_page(pte);
	return pte;
}

/* Entry point that main() hands to L4_Start_SpIp() for main_thread. */
extern void start_kernel(void);

/* Set by __libc_setup() from its stdout_p and stdin_p arguments. */
void *console_out, *console_in;
/*
 * malloc() replacement backed by kmalloc().
 *
 * The allocation is made with GFP_ATOMIC.  Returns the new buffer, or
 * whatever kmalloc() returns on failure.
 */
void *
malloc(size_t size)
{
	return kmalloc(size, GFP_ATOMIC);
}
/*
 * free() replacement backed by kfree(); the counterpart of malloc()
 * above.
 *
 * ISO C does not allow a return statement with an expression in a
 * function returning void, so kfree() is called as a plain statement.
 */
void
free(void *ptr)
{
	kfree(ptr);
}

/*
 * Stub puts(): the argument is ignored and only a fixed marker line is
 * logged.  Always returns 0.
 */
int
puts(const char *format)
{
	(void)format;	/* deliberately unused */

	printk("puts called\n");
	return 0;
}

/*
 * Stub printf(): neither the format string nor the variable arguments
 * are interpreted; only a fixed marker line is logged.  Always
 * returns 0.
 */
int
printf(const char *format, ...)
{
	(void)format;	/* deliberately unused */

	printk("printf\n");
	return 0;
}

/* Set by __libc_setup() from its callback argument. */
void *__callback_buffer;

/* Copies of the four arguments passed to __linux_cap_init(). */
uintptr_t temp_cap_slot;
uintptr_t temp_cap_used;
uintptr_t temp_cap_size;
uintptr_t temp_cap_addr;
/* Defined elsewhere; pointed at cap_addr by __linux_cap_init(). */
extern cap_t *first_cap;

/*
 * Record the capability-list parameters in the temp_cap_* globals and
 * point first_cap at the capability storage.
 *
 * cap_slot, cap_used, cap_size: stored unchanged.
 * cap_addr: stored unchanged and also installed as first_cap.
 */
void
__linux_cap_init(uintptr_t cap_slot, uintptr_t cap_used, uintptr_t cap_size, uintptr_t cap_addr)
{
	first_cap = (cap_t *)cap_addr;

	temp_cap_addr = cap_addr;
	temp_cap_size = cap_size;
	temp_cap_used = cap_used;
	temp_cap_slot = cap_slot;
}

#if 0
void 
__linux_naming_init(uintptr_t naming_obj)
{
}
#endif

/* Boot memory range table and its entry count, defined elsewhere;
   __libc_setup() fills in entry 0 and sets the count to 1. */
extern bootmem_area_t bootmem_area[MAX_PHYSMEM_RANGES];
extern int bootmem_areas;

void
__libc_setup(void *callback, void *stdin_p, void *stdout_p, void *stderr_p, 
	     unsigned long heap_base, unsigned long heap_end)
{
	console_out = stdout_p;
	console_in = stdin_p;
	__callback_buffer = callback;

	/* Setup the memory areas from iguana */
	assert(heap_end > heap_base);
	bootmem_areas = 1;
	bootmem_area[0].page_base = heap_base >> PAGE_SHIFT;
	bootmem_area[0].pages = (heap_end >> PAGE_SHIFT) - bootmem_area[0].page_base;
}

void
__lib_init(uintptr_t *buf)
{
	/* args for libc initialisation */
	void *callback	= (void *)*(buf + 0);
	void *stdin_p	= (void *)*(buf + 1);
	void *stdout_p	= (void *)*(buf + 2);
	void *stderr_p	= (void *)*(buf + 3);
	unsigned long heap_base = (unsigned long)*(buf + 4);
	unsigned long heap_end	= (unsigned long)*(buf + 5);

	/* args for cap initialisation */
	uintptr_t cap_slot = (uintptr_t)*(buf + 6);
	uintptr_t cap_used = (uintptr_t)*(buf + 7);
	uintptr_t cap_size = (uintptr_t)*(buf + 8);
	uintptr_t cap_addr = (uintptr_t)*(buf + 9);

	/* args for naming initialisation */
	objref_t naming_server = (objref_t)*(buf + 10);

	__libc_setup(callback, stdin_p, stdout_p, stderr_p, heap_base, heap_end);
	__linux_cap_init(cap_slot, cap_used, cap_size, cap_addr);
	__naming_init(naming_server);
}

/* Number of uintptr_t elements (not bytes) in main_stack. */
#define STACK_SIZE 0x2000
/* Stack for main_thread; main() passes the address of the last element
   as the initial stack pointer. */
static uintptr_t main_stack[STACK_SIZE];
extern void interrupt_loop(void);

/* main() copies argv[0] into this buffer. */
extern char saved_command_line[COMMAND_LINE_SIZE];

extern void __tls_init(void *tls_buffer, void *asynch_buffer, void *naming_buffer);

/* Number of per-thread slots in the three arrays below. */
#define KERNEL_THREADS 3

struct naming_session main_name_session[KERNEL_THREADS];
struct asynch main_asynch_objects[KERNEL_THREADS];

/* Reads the pointer at index 2 of the thread-local storage area as a
   struct naming_session pointer. */
#define SESSION() (*(((struct naming_session **)__L4_TCR_ThreadLocalStorage()) + 2))

/* One 32-pointer TLS buffer per thread, handed to __tls_init() by
   setup_tls(). */
void *main_tls_area[KERNEL_THREADS][32];

/*
 * Initialise thread-local storage from the statically allocated slot
 * number thread_num: its TLS buffer, asynch object and naming session
 * are passed to __tls_init().
 *
 * thread_num is used as an array index without a range check; callers
 * must pass a value below KERNEL_THREADS.
 */
void
setup_tls(int thread_num)
{
	void *tls_buffer = main_tls_area[thread_num];
	struct asynch *asynch_obj = &main_asynch_objects[thread_num];
	struct naming_session *session = &main_name_session[thread_num];

	__tls_init(tls_buffer, asynch_obj, session);
}

/*
 * Program entry point.
 *
 * Saves argv[0] as the kernel command line, creates main_thread and
 * starts it at start_kernel() on main_stack, then runs interrupt_loop()
 * on the calling thread, which is recorded as timer_thread.
 *
 * Returns 0 if interrupt_loop() ever returns.
 */
int
main(int argc, char **argv)
{
	int r;
	/* Start a new thread */
	assert(argc > 0 && argv[0] != NULL);
	strlcpy(saved_command_line, argv[0], COMMAND_LINE_SIZE);
	L4_KDB_SetThreadName(L4_Myself(), "L_timer");
	/* NOTE(review): the result of thread_create() is not checked. */
	thread_create(&main_thread);
	r  = L4_Set_Priority(main_thread, 99);
	assert (r != 0);
	timer_thread = L4_Myself();
	L4_KDB_SetThreadName(main_thread, "L_syscall");
	/* Setup our TLS as well */
	setup_tls(0);
	/* Initial stack pointer is the address of the last element of
	   main_stack. */
	L4_Start_SpIp(main_thread, 
		      (L4_Word_t) &main_stack[STACK_SIZE-1], 
		      (L4_Word_t) start_kernel);
	/* Now we go and do the timer stuff */
	interrupt_loop();
	return 0;
}
