#pragma once

#include <functional>

namespace rk_sse {
	/// Advance an ODE state by one classical fourth-order Runge-Kutta step.
	///
	/// Only the declaration lives here; each supported `dim` is provided as an
	/// explicit specialization. Using a `dim` with no specialization compiles
	/// but fails at link time.
	///
	/// @tparam dim  Selects the specialization (state size).
	/// @param in    Current state.
	/// @param out   Receives the advanced state.
	/// @param step  Derivative callback: `step(state, derivative)` reads the
	///              state from its first argument and writes the derivative
	///              to its second.
	/// @param h     Step size.
	template<unsigned int dim>
	void rk4(double * in, double * out, std::function<void(double *, double *)> step, double h);

	/// RK4 step for a state made of two doubles (one 128-bit SSE vector).
	///
	/// Computes, independently for each of the two lanes,
	///   k1 = h*f(y), k2 = h*f(y + k1/2), k3 = h*f(y + k2/2), k4 = h*f(y + k3),
	///   out = y + (k1 + 2*k2 + 2*k3 + k4) / 6.
	///
	/// The arithmetic is written lane-wise in plain C++ rather than inline
	/// assembly. Values that must survive the calls to `step` (y, h, k1..k3)
	/// cannot be parked in xmm registers across a function call, because the
	/// callee is free to overwrite them; keeping them in named locals lets the
	/// compiler manage that correctly (and it may still vectorize the loops).
	///
	/// @param in    Current state; must point to 2 readable doubles. No
	///              alignment is required.
	/// @param out   Receives the advanced state; must point to 2 writable
	///              doubles. May be the same pointer as `in`.
	/// @param step  Derivative callback `step(state, derivative)`. It is always
	///              called with distinct state and derivative buffers. A lane
	///              of the derivative it does not write counts as zero.
	/// @param h     Step size.
	template<>
	inline void rk4<1>(double * in, double * out, std::function<void(double *, double *)> step, double h) {
		constexpr int lanes = 2;

		// Per-stage increments. Zero-initialised so that a lane the callback
		// leaves untouched contributes nothing to the result.
		double k1[lanes] = {0.0, 0.0};
		double k2[lanes] = {0.0, 0.0};
		double k3[lanes] = {0.0, 0.0};
		double k4[lanes] = {0.0, 0.0};

		// State handed to the callback for stages 2..4. Kept separate from the
		// k buffers so the callback never sees its input and output aliased.
		double stage[lanes];

		// Stage 1: slope at the current state.
		step(in, k1);

		// Private copy of the state: makes `out == in` safe and keeps later
		// stages independent of the caller's buffer.
		const double y[lanes] = {in[0], in[1]};

		for (int i = 0; i < lanes; ++i) {
			k1[i] *= h;
			stage[i] = k1[i] * 0.5 + y[i];
		}

		// Stage 2: slope at the midpoint predicted by k1.
		step(stage, k2);
		for (int i = 0; i < lanes; ++i) {
			k2[i] *= h;
			stage[i] = k2[i] * 0.5 + y[i];
		}

		// Stage 3: slope at the midpoint predicted by k2.
		step(stage, k3);
		for (int i = 0; i < lanes; ++i) {
			k3[i] *= h;
			stage[i] = k3[i] + y[i];
		}

		// Stage 4: slope at the end point predicted by k3.
		step(stage, k4);
		for (int i = 0; i < lanes; ++i) {
			k4[i] *= h;
			// Weighted sum, accumulated in the order k4 + k1 + 2*k2 + 2*k3.
			const double sum = ((k4[i] + k1[i]) + 2. * k2[i]) + 2. * k3[i];
			out[i] = y[i] + sum * (1. / 6.);
		}
	}
}
