#!/usr/bin/perl

use strict;
use warnings;
use Test::More;
use PDL::LiteF;
use PDL::Types;
use PDL::FFTW3;
use PDL::Complex;

# Please be careful about rearranging these tests, since they depend on the
# global FFTW plan cache, and thus order can matter.
#
# Here I use GNU octave to compute the reference results. To transform 'format
# long' octave output to the PDL input that appears here I use this:
#
# perl -ane 'if(/Column/) { print "],\n[" } if( @F == 3 ) { $F[2] =~ s/i//; print "[" . "$F[0],$F[1]$F[2]" . "],"; } END {print "],\n"} '

# I turn on the threading to stress things out a bit more, and to make sure it
# works
set_autopthread_targ(2);
set_autopthread_size(0);

# With potentially-unaligned input I can no longer predict when a new plan will
# be created, so I disable plan-creation checks. When PDL itself produces only
# aligned data buffers this should be re-enabled.
my $do_check_plan_creations = undef;

use constant approx_eps_double => 1e-8;
use constant approx_eps_single => 1e-3;

my $Nplans = 0;

# 1D basic test
{
  my $x = sequence(10)->cat(sequence(10)**2)->mv(-1,0);

  # from octave: conj( fft( (0:9) + i* ((0:9).**2) )' )
  my $Xref = pdl( [45.0000000000000,+285.0000000000000],
                  [-158.8841768587627,+17.7490974608742],
                  [-73.8190960235587,-28.6459544426446],
                  [-41.3271264002680,-38.7279671349711],
                  [-21.2459848116453,-42.8475374738350],
                  [-5.0000000000000,-45.0000000000000],
                  [11.2459848116453,-46.0967344361641],
                  [31.3271264002680,-45.9933924150247],
                  [63.8190960235587,-42.4097736473563],
                  [148.8841768587627,-13.0277379108784] );

  ok_should_make_plan( all( approx( fft1($x), $Xref, approx_eps_double) ),
                      "Basic 1D complex FFT - double precision" );

  ok_should_reuse_plan( all( approx( $x->fft1, $Xref, approx_eps_double) ),
                        "Basic 1D complex FFT as a method - double precision" );

  ok_should_make_plan( all( approx( fft1(float $x), float($Xref), approx_eps_single) ),
                      "Basic 1D complex FFT - single precision" );

  ok_should_make_plan( all( approx( fft1(cplx $x), $Xref, approx_eps_double) ),
                      "Basic 1D complex FFT with PDL::Complex" );

  ok_should_make_plan( all( approx( ifft1(fft1($x)), $x , approx_eps_double) ),
                      "Basic 1D complex FFT - inverse(forward) should be the same (normalized)" );
}

# 2D basic test
{
  my $x = sequence(5,7)->cat(1e-2 * sequence(5,7)**2)->mv(-1,0);

  # from octave: fft2( reshape(0:34,5,7)' + i* (1e-2*reshape(0:34,5,7)' .** 2) )
  my $Xref =
    pdl( [ [5.95000000000000e+02, + 1.36850000000000e+02],   [-2.59303392628859e+01, + 1.84682083666705e+01],   [-1.94901331394265e+01, - 2.45430074349120e-01],   [-1.55098668605735e+01, - 1.16176194425008e+01],   [-9.06966073711407e+00, - 2.97051588498206e+01]],
         [[-1.58361292658031e+02, + 1.70810364558179e+02],    [3.02129040241307e+00, - 1.62582569424951e+00],    [2.10126095620458e+00, + 2.84635136279005e-01],    [1.53265148779700e+00, + 1.46536486372099e+00],    [6.12622041588519e-01, + 3.37582569424951e+00]],
         [[-1.14713779395612e+02, + 4.28112631783535e+01],    [1.90212339568438e+00, - 8.54244602002882e-02],    [9.82093949475899e-01, + 6.48274540139187e-01],    [4.13484481068306e-01, + 1.10172545986082e+00],   [-5.06544965140175e-01, + 1.83542446020029e+00]],
         [[-9.52888085635639e+01, - 9.55078000010450e+00],    [1.40404722050367e+00, + 6.00118582335890e-01],    [4.84017774295172e-01, + 8.10109299679749e-01],   [-8.45916941124162e-02, + 9.39890700320246e-01],   [-1.00462114032089e+00, + 1.14988141766411e+00]],
         [[-7.97111914364361e+01, - 4.94933880183808e+01],    [1.00462114032090e+00, + 1.14988141766412e+00],    [8.45916941124141e-02, + 9.39890700320234e-01],   [-4.84017774295178e-01, + 8.10109299679762e-01],   [-1.40404722050366e+00, + 6.00118582335895e-01]],
         [[-6.02862206043880e+01, - 9.67465798760672e+01],    [5.06544965140170e-01, + 1.83542446020029e+00],   [-4.13484481068314e-01, + 1.10172545986081e+00],   [-9.82093949475905e-01, + 6.48274540139193e-01],   [-1.90212339568438e+00, - 8.54244602002870e-02]],
         [[-1.66387073419690e+01, - 1.92580879841980e+02],   [-6.12622041588522e-01, + 3.37582569424950e+00],   [-1.53265148779701e+00, + 1.46536486372098e+00],   [-2.10126095620459e+00, + 2.84635136279020e-01],   [-3.02129040241307e+00, - 1.62582569424950e+00]] );


  ok_should_make_plan( all( approx( fft2($x), $Xref, approx_eps_double) ),
     "Basic 2D complex FFT - double precision" );

  ok_should_make_plan( all( approx( fft2(float $x), float($Xref), approx_eps_single) ),
     "Basic 2D complex FFT - single precision" );

  ok_should_make_plan( all( approx( ifft2(fft2($x)), $x, approx_eps_double) ),
                      "Basic 2D complex FFT - inverse(forward) should be the same (normalized)" );
}

# lots of 1D ffts threaded in a 2d array
{
  my $x = sequence(6,5)->cat( sequence(30)->xlinvals(18,-11)->reshape(6,5)->abs->sqrt )->mv(-1,0);

  # octave result: fft( reshape(0:29,6,5) + i*sqrt(abs(reshape(18:-1:-11,6,5))) )
  my $Xref = pdl( [[15.00000000000000,+23.58593832118229],[-2.32805351899395,+5.55930952077235],[-2.77551605086160,+2.11251769696778],[-3.00000000000000,+0.38265782660416],[-3.22448394913840,-1.35158391816997],[-3.67194648100605,-4.83299532464091]],
                  [[51.00000000000000,+18.41718250147231],[-2.12988347946625,+5.64608969609710],[-2.70812956895156,+2.21961197953935],[-3.00000000000000,+0.49243029863233],[-3.29187043104844,-1.24448963559840],[-3.87011652053375,-4.74621514931617]],
                  [[87.00000000000000,+10.83182209022494],[-1.42222779450344,+5.82451856548428],[-2.43683966685802,+2.58845058798449],[-3.00000000000000,+0.89558452008761],[-3.56316033314198,-0.87565102715326],[-4.57777220549656,-4.56778627992898]],
                  [[123.00000000000000,+8.38233234744176],[-4.57777220549656,+3.37502882270110],[-3.56316033314198,+0.13896084520131],[-3.00000000000000,-1.55390522269557],[-2.43683966685802,-3.32514076993644],[-1.42222779450344,-7.01727602271216]],
                  [[159.00000000000000,+17.40257062911774],[-3.87011652053375,+4.63147782374252],[-3.29187043104844,+1.20500010718478],[-3.00000000000000,-0.52218157372224],[-2.70812956895156,-2.25910150795298],[-2.12988347946625,-5.76082702167074]] );

  ok_should_make_plan( all( approx( fft1($x), $Xref, approx_eps_double) ),
     "1D FFTs threaded inside a 2D piddle" );

  ok_should_make_plan( all( approx( ifft1(fft1($x)), $x , approx_eps_double) ),
                      "1D FFTs threaded inside a 2D piddle - inverse(forward) should be the same" );
}

# lots of 1D ffts threaded in a 3d array
{
  my $x = PDL::cat( sequence(6,5,4)**1.1,
                    (abs(sequence(6,5,4) - 10))**0.8 )->mv(-1,0);

  # octave reference code (3d array collapsed to 2d)
  #  re = reshape(0:119,6,4*5) .** 1.1
  #  im = (abs(reshape(0:119,6,4*5) - 10)) .** 0.8
  #  fft(re + i*im)
  my $Xref = reshape( pdl( [[1.69598045826025e+01,+2.99472886475101e+01],[-4.57128799578198e-01,+7.88558765388318e+00],[-2.51287898808965e+00,+3.70301251723728e+00],[-3.48312389248108e+00,+1.61384695353585e+00],[-4.40181702820775e+00,-4.91751648931500e-01],[-6.10485587424586e+00,-4.80054345442333e+00]],
                           [[6.33118711499336e+01,+9.18075894489374e+00],[-1.28375178011374e+00,+9.98129736230945e+00],[-4.36076655499263e+00,+3.82708231830186e+00],[-4.08027791431578e+00,+3.64309574332352e-01],[-3.78420764599829e+00,-9.13541864133209e-01],[-6.73854409586578e+00,-4.25130753757941e+00]],
                           [[1.13759697869505e+02,+1.97408963697151e+01],[-7.29388611445114e+00,+5.57273237248353e+00],[-5.32061252013794e+00,+6.73706431548140e-01],[-4.30915950819925e+00,-1.80990242523002e+00],[-3.28801256905250e+00,-4.31960446632102e+00],[-1.23762967061300e+00,-9.41122152264220e+00]],
                           [[1.66435716068732e+02,+3.92801435749690e+01],[-7.00620607474377e+00,+6.20403184755403e+00],[-5.31853826280121e+00,+1.07388595520872e+00],[-4.46232098197304e+00,-1.50418919374508e+00],[-3.59897150704107e+00,-4.09148521833613e+00],[-1.85441208781864e+00,-9.29419710710112e+00]],
                           [[2.20709491043481e+02,+5.64630711694275e+01],[-6.91312575495935e+00,+6.55086880329684e+00],[-5.36308316775101e+00,+1.27431097299403e+00],[-4.57890644153963e+00,-1.37165712546438e+00],[-3.78905073734364e+00,-4.02301895420251e+00],[-2.19365625629182e+00,-9.34243162811467e+00]],
                           [[2.76241513888165e+02,+7.23930056605892e+01],[-6.87318460924704e+00,+6.80271191149501e+00],[-5.41174690593730e+00,+1.41218875469242e+00],[-4.67359735938111e+00,-1.28842511961541e+00],[-3.93071118249889e+00,-3.99277491384701e+00],[-2.43142506404472e+00,-9.41307369494718e+00]],
                           [[3.32816672889238e+02,+8.74786737952202e+01],[-6.85538501192059e+00,+7.00327114642510e+00],[-5.45840640198368e+00,+1.51804628653698e+00],[-4.75361170451194e+00,-1.22862468678152e+00],[-4.04474354399512e+00,-3.97811872251649e+00],[-2.61520470809143e+00,-9.48587494925505e+00]],
                           [[3.90285534816727e+02,+1.01934524303480e+02],[-6.84870160277348e+00,+7.17094139230458e+00],[-5.50194078556953e+00,+1.60420335500904e+00],[-4.82306119868588e+00,-1.18240994885097e+00],[-4.14060148105651e+00,-3.97127361702190e+00],[-2.76521775751010e+00,-9.55598548492096e+00]],
                           [[4.48537844122192e+02,+1.15891995850952e+02],[-6.84822636641758e+00,+7.31547544486356e+00],[-5.54234829173794e+00,+1.67694646900063e+00],[-4.88452106175259e+00,-1.14500594148577e+00],[-4.22349575457002e+00,-3.96881837596082e+00],[-2.89204567772393e+00,-9.62221002325373e+00]],
                           [[5.07488694175294e+02,+1.29439468309100e+02],[-6.85145842410931e+00,+7.44274887104558e+00],[-5.57990116405240e+00,+1.73994109240401e+00],[-4.93971551455435e+00,-1.11374817715806e+00],[-4.29663683461546e+00,-3.96901565750555e+00],[-3.00194446700983e+00,-9.68443780878244e+00]],
                           [[5.67070568283683e+02,+1.42640190784627e+02],[-6.85700195115374e+00,+7.55660524048573e+00],[-5.61491735058256e+00,+1.79551966393514e+00],[-4.98985787485722e+00,-1.08700470396320e+00],[-4.36215484055982e+00,-3.97089501060820e+00],[-3.09892860515759e+00,-9.74290039627863e+00]],
                           [[6.27228420472482e+02,+1.55541513973010e+02],[-6.86402517554169e+00,+7.65970907217783e+00],[-5.64769495207659e+00,+1.84526159054989e+00],[-5.03583553656148e+00,-1.06370689880151e+00],[-4.42154056842631e+00,-3.97387614329543e+00],[-3.18573117267543e+00,-9.79792420309997e+00]],
                           [[6.87916464776208e+02,+1.68180119564361e+02],[-6.87200754524014e+00,+7.75398935566485e+00],[-5.67849584768631e+00,+1.89028747465175e+00],[-5.07831785783708e+00,-1.04311878827984e+00],[-4.47588053315932e+00,-3.97759390422351e+00],[-3.26429827802791e+00,-9.84984260267090e+00]],
                           [[7.49095987297811e+02,+1.80585198360161e+02],[-6.88061078534553e+00,+7.84089054155888e+00],[-5.70754494005455e+00,+1.93142189375022e+00],[-5.11782264695134e+00,-1.02471305032201e+00],[-4.52599231404356e+00,-3.98180928875154e+00],[-3.33606544685574e+00,-9.89896527117856e+00]],
                           [[8.10733802053247e+02,+1.92780489751580e+02],[-6.88960853349083e+00,+7.92152398109467e+00],[-5.73503408146089e+00,+1.96928962600128e+00],[-5.15475900259173e+00,-1.00809962679505e+00],[-4.57250726514005e+00,-3.98636092807034e+00],[-3.40212222387915e+00,-9.94556934146171e+00]],
                           [[8.72801129019100e+02,+2.04785648596696e+02],[-6.89884580787876e+00,+7.99676402590117e+00],[-5.76112695592524e+00,+2.00437559511875e+00],[-5.18945595634560e+00,-9.92982320347998e-01],[-4.61592350811448e+00,-3.99113714088274e+00],[-3.46331527512791e+00,-9.98989898003569e+00]],
                           [[9.35272758698146e+02,+2.16617194443934e+02],[-6.90821460405405e+00,+8.06731155157295e+00],[-5.78596367678119e+00,+2.03706383976585e+00],[-5.22218222758994e+00,-9.79131204378469e-01],[-4.65664122235683e+00,-3.99605908789967e+00],[-3.52031568871528e+00,-1.00321680493426e+01]],
                           [[9.98126416838528e+02,+2.28289190542651e+02],[-6.91763865128097e+00,+8.13373739118195e+00],[-5.80966474975681e+00,+2.06766375929783e+00],[-5.25316021545925e+00,-9.66364422864565e-01],[-4.69498691572975e+00,-4.00107022982894e+00],[-3.57366444372853e+00,-1.00725636708949e+01]],
                           [[1.06134227249634e+03,+2.39813741913757e+02],[-6.92706358972381e+00,+8.19651289056715e+00],[-5.83233436936525e+00,+2.09642832609833e+00],[-5.28257613941830e+00,-9.54535801950115e-01],[-4.73123057917205e+00,-4.00612951750744e+00],[-3.62380403823212e+00,-1.01112498044903e+01]],
                           [[1.12490255099778e+03,+2.51201368661019e+02],[-6.93645047026591e+00,+8.25603193408341e+00],[-5.85406312069293e+00,+2.12356705077917e+00],[-5.31058753759640e+00,-9.43526187589065e-01],[-4.76559809982867e+00,-4.01120687194793e+00],[-3.67110104753344e+00,-1.01483705104741e+01]] ),
                      2,6,5,4 );

  my $f = fft1($x);

  is( get_autopthread_actual(), 2, "1D FFTs threaded inside a 3D piddle - CPU threading should work" );

  ok_should_reuse_plan( all( approx( $f, $Xref, approx_eps_double) ),
     "1D FFTs threaded inside a 3D piddle" );
}

# try out some different ways of calling the module, make sure the argument
# verification works
{
  eval( 'fft1( )' );
  ok( $@, "Calling fft1 with no arguments should fail" );

  eval( 'fft1(sequence(2,5), sequence(2,5), sequence(2,5) )' );
  ok( $@, "Calling fft1 with too many arguments should fail" );

  eval( 'fft1(sequence(5))' );
  ok( $@, "Calling fft1 without dim(0)==2 should fail. Trial 1");

  eval( 'fft1(sequence(1,5))' );
  ok( $@, "Calling fft1 without dim(0)==2 should fail. Trial 2");

  eval( 'fft1(sequence(3,5))' );
  ok( $@, "Calling fft1 without dim(0)==2 should fail. Trial 3");

  eval( 'fft1(null))' );
  ok( $@, "Calling fft1(null) should fail.");

  eval( 'fft1( {} ))' );
  ok( $@, "Calling fft1( {} ) should fail (want piddle input).");

  eval( 'fft1(sequence(2,5), sequence(3,5) )' );
  ok( $@, "Calling fft1 with mismatched arguments should fail" );

  # should be able to ask for output in the arglist
  my $x  = random(2,10);
  my $f1 = fft1( $x );
  my $f2;
  eval( 'fft1( $x, $f2 )' );
  ok( $@, "Calling fft1 with undef argument should fail" );

  $f2 = null;
  eval( 'fft1( $x, $f2 )' );
  ok_should_reuse_plan( !$@ && all( approx( $f1, $f2 ), approx_eps_double),
                        "Should be able to ask for output in the arglist" );

  eval( 'fft4( sequence(2,5,3,4,4) );' );
  ok_should_make_plan( ! $@, "dimensionality baseline" );

  eval( 'fft4( sequence(2,5,4,4) );' );
  ok( $@, "too few dimensions should fail" );
}

# inplace checks
{
  my $xorig = sequence(10)->cat(sequence(10)**2)->mv(-1,0);

  # from octave: conj( fft( (0:9) + i* ((0:9).**2) )' )
  my $Xref = pdl( [45.0000000000000,+285.0000000000000],
                  [-158.8841768587627,+17.7490974608742],
                  [-73.8190960235587,-28.6459544426446],
                  [-41.3271264002680,-38.7279671349711],
                  [-21.2459848116453,-42.8475374738350],
                  [-5.0000000000000,-45.0000000000000],
                  [11.2459848116453,-46.0967344361641],
                  [31.3271264002680,-45.9933924150247],
                  [63.8190960235587,-42.4097736473563],
                  [148.8841768587627,-13.0277379108784] );

  my $x = $xorig->copy;
  fft1($x, $x);
  ok_should_make_plan( all( approx( $x, $Xref, approx_eps_double) ),
                       'In-place test: fft1($x,$x)' );

  $x = $xorig->copy;
  fft1( $x->inplace );
  ok_should_reuse_plan( all( approx( $x, $Xref, approx_eps_double) ),
                        'In-place test:   fft1( $x->inplace )' );

  ok( !$x->is_inplace, "After computation the in-place flag should be cleared" );
}

# lots of 2D ffts threaded in a 3d array
{
  my $x = PDL::cat( sequence(6,5,4)**1.1,
                    (abs(sequence(6,5,4) - 10))**0.8 )->mv(-1,0);

  # in octave I compute 4 slices separately
  #  re = reshape(0:119,6,4*5) .** 1.1
  #  im = (abs(reshape(0:119,6,4*5) - 10)) .** 0.8
  #  slice1 = fft2(re(:, 1:5 ) + i*im(:, 1:5 ))
  #  slice2 = fft2(re(:, 6:10) + i*im(:, 6:10))
  #  slice3 = fft2(re(:,11:15) + i*im(:,11:15))
  #  slice4 = fft2(re(:,16:20) + i*im(:,16:20))
  my $Xref = pdl( [
                   [[581.176580714253419,+154.612158706515430],[-22.954098523846195,+36.194518039527026],[-22.875879493772423,+10.551998195290025],[-20.913788738508782,-2.707592216571274],[-18.862059487643258,-13.839402151924361],[-18.129097984835099,-37.099701249860743]],
                   [[-178.408651850856160,+183.139542083867042],[11.470377256968625,-1.718053156591536],[5.282150186239193,+2.913557271028725],[2.408626479527148,+3.419465916129992],[1.652807905147736,+4.600098529135501],[-1.590381734378646,+10.091671271587607]],
                   [[-135.442076986364611,+37.496948236259065],[4.372069715046538,-5.432458423609452],[3.947276983624399,-0.475185262726981],[2.122893057072843,+1.257276333468205],[1.641541660378284,+1.195733119702670],[3.269737048112678,+3.674743286679739]],
                   [[-117.024242102743898,-47.339310480754413],[-0.861443876013559,+1.732467533240662],[0.185127597800303,+0.707054055400392],[-0.499359313992656,+1.552118887461319],[-2.447776485966577,+0.609947466490766],[-2.937831933834375,-2.841282678209592]],
                   [[-65.502586861276143,-178.172895308336507],[5.687451429953602,+8.651464276849186],[0.896929785660302,+4.817638327194231],[-0.533990946503966,+4.547965847191024],[-3.993598732954924,+4.974864791937925],[-11.136704766293862,+2.171852097686326]] ],

                  [
                   [[1.95537025989162e+03,+5.07137667919341e+02],[-3.42769560144680e+01,+3.57351487661338e+01],[-2.74943435492808e+01,+7.95132595764308e+00],[-2.40745068388859e+01,-5.95821387389173e+00],[-2.06361887967360e+01,-1.98800012868518e+01],[-1.37058376743800e+01,-4.77615819611594e+01]],
                   [[-1.90823299051261e+02,+1.63561853708853e+02],[-5.30707810726989e-01,-4.48966067048194e-01],[-1.41545809310629e-01,-3.74843067838806e-01],[5.33592439251135e-02,-3.42459575841451e-01],[2.48256646407656e-01,-3.13193921558001e-01],[6.37372950089715e-01,-2.63898986015401e-01]],
                   [[-1.55758721083882e+02,+1.14830386090883e+01],[-1.37557316201933e-01,-4.05958779660870e-01],[4.43148065741885e-02,-2.42630352946638e-01],[1.36711806861231e-01,-1.63554000296091e-01],[2.29943537575820e-01,-8.62535289493658e-02],[4.18482577171305e-01,+6.29473735169093e-02]],
                   [[-1.32979537236871e+02,-8.30537608826256e+01],[1.04159629258482e-01,-4.09670807209363e-01],[1.66802170317476e-01,-1.76664338294510e-01],[2.00610593363786e-01,-6.16794569397552e-02],[2.35974686283565e-01,+5.21905158295986e-02],[3.11024493644906e-01,+2.76346187116616e-01]],
                   [[-9.46011330787756e+01,-2.37163771051710e+02],[4.75138465903229e-01,-4.56993554740357e-01],[3.66037852013303e-01,-9.62444251010284e-02],[3.15838397830188e-01,+8.37813088919830e-02],[2.68458013974529e-01,+2.63383652294485e-01],[1.81832333250489e-01,+6.20818911805349e-01]]],

                  [
                   [[3.44204524288343e+03,+8.39727512433739e+02],[-3.43632539907719e+01,+3.87327181909819e+01],[-2.83836871718609e+01,+9.43178024888828e+00],[-2.53765929187988e+01,-5.22664306816161e+00],[-2.23580755213291e+01,-1.98905352749490e+01],[-1.62871457265958e+01,-4.92352018146898e+01]],
                   [[-1.93850055487056e+02,+1.78604950208849e+02],[-2.81086565720529e-01,-2.74439891768760e-01],[-6.30094446091169e-02,-2.17686087051074e-01],[4.62218954441780e-02,-1.90640300235732e-01],[1.55551122009561e-01,-1.64483923137678e-01],[3.74411612383404e-01,-1.14838044041313e-01]],
                   [[-1.62294627938734e+02,+1.82927539954391e+01],[-5.11143306149971e-02,-2.36649532393143e-01],[4.16430850746093e-02,-1.33253277381126e-01],[8.85453597042875e-02,-8.22033677083361e-02],[1.35780317473130e-01,-3.15956544938805e-02],[2.31197389980492e-01,+6.82673228827312e-02]],
                   [[-1.42113448440296e+02,-8.10603929075218e+01],[9.13716757316451e-02,-2.22938826506865e-01],[1.09204470637664e-01,-8.58346301593518e-02],[1.18905834471482e-01,-1.75430229641282e-02],[1.29121710583797e-01,+5.05580698311880e-02],[1.51066988141103e-01,+1.86140992994433e-01]],
                   [[-1.08434269598928e+02,-2.42363869807370e+02],[3.19073455607117e-01,-2.15663737884522e-01],[2.21262307844932e-01,-1.74079346210403e-02],[1.73630454892792e-01,+8.20062392537970e-02],[1.26848168463476e-01,+1.81581729708371e-01],[3.58267103028470e-02,+3.81129561460766e-01]]],

                  [
                   [[4.99244512804990e+03,+1.14070714415806e+03],[-3.45882131232035e+01,+4.06503577933066e+01],[-2.90431528725214e+01,+1.03290985710599e+01],[-2.62579620764095e+01,-4.83653993713021e+00],[-2.34643803252018e+01,-2.00056028480667e+01],[-1.78522004933373e+01,-5.03542510152376e+01]],
                   [[-1.96380323533266e+02,+1.88152304723188e+02],[-1.92865464527335e-01,-2.02889176717224e-01],[-3.86947461251954e-02,-1.56831303578437e-01],[3.85138755642588e-02,-1.34443894895275e-01],[1.15794735723374e-01,-1.12485367180954e-01],[2.70543304787344e-01,-6.98569500483150e-02]],
                   [[-1.66872451417648e+02,+2.23039858589711e+01],[-2.77487657486899e-02,-1.70406376180963e-01],[3.47883788886858e-02,-9.33380414940140e-02],[6.63337828406928e-02,-5.50977611340899e-02],[9.80588055564104e-02,-1.70571735903083e-02],[1.62032394212931e-01,+5.84144732089472e-02]],
                   [[-1.48137315640352e+02,-8.03755018459729e+01],[7.46992540366458e-02,-1.55140422934376e-01],[8.17655449197855e-02,-5.64025227538078e-02],[8.56906163946222e-02,-7.12349856999575e-03],[8.98748544483531e-02,+4.20897732548463e-02],[9.90136072387319e-02,+1.40302117404749e-01]],
                   [[-1.17049392363129e+02,-2.46859689910763e+02],[2.39899060049070e-01,-1.38101687968226e-01],[1.59658915211960e-01,-6.48727639912472e-04],[1.20144019881925e-01,+6.82934899895806e-02],[8.10343889012709e-02,+1.37369911169460e-01],[4.03481145874271e-03,+2.75896474493798e-01]]] );

  ok_should_make_plan( all( approx( fft2($x), $Xref, approx_eps_double) ),
     "2D FFTs threaded inside a 3D piddle" );

  ok_should_make_plan( all( approx( fft2( float $x), float($Xref), approx_eps_single) ),
     "2D FFTs threaded inside a 3D piddle - single precision" );


  # Now I take a slice of the input matrix, and fft that. This makes sure the
  # nembed logic works correctly. It turns out that $P() in PP physicalizes the
  # data, so no nembed logic currently exists, and stuff works anyway

  my $x_slice1_connected = $x->slice(':,0:4,:,:');
  my $x_slice1_severed   = $x_slice1_connected->copy;

  # octave ref; x is the same, but I slice it differently
  #  slice1 = fft2(re(1:5, 1:5 ) + i*im(1:5, 1:5 ))
  #  slice2 = fft2(re(1:5, 6:10) + i*im(1:5, 6:10))
  #  slice3 = fft2(re(1:5,11:15) + i*im(1:5,11:15))
  #  slice4 = fft2(re(1:5,16:20) + i*im(1:5,16:20))
  my $X_slice1_ref =
    pdl( [
          [[466.673919234444952,+126.917907968545364],[-17.870670056332415,+22.924773314027945],[-17.419713146828549,+4.981405529320815],[-17.185596880754357,-6.395001262195541],[-16.329892079156576,-25.335769447218507]],
          [[-146.569840424967879,+155.523541145286003],[7.951065332965691,-1.600154624557469],[4.722562521481950,+2.138095446385981],[2.501374281282990,+4.563007814082358],[-1.259388175222825,+8.080745148100860]],
          [[-111.129976688165755,+32.155437078172781],[2.298297449575815,-3.696832692971159],[2.653255527169226,-1.211338065207176],[2.961744281698524,+0.603384484335323],[3.142880661280621,+3.580230270481282]],
          [[-98.149556168903700,-38.308859759070558],[-1.167503315289326,+2.243222101185434],[-1.415433176872478,+0.465280753105545],[-1.265331912942163,-0.563640202617083],[-0.990113854951290,-1.818507239579064]],
          [[-55.390996616595444,-144.671074787325381],[4.429337544491914,+7.214235144120515],[-0.507891051226907,+5.073209513809378],[-3.336844872242201,+3.567780589580991],[-7.345688413939850,+1.308257900249694]]],
         [
          [[1.60938166615551e+03,+4.17670759558360e+02],[-2.68270274976814e+01,+2.26046920161292e+01],[-2.16496476842982e+01,+1.53675022177060e+00],[-1.84235786296893e+01,-1.15007886011593e+01],[-1.31627231622858e+01,-3.26244585940880e+01]],
          [[-1.58974908440522e+02,+1.36021676093101e+02],[-3.47634511617340e-01,-3.58080303406544e-01],[-4.81671641422727e-02,-3.04113508354926e-01],[1.37085201618827e-01,-2.75199098858623e-01],[4.36488388932893e-01,-2.35539773688715e-01]],
          [[-1.29686452688029e+02,+9.43606648956182e+00],[-7.01487711558949e-02,-2.99496693116574e-01],[7.08541090235068e-02,-1.75726626102805e-01],[1.59295212504510e-01,-1.01725734277373e-01],[3.04097411071888e-01,+1.38736645617663e-02]],
          [[-1.10651829493696e+02,-6.92608523091722e+01],[1.02487962542310e-01,-2.85298738134981e-01],[1.52660684703708e-01,-1.07199757251788e-01],[1.85962069038061e-01,+1.35561877936201e-03],[2.43247390743176e-01,+1.74296200677137e-01]],
          [[-7.85760340725247e+01,-1.97565816638999e+02],[3.69940012121665e-01,-2.93442198214098e-01],[2.89796229462924e-01,-1.61887501790537e-02],[2.44337576650966e-01,+1.54687612811214e-01],[1.77103574452566e-01,+4.29905675781014e-01]]],
         [
          [[2.84720460273767e+03,+6.95426680279720e+02],[-2.71001945282257e+01,+2.47316707441501e+01],[-2.25465424823261e+01,+2.50812248540162e+00],[-1.97162495543114e+01,-1.12342767103843e+01],[-1.51112098777464e+01,-3.34825045357138e+01]],
          [[-1.61503358924060e+02,+1.48680117938248e+02],[-1.79689739185806e-01,-2.15121551579987e-01],[-1.29520991006925e-02,-1.72606756571795e-01],[9.02589918624682e-02,-1.47576720159049e-01],[2.57436047526738e-01,-1.09094607758747e-01]],
          [[-1.35172303389293e+02,+1.51762198152973e+01],[-1.92334762886415e-02,-1.71645475592242e-01],[5.19956985158810e-02,-9.30825631541810e-02],[9.64968448637267e-02,-4.51405104074254e-02],[1.69230891271143e-01,+3.14149724776526e-02]],
          [[-1.18329666591565e+02,-6.75646155694635e+01],[8.08712125581526e-02,-1.51645886509917e-01],[9.50510458629830e-02,-4.70696243011893e-02],[1.04542849160088e-01,+1.73074187214384e-02],[1.21053350041535e-01,+1.21015057947666e-01]],
          [[-9.02186573536845e+01,-2.01901820815281e+02],[2.41850811057668e-01,-1.30135033446000e-01],[1.68066917394274e-01,+2.15612297762638e-02],[1.23657088647714e-01,+1.15562097610201e-01],[5.37254517374679e-02,+2.67979230130139e-01]]],
         [
          [[4.13847519887461e+03,+9.46564617413697e+02],[-2.73984844843996e+01,+2.60748739526840e+01],[-2.31796005168213e+01,+3.07463500922328e+00],[-2.05604995190013e+01,-1.11449862512929e+01],[-1.63039317216984e+01,-3.41606345226538e+01]],
          [[-1.63618296453467e+02,+1.56682279811098e+02],[-1.21816276616064e-01,-1.57768013235654e-01],[-4.20394797933782e-03,-1.23055796103499e-01],[6.85931647323148e-02,-1.02200070987194e-01],[1.86530323460939e-01,-6.94242384647662e-02]],
          [[-1.39005402614916e+02,+1.85410801291960e+01],[-7.24932070775660e-03,-1.22713508583900e-01],[4.06094617668798e-02,-6.41494708720088e-02],[7.04423276692341e-02,-2.82307037179682e-02],[1.19111127939139e-01,+2.94310371284693e-02]],
          [[-1.23376794373435e+02,-6.69854082776712e+01],[6.41877749756504e-02,-1.04443436094924e-01],[6.98384223826521e-02,-2.92264510229911e-02],[7.36940494064069e-02,+1.71728447983243e-02],[8.05143239080554e-02,+9.20916536812421e-02]],
          [[-9.74417295843736e+01,-2.05659722932842e+02],[1.79920562610709e-01,-8.03744824636155e-02],[1.19169692096844e-01,+2.45697427068684e-02],[8.21878607839186e-02,+8.96232824108548e-02],[2.32671625274674e-02,+1.95170681256241e-01]]] );

  ok_should_make_plan( all( approx( fft2($x_slice1_severed), $X_slice1_ref, approx_eps_double) ),
     "2D FFTs threaded inside a 3D piddle - slice1 - severed" );

  ok_should_reuse_plan( all( approx( fft2($x_slice1_connected), $X_slice1_ref, approx_eps_double) ),
     "2D FFTs threaded inside a 3D piddle - slice1 - connected" );

  # now go again with single precision. If $P() didn't physicalize its input,
  # this would no longer be aligned like before, since the input has shifted by
  # 4 bytes
  ok_should_make_plan( all( approx( fft2(float $x_slice1_connected), float($X_slice1_ref), approx_eps_single) ),
                       "2D FFTs threaded inside a 3D piddle - slice1 - connected - single precision" );




  # Similar, but slicing from 1
  my $x_slice2_connected = $x->slice(':,1:5,:,:');
  my $x_slice2_severed   = $x_slice2_connected->copy;

  # octave ref; x is the same, but I slice it differently
  #  slice1 = fft2(re(2:6, 1:5 ) + i*im(2:6, 1:5 ))
  #  slice2 = fft2(re(2:6, 6:10) + i*im(2:6, 6:10))
  #  slice3 = fft2(re(2:6,11:15) + i*im(2:6,11:15))
  #  slice4 = fft2(re(2:6,16:20) + i*im(2:6,16:20))
  my $X_slice2_ref =
    pdl( [
          [[501.602971299978947,+129.993495486019384],[-19.456482657442123,+24.258038724117910],[-18.901134436232283,+3.773486895387808],[-16.305859228821138,-7.743879151484332],[-15.423234561310013,-24.567626816829364]],
          [[-151.877806557964107,+149.398495098007515],[8.163865009246718,+0.126518424791354],[2.817072235106799,+2.881419182368629],[1.352437467320572,+2.913388776160406],[-0.169443715571585,+6.850269367688697]],
          [[-115.427317232676344,+31.210772021296826],[3.796588391538631,-3.335504645848345],[2.597359558283342,+0.777874238982427],[0.879920386805304,+1.061145569333210],[2.149823747278043,+1.912391035534666]],
          [[-96.426654416952132,-39.742809611359242],[-0.598036090192128,+0.778294180312916],[0.320622108376032,+0.964395257139944],[-1.544335750271782,+1.347131498949573],[-2.866826597745860,-1.701987219366836]],
          [[-53.072170179373629,-152.671376980423531],[2.832891540353520,+6.174977250984833],[0.255424405994367,+3.432508834397477],[-1.781776614496669,+4.184361527846315],[-7.917898111232422,+2.712874425873987]]],
         [
          [[1.64950652205531e+03,+4.27600276999139e+02],[-2.68326169419681e+01,+2.27006099751444e+01],[-2.16864604518378e+01,+1.58304952104838e+00],[-1.84802721265835e+01,-1.14845056637592e+01],[-1.32524144503064e+01,-3.26556345359360e+01]],
          [[-1.59063871746115e+02,+1.36592105027094e+02],[-3.36871694070668e-01,-3.49609863474421e-01],[-4.55953991858456e-02,-2.96056972955683e-01],[1.34600311442187e-01,-2.67131294666781e-01],[4.25889674867783e-01,-2.27029553320656e-01]],
          [[-1.29914250138566e+02,+9.70844038896290e+00],[-6.62751362095687e-02,-2.91745099839182e-01],[7.01619885190578e-02,-1.70438268145247e-01],[1.55723949511221e-01,-9.77926603796963e-02],[2.95815595456073e-01,+1.58897872028383e-02]],
          [[-1.10986042959537e+02,-6.91582210856051e+01],[1.02120011495870e-01,-2.76831416225680e-01],[1.49564602040562e-01,-1.03018025874890e-01],[1.81059831817083e-01,+3.01286202283804e-03],[2.35263568251515e-01,+1.72089158783488e-01]],
          [[-7.91021617428083e+01,-1.97705600191950e+02],[3.63757532989153e-01,-2.82417409570807e-01],[2.82874371475266e-01,-1.27278916830977e-02],[2.36733361180215e-01,+1.53574423567642e-01],[1.68068158380308e-01,+4.21594307082665e-01]]],
         [
          [[2.88949916162442e+03,+7.04137573981104e+02],[-2.71104520320376e+01,+2.47850199832838e+01],[-2.25708531361947e+01,+2.53121804927141e+00],[-1.97494432727707e+01,-1.12297375097047e+01],[-1.51590850953107e+01,-3.35077499355859e+01]],
          [[-1.61580394342465e+02,+1.48997806548413e+02],[-1.76879506003449e-01,-2.12465169857628e-01],[-1.24755429736926e-02,-1.70267020812346e-01],[8.92911645313911e-02,-1.45396031420977e-01],[2.54131545443049e-01,-1.07112102202037e-01]],
          [[-1.35319865252548e+02,+1.53132007477149e+01],[-1.85746548309280e-02,-1.69343727504296e-01],[5.15118373088666e-02,-9.16869090114727e-02],[9.52949133188648e-02,-4.42851974580498e-02],[1.66850120576454e-01,+3.14287260996196e-02]],
          [[-1.18527818813508e+02,-6.75353911868008e+01],[8.01879439510876e-02,-1.49371193378786e-01],[9.38997734474098e-02,-4.61642470396344e-02],[1.03082280030402e-01,+1.73771906993944e-02],[1.19061337332500e-01,+1.19751924894898e-01]],
          [[-9.05079981819587e+01,-2.02038499149128e+02],[2.39079369215351e-01,-1.27615555152443e-01],[1.65776790935290e-01,+2.18486772332205e-02],[1.21629148805613e-01,+1.14469852669599e-01],[5.20654175199115e-02,+2.64657747522687e-01]]],
         [
          [[4.18223859152336e+03,+9.54625443037726e+02],[-2.74079447865556e+01,+2.61124638323619e+01],[-2.31978971811230e+01,+3.09007255890941e+00],[-2.05843589930420e+01,-1.11431800313476e+01],[-1.63369532442610e+01,-3.41807837482412e+01]],
          [[-1.63682484895292e+02,+1.56906338384727e+02],[-1.20524688463036e-01,-1.56416089806520e-01],[-4.03733743270347e-03,-1.21913001592987e-01],[6.80631350314347e-02,-1.01174630711859e-01],[1.84871712650214e-01,-6.85694062663622e-02]],
          [[-1.39115953613999e+02,+1.86329023623412e+01],[-7.02295195511970e-03,-1.21580690846917e-01],[4.03182507468609e-02,-6.34983391169523e-02],[6.98267599630536e-02,-2.78713424589018e-02],[1.17963938106273e-01,+2.93281548087439e-02]],
          [[-1.23519603679800e+02,-6.69735391127112e+01],[6.37479744806300e-02,-1.03380542771702e-01],[6.92370420039889e-02,-2.88488700507459e-02],[7.29858824833397e-02,+1.71294493033826e-02],[7.96224057648701e-02,+9.13717087078728e-02]],
          [[-9.76419555018576e+01,-2.05773543168977e+02],[1.78415452819879e-01,-7.93210083326257e-02],[1.18035987634609e-01,+2.45615120500224e-02],[8.12721813043989e-02,+8.89567757119387e-02],[2.26856083517065e-02,+1.93436447059624e-01]]] );

  ok_should_reuse_plan( all( approx( fft2($x_slice2_severed), $X_slice2_ref, approx_eps_double) ),
                        "2D FFTs threaded inside a 3D piddle - slice2 - severed" );

  ok_should_reuse_plan( all( approx( fft2($x_slice2_connected), $X_slice2_ref, approx_eps_double) ),
                        "2D FFTs threaded inside a 3D piddle - slice2 - connected" );

  # now go again with single precision. If $P() didn't physicalize its input,
  # this would no longer be aligned like before, since the input has shifted by
  # 4 bytes
  ok_should_reuse_plan( all( approx( fft2(float $x_slice2_connected), float($X_slice2_ref), approx_eps_single) ),
                        "2D FFTs threaded inside a 3D piddle - slice2 - connected - single precision" );


  # I now try to fft INTO a slice. The out-of-slice pieces shouldn't be touched
  my $x_orig = $x->copy;
  fft2( $x_slice2_severed, $x_slice2_connected );
  is( get_autopthread_actual(), 2, "1D FFTs threaded inside a 3D piddle - slice2 - connected - in-slice correct - CPU threading should work" );
  ok_should_reuse_plan( all( approx( $x_slice2_connected, $X_slice2_ref, approx_eps_double) ),
                        "2D FFTs threaded inside a 3D piddle - slice2 - connected - in-slice correct" );

  my $X_partialfft_ref = $x_orig->copy;
  $X_partialfft_ref->slice(':,1:5,:,:') .= $x_slice2_connected;

  ok( all( approx( $x, $X_partialfft_ref, approx_eps_double) ),
      "2D FFTs threaded inside a 3D piddle - slice2 - connected - out-of-slice correct" );
}

# check the type logic
{
  my $x = sequence(10)->cat(sequence(10)**2)->mv(-1,0);

  # from octave: conj( fft( (0:9) + i* ((0:9).**2) )' )
  my $Xref = pdl( [45.0000000000000,+285.0000000000000],
                  [-158.8841768587627,+17.7490974608742],
                  [-73.8190960235587,-28.6459544426446],
                  [-41.3271264002680,-38.7279671349711],
                  [-21.2459848116453,-42.8475374738350],
                  [-5.0000000000000,-45.0000000000000],
                  [11.2459848116453,-46.0967344361641],
                  [31.3271264002680,-45.9933924150247],
                  [63.8190960235587,-42.4097736473563],
                  [148.8841768587627,-13.0277379108784] );

  my $f = null;
  my $F;

  fft1($x, $f);
  ok_should_reuse_plan( all( approx( $f, $Xref, approx_eps_double) ),
                        "Type-checking baseline" );
  ok( $PDL::FFTW3::_last_do_double_precision,
      "Unspecified FFTs should be double-precision" );

  $f = null;
  $F = fft1($x->float, $f);
  ok_should_reuse_plan( all( approx( $f, $Xref, approx_eps_single) ),
                        "Float input/Unspecified output baseline" );
  ok( ! $PDL::FFTW3::_last_do_double_precision,
      "Float input/Unspecified output should do a float FFT" );
  ok( $F->type == float,
      "Float input/Unspecified output should return a float" );

  $f = null;
  $F = fft1($x->byte, $f);
  ok_should_reuse_plan( all( approx( $f, $Xref, approx_eps_single) ),
                        "Byte input/Unspecified output baseline" );
  ok( ! $PDL::FFTW3::_last_do_double_precision,
      "Byte input/Unspecified output should do a float FFT" );
  ok( $F->type == float,
      "Byte input/Unspecified output should return a float" );

  $f = $x->zeros;
  fft1($x, $f);
  ok_should_reuse_plan( all( approx( $f, $Xref, approx_eps_double) ),
                        "double input/double output baseline" );
  ok( $PDL::FFTW3::_last_do_double_precision,
      "double input/double output should do a double FFT" );

  $f = $x->zeros->float;
  fft1($x, $f);
  ok_should_reuse_plan( all( approx( $f, $Xref, approx_eps_single) ),
                        "double input/float output baseline" );
  ok( ! $PDL::FFTW3::_last_do_double_precision,
      "double input/float output should do a float FFT" );

  $f = $x->zeros->byte;
  eval( 'fft1($x, $f)' );
  ok( $@, "Output to 'byte' should fail");

  $f = $x->zeros;
  fft1($x->float, $f->double );
  ok_should_reuse_plan( all( approx( $f, $Xref, approx_eps_double) ),
                        "float input/double output baseline" );
  ok( $PDL::FFTW3::_last_do_double_precision,
      "float input/double output should do a double FFT" );
}

# real fft basic checks
{
  my $x6 = sequence(6)**2;
  my $x7 = sequence(7)**2;

  # octave refs parse with this:
  # perl -ane '@F[2] =~ s/i//g; print "[$F[0],$F[1]$F[2]],\n";'

  # octave reference: conj(fft( (0:5).**2 )') and conj(fft( (0:6).**2 )')
  my $fx6_ref = pdl( [55.00000000000000,+0.00000000000000],
                     [-6.00000000000000,+31.17691453623979],
                     [-14.00000000000000,+10.39230484541326],
                     [-15.00000000000000,+0.00000000000000],
                     [-14.00000000000000,-10.39230484541326],
                     [-6.00000000000000,-31.17691453623979] );

  my $fx7_ref = pdl( [91.00000000000000,+0.00000000000000],
                     [-5.90820611352046,+50.87477421602225],
                     [-18.77412667908545,+19.53809802761889],
                     [-20.81766720739410,+5.59196512255867],
                     [-20.81766720739410,-5.59196512255867],
                     [-18.77412667908545,-19.53809802761889],
                     [-5.90820611352046,-50.87477421602225] );

  my $fx6 = rfft1($x6);
  ok_should_make_plan( all( approx( $fx6, $fx6_ref->slice(':,0:3'), approx_eps_double) ),
                       "rfft basic test - forward - 6long" );
  my $fx7 = rfft1($x7);
  ok_should_make_plan( all( approx( $fx7, $fx7_ref->slice(':,0:3'), approx_eps_double) ),
                       "rfft basic test - forward - 7long" );

  my $x6_back = irfft1($fx6_ref->slice(':,0:3'), zeros(6) );
  ok_should_make_plan( all( approx( $x6, $x6_back, approx_eps_double) ),
                       "rfft basic test - backward - 6long - output in arglist" );

  $x6_back = irfft1($fx6_ref->slice(':,0:3'));
  ok_should_reuse_plan( all( approx( $x6, $x6_back, approx_eps_double) ),
                        "rfft basic test - backward - 6long - output returned" );

  my $x7_back = irfft1($fx7_ref->slice(':,0:3'), zeros(7) );
  ok_should_make_plan( all( approx( $x7, $x7_back, approx_eps_double) ),
                       "rfft basic test - backward - 7long" );


  # Currently a single plan is made for ALL the thread slices. These tests are
  # meant to exercise cases where this is a bad assumption. I.e. where some
  # slices have different alignment than others. For some reason I'm not seeing
  # these tests trigger any failures, so I'm not adding any plan-per-slice
  # logic. I'm leaving these tests here so that if error DO show up, we'll know
  # about them.
  #
  # I have two 1d threaded real ffts with odd size. I do this both with
  # single-precision and with double-precision
  #
  # octave code:
  # conj(fft((0:4).**2)') and conj(fft((5+(0:4)).**2)')
  my $x5_double = sequence(10)**2;
  $x5_double = $x5_double->reshape(5,2);
  my $x5_single = $x5_double->float;

  my $fx5_ref = pdl( [ [30.00000000000000, + 0.00000000000000],
                       [-5.26393202250021, +17.20477400588967],
                       [-9.73606797749979, + 4.06149620291133],
                       [-9.73606797749979, - 4.06149620291133],
                       [-5.26393202250021, -17.20477400588967]],
                     [ [255.0000000000000, + 0.0000000000000],
                       [-30.2639320225002, +51.6143220176690],
                       [-34.7360679774998, +12.1844886087340],
                       [-34.7360679774998, -12.1844886087340],
                       [-30.2639320225002, -51.6143220176690]]);

  my $fx5_double = rfft1($x5_double);
  ok_should_make_plan( all( approx( $fx5_double, $fx5_ref->slice(':,0:2'), approx_eps_double) ),
                       "rfft threaded double precision, odd number. May need 2 plans" );

  my $fx5_single = rfft1($x5_single);
  ok_should_make_plan( all( approx( $fx5_single, $fx5_ref->slice(':,0:2'), approx_eps_single) ),
                       "rfft threaded single precision, odd number. May need 2 plans" );
}

# real fft 2D checks
{
  my $x64 = sequence(6,4)**1.1;
  my $x65 = sequence(6,5)**1.1;
  my $x74 = sequence(7,4)**1.1;
  my $x75 = sequence(7,5)**1.1;

  my $x64_ref = pdl( [[360.467089670772225,+0.000000000000000],[-96.799893286902019,+103.123844918798000],[-99.028084766558067,+0.000000000000000],[-96.799893286902019,-103.123844918798000]],
                     [[-15.988207248715064,+28.700459428988125],[0.351953528236895,-1.568072536701052],[0.894706789826859,-1.030374425555896],[1.617577583003193,-0.729750250118165]],
                     [[-16.292902538160519,+9.547035210008918],[0.634591040891635,-0.785541150275863],[0.769581432672688,-0.358960145970985],[1.059338032001399,-0.013005581424496]],
                     [[-16.334882296969155,+0.000000000000000],[0.826035615718165,-0.382043067657264],[0.750315495608504,+0.000000000000000],[0.826035615718165,+0.382043067657264]],
                     [[-16.292902538160519,-9.547035210008918],[1.059338032001399,+0.013005581424496],[0.769581432672688,+0.358960145970985],[0.634591040891635,+0.785541150275863]],
                     [[-15.988207248715064,-28.700459428988125],[1.617577583003193,+0.729750250118165],[0.894706789826859,+1.030374425555896],[0.351953528236895,+1.568072536701052]] )->xchg(1,2);

  my $x65_ref = pdl( [[581.176580714253532,+0.000000000000000],[-121.955619356066137,+180.656218696101774],[-126.233159544554283,+42.418129358506746],[-126.233159544554283,-42.418129358506746],[-121.955619356066137,-180.656218696101774]],
                     [[-20.541598254340641,+36.647109644693884],[0.166836245337382,-1.944952627138933],[0.717118890606079,-1.295587872699933],[1.204146586049559,-0.971137876719541],[2.048534847787472,-0.720103497369214]],
                     [[-20.868969490707826,+12.195700173607197],[0.644275726642125,-1.030653760454608],[0.749750248828911,-0.542566364608868],[0.913334629089297,-0.244339532151135],[1.274868845404011,+0.108769899029373]],
                     [[-20.913788738508799,+0.000000000000000],[0.937317766511587,-0.564249965530514],[0.811766871540105,-0.147421276996557],[0.811766871540105,+0.147421276996557],[0.937317766511587,+0.564249965530514]],
                     [[-20.868969490707826,-12.195700173607197],[1.274868845404011,-0.108769899029373],[0.913334629089297,+0.244339532151135],[0.749750248828911,+0.542566364608868],[0.644275726642125,+1.030653760454608]],
                     [[-20.541598254340641,-36.647109644693884],[2.048534847787472,+0.720103497369214],[1.204146586049559,+0.971137876719541],[0.717118890606079,+1.295587872699933],[0.166836245337382,+1.944952627138933]] )->xchg(1,2);

  my $x74_ref = pdl( [[501.493394074383787,+0.000000000000000],[-133.959755823278442,+142.617956550047921],[-137.025115324985222,+0.000000000000000],[-133.959755823278442,-142.617956550047921]],
                     [[-18.865669184773111,+40.763000085858437],[0.303558154663907,-2.139893880507619],[1.092794692703986,-1.466640717037794],[2.101733986260737,-1.130389132503664]],
                     [[-19.293372490503472,+15.621007669166248],[0.660940909046470,-1.114601274898615],[0.916525999732684,-0.589874650055651],[1.356292422245748,-0.197237286031954]],
                     [[-19.369109179404475,+4.468560279899923],[0.870724656176023,-0.640911587295642],[0.881225310656959,-0.170789696788245],[1.069971171773709,+0.261394699800439]],
                     [[-19.369109179404475,-4.468560279899923],[1.069971171773709,-0.261394699800439],[0.881225310656959,+0.170789696788245],[0.870724656176023,+0.640911587295642]],
                     [[-19.293372490503472,-15.621007669166248],[1.356292422245748,+0.197237286031954],[0.916525999732684,+0.589874650055651],[0.660940909046470,+1.114601274898615]],
                     [[-18.865669184773111,-40.763000085858437],[2.101733986260737,+1.130389132503664],[1.092794692703986,+1.466640717037794],[0.303558154663907,+2.139893880507619]] )->xchg(1,2);

  my $x75_ref = pdl( [[807.475069006401100,+0.000000000000000],[-168.753063158798682,+249.823189325119387],[-174.641491905125804,+58.661407474458414],[-174.641491905125804,-58.661407474458414],[-168.753063158798682,-249.823189325119387]],
                     [[-24.254907872104781,+52.050559234805071],[0.014162346363465,-2.632174401873477],[0.825892556600018,-1.808402549148552],[1.517875094286842,-1.417505033220904],[2.687499935923872,-1.159881805800431]],
                     [[-24.714356335554129,+19.955552964153426],[0.626228017088200,-1.433208444983354],[0.856153542379253,-0.823419842535374],[1.123969929564233,-0.469253829892776],[1.658488397174277,-0.080552774016870]],
                     [[-24.795144499563577,+5.709126404576390],[0.953872055406139,-0.885288945105194],[0.915476598274172,-0.361029355303055],[0.992228878787710,-0.012896599792635],[1.249581916098234,+0.447905615145083]],
                     [[-24.795144499563577,-5.709126404576390],[1.249581916098234,-0.447905615145083],[0.992228878787710,+0.012896599792635],[0.915476598274172,+0.361029355303055],[0.953872055406139,+0.885288945105194]],
                     [[-24.714356335554129,-19.955552964153426],[1.658488397174277,+0.080552774016870],[1.123969929564233,+0.469253829892776],[0.856153542379253,+0.823419842535374],[0.626228017088200,+1.433208444983354]],
                     [[-24.254907872104781,-52.050559234805071],[2.687499935923872,+1.159881805800431],[1.517875094286842,+1.417505033220904],[0.825892556600018,+1.808402549148552],[0.014162346363465,+2.632174401873477]] )->xchg(1,2);


  # forward
  my $fx64 = rfft2($x64);
  ok_should_make_plan( all( approx( $fx64, $x64_ref->slice(':,0:3,:'), approx_eps_double) ),
                       "rfft 2d test - forward - 6,4" );

  my $fx65 = rfft2($x65);
  ok_should_make_plan( all( approx( $fx65, $x65_ref->slice(':,0:3,:'), approx_eps_double) ),
                       "rfft 2d test - forward - 6,5" );

  my $fx74 = rfft2($x74);
  ok_should_make_plan( all( approx( $fx74, $x74_ref->slice(':,0:3,:'), approx_eps_double) ),
                       "rfft 2d test - forward - 7,4" );

  my $fx75 = rfft2($x75);
  ok_should_make_plan( all( approx( $fx75, $x75_ref->slice(':,0:3,:'), approx_eps_double) ),
                       "rfft 2d test - forward - 7,5" );



  # backward
  my $x64_back = irfft2($x64_ref->slice(':,0:3,:') );
  ok_should_make_plan( all( approx( $x64, $x64_back, approx_eps_double) ),
                       "rfft 2d test - backward - 6,4 - output returned" );

  $x64_back = zeros(6,4);
  irfft2($x64_ref->slice(':,0:3,:'), $x64_back );
  ok_should_reuse_plan( all( approx( $x64, $x64_back, approx_eps_double) ),
                        "rfft 2d test - backward - 6,4 - output in arglist" );

  my $x65_back = irfft2($x65_ref->slice(':,0:3,:') );
  ok_should_make_plan( all( approx( $x65, $x65_back, approx_eps_double) ),
                       "rfft 2d test - backward - 6,5 - output returned" );

  $x65_back = zeros(6,5);
  irfft2($x65_ref->slice(':,0:3,:'), $x65_back );
  ok_should_reuse_plan( all( approx( $x65, $x65_back, approx_eps_double) ),
                        "rfft 2d test - backward - 6,5 - output in arglist" );

  my $x74_back = irfft2($x74_ref->slice(':,0:3,:'), zeros(7,4) );
  ok_should_make_plan( all( approx( $x74, $x74_back, approx_eps_double) ),
                       "rfft 2d test - backward - 7,4" );

  my $x75_back = irfft2($x75_ref->slice(':,0:3,:'), zeros(7,5) );
  ok_should_make_plan( all( approx( $x75, $x75_back, approx_eps_double) ),
                       "rfft 2d test - backward - 7,5" );
}

# real fft sanity checks
{
  {
    eval 'rfft1( sequence(6) );';
    ok_should_reuse_plan( ! $@, "real ffts shouldn't work inplace - baseline forward" );
    eval 'rfft1( inplace sequence(6) );';
    ok( $@, "real ffts shouldn't work inplace - forward" );
  }

  {
    eval 'irfft1( sequence(2,4) );';
    ok_should_reuse_plan( ! $@, "real ffts shouldn't work inplace - baseline backward" );
    eval 'irfft1( inplace sequence(2,4) );';
    ok( $@, "real ffts shouldn't work inplace - backward" );
  }

  {
    eval 'rfft1( sequence(7), sequence(2,4) );';
    ok_should_reuse_plan( ! $@, "real fft dims - baseline");

    eval 'rfft1( sequence(7), sequence(3,4) );';
    ok( $@, "real fft dimensionality 1");

    eval 'rfft1( sequence(7), sequence(1,4) );';
    ok( $@, "real fft dimensionality 2");

    eval 'rfft1( sequence(7), sequence(2,5) );';
    ok( $@, "real fft dimensionality 3");

    eval 'rfft1( sequence(7), sequence(2,3) );';
    ok( $@, "real fft dimensionality 4");
  }

  {
    eval( 'rfft4( sequence(5,3,4,4) );' );
    ok_should_make_plan( ! $@, "real dimensionality baseline" );

    eval( 'rfft4( sequence(5,4,4) );' );
    ok( $@, "real dimensionality: too few dimensions should fail" );

    eval( 'irfft4( sequence(2,5,3,4,4) );' );
    ok_should_make_plan( ! $@, "real-backward dimensionality baseline" );

    eval( 'irfft4( sequence(5,4,4) );' );
    ok( $@, "real-backward dimensionality: too few dimensions should fail" );
  }

  {
    eval( 'rfft4( sequence(5,3,4,4), sequence(2,3,3,4,4) );' );
    ok_should_reuse_plan( ! $@, "real dimensionality baseline - more explicit" );

    eval( 'rfft4( sequence(5,3,4,4), sequence(2,3,3,4,4,3) );' );
    ok_should_reuse_plan( ! $@, "real dimensionality baseline - extra dims should be ok" );

    eval( 'rfft4( sequence(5,3,4,4), sequence(3,3,3,4,3) );' );
    ok( $@, "real dimensionality - output should look like complex numbers" );

    eval( 'rfft4( sequence(5,3,4,4), sequence(2,3,3,4,3) );' );
    ok( $@, "real dimensionality - different dims should break 1" );

    eval( 'rfft4( sequence(5,3,4,4), sequence(2,3,3,4,5) );' );
    ok( $@, "real dimensionality - different dims should break 2" );

    eval( 'rfft4( sequence(4,3,4,4), sequence(2,3,3,4,4) );' );
    ok_should_make_plan( !$@, "real dimensionality - slightly different complex dims still ok" );

    eval( 'rfft4( sequence(6,3,4,4), sequence(2,3,3,4,4) );' );
    ok( $@, "real dimensionality - too different complex dims should break" );
  }

  {
    eval( 'irfft4( sequence(2,3,3,4,4) );' );
    ok_should_make_plan( ! $@, "real-backward dimensionality baseline 1" );

    eval( 'irfft4( sequence(2,3,3,4,4), sequence(5,3,4,4) );' );
    ok_should_make_plan( ! $@, "real-backward dimensionality baseline 2" );

    eval( 'irfft4( sequence(2,3,3,4,4,3), sequence(5,3,4,4) );' );
    ok_should_reuse_plan( ! $@, "real-backward dimensionality baseline - extra dims should be ok" );

    eval( 'irfft4( sequence(3,3,3,4,4,3), sequence(5,3,4,4) );' );
    ok( $@, "real-backward dimensionality - input should look like complex numbers" );

    eval( 'irfft4( sequence(2,3,3,4,3), sequence(5,3,4,4) );' );
    ok( $@, "real-backward dimensionality - different dims should break 1" );

    eval( 'irfft4( sequence(2,3,3,4,5), sequence(5,3,4,4) );' );
    ok( $@, "real-backward dimensionality - different dims should break 2" );

    eval( 'irfft4( sequence(2,3,3,4,4), sequence(4,3,4,4) );' );
    ok_should_reuse_plan( !$@, "real-backward dimensionality - slightly different complex dims still ok" );

    eval( 'irfft4( sequence(2,3,3,4,4), sequence(6,3,4,4) );' );
    ok( $@, "real-backward dimensionality - too different complex dims should break" );
  }
}

# make sure the routines do not damage their input
{
  # I use all-new size to make sure a planning phase takes place. This checks
  # the planning phase for preserving its input also
  my $xorig = sequence(6,7,8);
  my $Xorig = sequence(2,6,7,8);
  my ($x,$X);

  $X = $Xorig->copy;
  fft2($X);
  ok_should_make_plan( all( approx( $X, $Xorig, approx_eps_double) ),
                       "2D forward FFT preserves its input" );

  $X = $Xorig->copy;
  ifft2($X);
  ok_should_make_plan( all( approx( $X, $Xorig, approx_eps_double) ),
                       "2D backward FFT preserves its input" );

  $x = $xorig->copy;
  rfft3($x);
  ok_should_make_plan( all( approx( $x, $xorig, approx_eps_double) ),
                       "3D real forward FFT preserves its input" );

  $X = $Xorig->copy;
  irfft3($X);
  ok_should_make_plan( all( approx( $X, $Xorig, approx_eps_double) ),
                       "3D real backward FFT preserves its input" );
}

# Check parameterized operation
{
    my $a = sequence(2,4,4,4)==0;
    my $b = fftn($a,1);
    my $btemplate = zeroes($a);
    $btemplate->slice('(0),:,(0),(0)') .= 1;
    ok_should_make_plan( all( approx( $b, $btemplate, approx_eps_double ) ),
			 "parameterized forward complex FFT works (1d on a 1+3d var)" );

    ok_should_reuse_plan( all( approx( $a->fftn(1), $btemplate, approx_eps_double ) ),
                          "parameterized forward complex FFT works (1d on a 1+3d var) - method" );

    $b = fftn($a,2);
    $btemplate .= 0;
    $btemplate->slice('(0),:,:,(0)') .= 1;
    ok_should_make_plan( all( approx( $b, $btemplate, approx_eps_double ) ),
			 "parameterized forward complex FFT works (2d on a 1+3d var)" );

    $b = fftn($a,3);
    $btemplate .= 0;
    $btemplate->slice('(0),:,:,:') .= 1;
    ok_should_make_plan( all( approx( $b, $btemplate, approx_eps_double ) ),
			 "parameterized forward complex FFT works (3d on a 1+3d var)" );

    # inverse on 2d -- should leave the 3rd dimension alone
    my $c = ifftn($b,2);
    my $ctemplate= zeroes($c);
    $ctemplate->slice('(0),(0),(0)') .= 1;
    ok_should_make_plan( all( approx( $c, $ctemplate, approx_eps_double ) ),
			 "parameterized reverse complex FFT works (2d on a 1+3d var)" );

    $a = sequence(4,4,4)==0;
    $b = rfftn($a,1);
    $btemplate = zeroes($b);
    $btemplate->slice('(0),:,(0),(0)') .= 1;
    ok_should_make_plan( all( approx( $b, $btemplate, approx_eps_double ) ),
			 "parameterized forward real FFT works (1d on a 3d var)" );

    # inverse on 2d -- should be a normalized forward on the non-transformed direction
    $c = irfftn($b,2);
    $ctemplate = zeroes($a);
    $ctemplate->slice('(0),:,(0)') .= 0.25;
    ok_should_make_plan( all( approx( $c, $ctemplate, approx_eps_double) ),
			 "parameterized reverse real FFT works (2d on a 3d var)" );

}

# Alignment checks. Here I try to fft purposely-unaligned piddles to make sure
# that PDL::FFTW3 both makes a new plan and computes the data correctly. I only
# create an input offset
#
# This test isn't written perfectly. It assumes that $x->copy is aligned at 16
# bytes, which is true on my amd64 box, but is not true in general.
# Additionally, FFTW crashes if alignment is particularly bad, so I disable
# these tests.
if(0)
{
  my $x = sequence(10)->cat(sequence(10)**2)->mv(-1,0);

  # from octave: conj( fft( (0:9) + i* ((0:9).**2) )' )
  my $Xref = pdl( [45.0000000000000,+285.0000000000000],
                  [-158.8841768587627,+17.7490974608742],
                  [-73.8190960235587,-28.6459544426446],
                  [-41.3271264002680,-38.7279671349711],
                  [-21.2459848116453,-42.8475374738350],
                  [-5.0000000000000,-45.0000000000000],
                  [11.2459848116453,-46.0967344361641],
                  [31.3271264002680,-45.9933924150247],
                  [63.8190960235587,-42.4097736473563],
                  [148.8841768587627,-13.0277379108784] );

  my $length = length ${$x->get_dataref};
  for my $offset (4,8)
  {
    # this assumes a 16-aligned piddle is created. This is true on my amd64 box
    my $x_offset = $x->copy;

    # create an offset in the data inside the piddle
    ${$x_offset->get_dataref} .= ' ' x $offset;
    substr(${$x_offset->get_dataref}, 0, $offset) = "";
    substr(${$x_offset->get_dataref}, 0) = ${$x->get_dataref};
    $x_offset->upd_data();

    ok_should_make_plan( all( approx( fft1($x_offset), $Xref, approx_eps_double) ),
                         "Basic 1D complex FFT - unaligned $offset bytes" );
  }
}

done_testing;

sub ok_should_make_plan
{
  my ($value, $planname) = @_;
  ok( $value, $planname );
  check_new_plan( $planname );
}

sub ok_should_reuse_plan
{
  my ($value, $planname) = @_;
  ok( $value, $planname );
  check_reused_plan( $planname );
}

sub check_new_plan
{
  my $planname = shift;

 SKIP: {
    skip "Plan creation checks disabled because pdl memory may be unaligned", 1 unless $do_check_plan_creations;
    ok( $PDL::FFTW3::_Nplans == $Nplans+1,
        "$planname: should make a new plan" );
  }

  $Nplans = $PDL::FFTW3::_Nplans;
}

sub check_reused_plan
{
  my $planname = shift;

 SKIP: {
    skip "Plan creation checks disabled because pdl memory may be unaligned", 1 unless $do_check_plan_creations;
    ok( $PDL::FFTW3::_Nplans == $Nplans,
        "$planname: should reuse an existing plan" );
  }
  $Nplans = $PDL::FFTW3::_Nplans;
}
