docs_html/doxygen/AMReX__ParticleInit_8H_source.html

#ifndef AMREX_PARTICLEINIT_H

#define AMREX_PARTICLEINIT_H

#include <AMReX_Config.H>


#include <iterator>

#include <utility>


namespace amrex {


/*

  \brief Initialize particles from an Ascii file in the following format:


         8

         0.1  0.1 16.2 1000.0 4.0 1.0 6.0

         8.1  0.1 16.2 1000.0 -5 0.0 -7.0

         16.1  0.1 16.2 1000.0 6.0 -8.0 2.0

         24.1  0.1 16.2 1000.0 9.0 4.0 8.0

         0.1  8.1 16.2 1000.0 -8.0 -3.0 -10.0

         8.1  8.1 16.2 1000.0 2.0 1.0 0.0

         16.1  8.1 16.2 1000.0 0.0 2.0 3.0

         24.1  8.1 16.2 1000.0 -9.0 7.0 5.0


  The first line is the number of particles. The remaining lines give the particle

  data to read, one particle on each line. The first AMREX_SPACEDIM components are

  positions. The next extradata components are the additional real data to read in.


  Integer data is not currently supported by this function.


  Parameters:


     file:      the name of the Ascii file with the particles

     extradata: the number of real components to read in, beyond the AMREX_SPACEDIM

                positions

     Nrep:      pointer to IntVect that lets you replicate the incoming particles

                across the domain so that you only need to specify a sub-volume of

                them. By default particles are not replicated.

 */

template <typename ParticleType, int NArrayReal, int NArrayInt,

          template<class> class Allocator, class CellAssignor>

void

ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>


::InitFromAsciiFile (const std::string& file, int extradata, const IntVect* Nrep)

{

    BL_PROFILE("ParticleContainer<NSR, NSI, NAR, NAI>::InitFromAsciiFile()");

    AMREX_ASSERT(!file.empty());

    AMREX_ASSERT(extradata <= NStructReal + NumRealComps());


    const int  MyProc   = ParallelDescriptor::MyProc();

    const int  NProcs   = ParallelDescriptor::NProcs();

    const auto strttime = amrex::second();


    int NReaders = MaxReaders();  // num ranks to read with

    int NRedist = 1;  // number of times to redistribute while reading


    // try to use some sensible heuristics

    if (NProcs <= 1024)

    {

        if (NReaders > 1)

        {

            NRedist = 2;

        }

    }

    else if (NProcs <= 4096)

    {

        NReaders = std::max(NReaders,128);

        NRedist = 4;

    }

    else if (NProcs <= 8192)

    {

        NReaders = std::max(NReaders,384);

        NRedist = 32;

    }

    else if (NProcs <= 16384)

    {

        NReaders = std::max(NReaders,512);

        NRedist = 48;

    }


    resizeData();


    IntVect lNrep(AMREX_D_DECL(1,1,1));


    if (Nrep != nullptr) {

        lNrep = *Nrep;

    }


    Long how_many      = 0;

    Long how_many_read = 0;


    Gpu::HostVector<ParticleType> nparticles;

    Vector<Gpu::HostVector<Real> > nreals(0);

    if (extradata > NStructReal) { nreals.resize(extradata - NStructReal); }


    if (MyProc < NReaders)

    {

        VisMF::IO_Buffer io_buffer(VisMF::IO_Buffer_Size);


        std::ifstream ifs;


        ifs.rdbuf()->pubsetbuf(io_buffer.dataPtr(), io_buffer.size());


        ifs.open(file.c_str(), std::ios::in);


        if (!ifs.good())

        {

            amrex::FileOpenFailed(file);

        }


        int cnt = 0;


        ifs >> cnt >> std::ws;


        ParticleLocData pld;

        ParticleType p, p_rep;

        Vector<Real> r;


        if (extradata > NStructReal) { r.resize(extradata - NStructReal); }


        const int Chunk = cnt / NReaders;


        for (int i = 0; i < MyProc*Chunk; i++)

        {

            ifs.ignore(std::numeric_limits<std::streamsize>::max(), '\n');

            ifs >> std::ws;  // Eat newline.

        }


        if (!ifs.good())

        {

            std::string msg("ParticleContainer::InitFromAsciiFile(");

            msg += file;

            msg += ") failed @ 1";

            amrex::Error(msg.c_str());

        }


        int MyCnt = Chunk;


        if (MyProc == (NReaders - 1))

        {

            MyCnt += cnt % NReaders;

        }


        const Geometry& geom = Geom(0);


        for (int i = 0; i < MyCnt; i++)

        {

            AMREX_D_TERM(ifs >> p.pos(0);,

                         ifs >> p.pos(1);,

                         ifs >> p.pos(2););


            for (int n = 0; n < extradata; n++)

            {

                if (n < NStructReal)

                {

                    ifs >> p.rdata(n);

                }

                else

                {

                    ifs >> r[n - NStructReal];

                }

            }


            if (!ifs.good())

            {

                std::string msg("ParticleContainer::InitFromAsciiFile(");

                msg += file; msg += ") failed @ 2";

                amrex::Error(msg.c_str());

            }


            if (!Where(p, pld))

            {

                PeriodicShift(p);


                if (!Where(p, pld))

                {

                    if (m_verbose) {

                        amrex::AllPrint() << "BAD PARTICLE ID WOULD BE " << ParticleType::NextID() << '\n'

                                          << "BAD PARTICLE POS "

                                          << AMREX_D_TERM(   p.pos(0),

                                                          << p.pos(1),

                                                          << p.pos(2))

                                          << "\n";

                    }

                    amrex::Abort("ParticleContainer::InitFromAsciiFile(): invalid particle");

                }

            }


            // set these rather than reading them in

            p.id()  = ParticleType::NextID();

            p.cpu() = MyProc;


            nparticles.push_back(p);

            if(nreals.size() > extradata - NStructReal) {

                for (int n = NStructReal; n < extradata; n++)

                {

                    nreals[n-NStructReal].push_back(r[n-NStructReal]);

                }

            }


            how_many++;

            how_many_read++;


            const Real DomSize[AMREX_SPACEDIM] =

                { AMREX_D_DECL((geom.ProbHi(0)-geom.ProbLo(0))/lNrep[0],

                               (geom.ProbHi(1)-geom.ProbLo(1))/lNrep[1],

                               (geom.ProbHi(2)-geom.ProbLo(2))/lNrep[2]) };

            int rep[AMREX_SPACEDIM];


#if AMREX_SPACEDIM > 2

            for (rep[2] = 1; rep[2] <= lNrep[2]; rep[2]++)

            {

#endif

#if AMREX_SPACEDIM > 1

                for (rep[1] = 1; rep[1] <= lNrep[1]; rep[1]++)

                {

#endif

                    for (rep[0] = 1; rep[0] <= lNrep[0]; rep[0]++)

                    {

                        if (!(AMREX_D_TERM( (rep[0] == 1), && (rep[1] == 1), && (rep[2] == 1) ) ) )

                        {

                            // Shift the position.

                            for (int d=0; d<AMREX_SPACEDIM; ++d)

                            {

                                p_rep.pos(d) = static_cast<ParticleReal>(p.pos(d) + Real(rep[d]-1)*DomSize[d]);

                            }


                            // Copy the extra data

                            for (int n = 0; n < extradata; n++)

                            {

                                if (n < NStructReal)

                                {

                                    p_rep.rdata(n) = p.rdata(n);

                                }

                            }


                            if (!Where(p_rep, pld))

                            {

                                PeriodicShift(p_rep);

                                if (!Where(p_rep, pld))

                                {

                                    if (m_verbose) {

                                        amrex::AllPrint() << "BAD REPLICATED PARTICLE ID WOULD BE " << ParticleType::NextID() << "\n";

                                    }

                                    amrex::Abort("ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt>::InitFromAsciiFile(): invalid replicated particle");

                                }

                            }


                            p_rep.id()  = ParticleType::NextID();

                            p_rep.cpu() = MyProc;


                            nparticles.push_back(p_rep);


                            if (nreals.size() > extradata - NStructReal) {

                                for (int n = NStructReal; n < extradata; n++)

                                {

                                    nreals[n-NStructReal].push_back(r[n-NStructReal]);

                                }

                            }


                            how_many++;

                        }

                    }

#if AMREX_SPACEDIM > 1

                }

#endif

#if AMREX_SPACEDIM > 2

            }

#endif


        }


    }


    // Now Redistribute() each chunk separately to minimize memory bloat.

    int NRedist_chunk = NReaders / NRedist;


    ParticleLocData pld;


    for (int nr = 0; nr < NRedist; nr++)

    {

        Vector<std::map<std::pair<int, int>,Gpu::HostVector<ParticleType> > > host_particles;

        host_particles.reserve(15);

        host_particles.resize(finestLevel()+1);


        Vector<std::map<std::pair<int, int>, std::array<Gpu::HostVector<ParticleReal>, NArrayReal > > > host_real_attribs;

        host_real_attribs.reserve(15);

        host_real_attribs.resize(finestLevel()+1);


        if (m_verbose > 0) {

            amrex::Print() << "Redistributing from processor "

                           << nr*NRedist_chunk << " to "

                           << (nr+1)*NRedist_chunk-1 << '\n';

        }


        for (int which = nr*NRedist_chunk; which < (nr+1)*NRedist_chunk; which++)

        {

            if (which == MyProc)

            {

                while (!nparticles.empty())

                {

                    ParticleType& p = nparticles.back();

                    Where(p, pld);


                    host_particles[pld.m_lev][std::make_pair(pld.m_grid, pld.m_tile)].push_back(p);


                    if (nreals.size() > extradata - NStructReal && NArrayReal > 0)

                    {

                        for (int n = NStructReal; n < extradata; n++)

                        {

                            Real rdata = nreals[n-NStructReal].back();

                            host_real_attribs[pld.m_lev][std::make_pair(pld.m_grid, pld.m_tile)][n-NStructReal].push_back(rdata);

                        }

                    }


                    nparticles.pop_back();


                    if (nreals.size() > extradata - NStructReal)

                    {

                        for (int n = NStructReal; n < extradata; n++)

                        {

                            nreals[n-NStructReal].pop_back();

                        }

                    }

                }

            }

        }


        for (int lev = 0; lev < std::ssize(host_particles); ++lev)

        {

            for (auto& kv : host_particles[lev])

            {


                auto grid = kv.first.first;

                auto tile = kv.first.second;

                const auto& src_tile = kv.second;


                auto& dst_tile = GetParticles(lev)[std::make_pair(grid,tile)];

                auto old_size = dst_tile.GetArrayOfStructs().size();

                auto new_size = old_size + src_tile.size();

                dst_tile.resize(new_size);


                Gpu::copyAsync(Gpu::hostToDevice, src_tile.begin(), src_tile.end(),

                               dst_tile.GetArrayOfStructs().begin() + old_size);


                if (std::ssize(host_real_attribs[lev][std::make_pair(grid, tile)]) > NArrayReal) {

                    for (int i = 0; i < NArrayReal; ++i) {

                        Gpu::copyAsync(Gpu::hostToDevice,

                                       host_real_attribs[lev][std::make_pair(grid,tile)][i].begin(),

                                       host_real_attribs[lev][std::make_pair(grid,tile)][i].end(),

                                       dst_tile.GetStructOfArrays().GetRealData(i).begin() + old_size);

                    }

                }

            }

        }

        Gpu::streamSynchronize();


        Redistribute();

    }


    if (m_verbose > 0)

    {

        if (NRedist*NRedist_chunk < NReaders) {

            amrex::Print() << "Redistributing from processor "

                           << NRedist*NRedist_chunk << " to "

                           << NReaders << '\n';

        }

    }

    for (int which = NRedist*NRedist_chunk; which < NReaders; which++)

    {

        Vector<std::map<std::pair<int, int>,Gpu::HostVector<ParticleType> > > host_particles;

        host_particles.reserve(15);

        host_particles.resize(finestLevel()+1);


        Vector<std::map<std::pair<int, int>, std::array<Gpu::HostVector<ParticleReal>, NArrayReal > > > host_real_attribs;

        host_real_attribs.reserve(15);

        host_real_attribs.resize(finestLevel()+1);


        if (which == MyProc)

        {

            while (!nparticles.empty())

            {

                ParticleType& p = nparticles.back();

                Where(p, pld);


                host_particles[pld.m_lev][std::make_pair(pld.m_grid, pld.m_tile)].push_back(p);

                if (std::ssize(host_real_attribs[pld.m_lev][std::make_pair(pld.m_grid, pld.m_tile)]) >

                    extradata - NStructReal) {

                    for (int n = NStructReal; n < extradata; n++)

                    {

                        Real rdata = nreals[n-NStructReal].back();

                        host_real_attribs[pld.m_lev][std::make_pair(pld.m_grid, pld.m_tile)][n-NStructReal].push_back(rdata);

                    }

                }


                nparticles.pop_back();


                if (nreals.size() > extradata - NStructReal) {

                    for (int n = NStructReal; n < extradata; n++)


                    {

                        nreals[n-NStructReal].pop_back();


                    }

                }

            }

        }


        for (int lev = 0; lev < std::ssize(host_particles); ++lev)

        {

            for (auto& kv : host_particles[lev])

            {

                auto grid = kv.first.first;

                auto tile = kv.first.second;

                const auto& src_tile = kv.second;


                auto& dst_tile = GetParticles(lev)[std::make_pair(grid,tile)];

                auto old_size = dst_tile.GetArrayOfStructs().size();

                auto new_size = old_size + src_tile.size();

                dst_tile.resize(new_size);


                Gpu::copyAsync(Gpu::hostToDevice, src_tile.begin(), src_tile.end(),

                               dst_tile.GetArrayOfStructs().begin() + old_size);


                for (int i = 0; i < NArrayReal; ++i) {

                    Gpu::copyAsync(Gpu::hostToDevice,

                                   host_real_attribs[lev][std::make_pair(grid,tile)][i].begin(),

                                   host_real_attribs[lev][std::make_pair(grid,tile)][i].end(),

                                   dst_tile.GetStructOfArrays().GetRealData(i).begin() + old_size);

                }

            }

        }

        Gpu::streamSynchronize();


        Redistribute();


    }


    if (m_verbose > 0)

    {

        const int IOProcNumber = ParallelDescriptor::IOProcessorNumber();


        Long num_particles = how_many;


        ParallelDescriptor::ReduceLongSum(num_particles, IOProcNumber);


        if (AMREX_D_TERM(lNrep[0] == 1, && lNrep[1] == 1, && lNrep[2] == 1))

        {

            amrex::Print() << "Total number of particles: " << num_particles << '\n';

        }

        else

        {

            Long num_particles_read = how_many_read;


            ParallelDescriptor::ReduceLongSum(num_particles_read, IOProcNumber);


            amrex::Print() << "Replication the domain with vector           "

                           << AMREX_D_TERM(lNrep[0] << " ", << lNrep[1] << " ", << lNrep[2]) << "\n"


                           << "Total number of particles read in          : " << num_particles_read << '\n'

                           << "Total number of particles after replication: " << num_particles      << '\n';

        }

    }


    AMREX_ASSERT(OK());


    if (m_verbose > 1)

    {

        ByteSpread();


        auto runtime = amrex::second() - strttime;


        ParallelDescriptor::ReduceRealMax(runtime, ParallelDescriptor::IOProcessorNumber());


        amrex::Print() << "InitFromAsciiFile() time: " << runtime << '\n';

    }

}


//

// The format of a binary particle init file:

//

// NP -- The number of particles in the file.   A "Long".

// DM -- Our dimension.  Either 1, 2, or 3.     A "int".

// NX -- The amount of "extra" data.            A "int".

// NP*(DM+NX) native floating-point numbers.    A "float" or "double".

//

// Note that there is nothing separating all these values.

// They're packed into the binary file like sardines.

//

template <typename ParticleType, int NArrayReal, int NArrayInt,

          template<class> class Allocator, class CellAssignor>

void


ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>::

InitFromBinaryFile (const std::string& file,

                    int                extradata)

{

    BL_PROFILE("ParticleContainer<NSR, NSI, NAR, NAI>::InitFromBinaryFile()");

    AMREX_ASSERT(!file.empty());

    AMREX_ASSERT(extradata <= NStructReal);


    const int  MyProc   = ParallelDescriptor::MyProc();

    const int  NProcs   = ParallelDescriptor::NProcs();

    const int  IOProc   = ParallelDescriptor::IOProcessorNumber();

    const auto strttime = amrex::second();

    //

    // The number of MPI processes that read from the file.

    // We limit this to a rather small number since there's a limit

    // to the number of independent I/O channels on most filesystems.

    //

    const int NReaders = MaxReaders();


    AMREX_ASSERT(NReaders <= NProcs);

    //

    // How many particles each NReaders reads before redistributing.

    //

    const Long NPartPerRedist = MaxParticlesPerRead();


    if (m_verbose > 0)

    {

        amrex::Print() << "Reading with " << NReaders << " readers\n"

                       << "Redistributing after every " << NPartPerRedist << " particles for each reader\n";

    }

    //

    // tmp_particles should mirror how m_particles is built.

    // At the end of this routine it'll be the new m_particles.

    //

    Vector<ParticleLevel> tmp_particles;


    tmp_particles.reserve(15);  // So we don't ever have to do any copying on a resize.

    tmp_particles.resize(finestLevel()+1);


    resizeData();


    //

    // All the processors need to participate in Redistribute() though.

    //

    int  NX = 0;

    Long NP = 0;


    VisMF::IO_Buffer io_buffer(VisMF::IO_Buffer_Size);


    std::ifstream ifs;


    //

    // The "set" of MPI processor numbers of the readers.  We want

    // to be able to easily check whether or not we're a reader.

    //

    std::set<int> readers;

    //

    // The same set but in ascending order.

    //

    Vector<int> rprocs(NReaders);


    if (NReaders == NProcs)

    {

        //

        // Just set'm.

        //

        for (int i = 0; i < NProcs; i++) {

            rprocs[i] = i;

        }

    }

    else

    {

        //

        // The I/O Proc builds a set of NReader integers in the range: [0,NProcs-1].

        //

        // It then broadcast'm to all MPI procs.

        //

        // We want these to be as evenly distributed over the full set of

        // [0,NProcs-1] MPI processors as possible, so that when reading we

        // minimize the number of readers per Node, and hence can use more

        // of the available Node memory for reading.

        //

        if (ParallelDescriptor::IOProcessor())

        {

            do

            {

                auto n = int(amrex::Random() * Real(NProcs-1));


                AMREX_ASSERT(n >= 0);

                AMREX_ASSERT(n < NProcs);


                readers.insert(n);

            }

            while (std::ssize(readers) < NReaders);


            AMREX_ASSERT(std::ssize(readers) == rprocs.size());


            int i = 0;


            for (auto it = readers.cbegin(), End = readers.cend();

                 it != End;

                 ++it, ++i)

            {

                rprocs[i] = *it;

            }

        }


        ParallelDescriptor::Bcast(rprocs.dataPtr(), rprocs.size(), IOProc);

    }


    if (readers.empty())

    {

        //

        // Set readers for non I/O procs.

        //

        readers.insert(rprocs.begin(), rprocs.end());


        AMREX_ASSERT(std::ssize(readers) == rprocs.size());


        AMREX_ASSERT(rprocs.size() == NReaders);

    }


    int RealSizeInFile = 0;


    if (readers.contains(MyProc))

    {

        int DM = 0;


        ifs.rdbuf()->pubsetbuf(io_buffer.dataPtr(), io_buffer.size());


        ifs.open(file.c_str(), std::ios::in|std::ios::binary);


        if (!ifs.good()) {

            amrex::FileOpenFailed(file);

        }


        ifs.read((char*)&NP, sizeof(NP));

        ifs.read((char*)&DM, sizeof(DM));

        ifs.read((char*)&NX, sizeof(NX));

        //

        // NP MUST be positive!

        //

        if (NP <= 0) {

            amrex::Abort("ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt>::InitFromBinaryFile(): NP <= 0");

        }

        //

        // DM must equal AMREX_SPACEDIM.

        //

        if (DM != AMREX_SPACEDIM) {

            amrex::Abort("ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt>::InitFromBinaryFile(): DM != AMREX_SPACEDIM");

        }

        //

        // NX MUST be in [0,N].

        //

        if (NX < 0 || NX > NStructReal) {

            amrex::Abort("ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt>::InitFromBinaryFile(): NX < 0 || NX > N");

        }

        //

        // Can't ask for more data than exists in the file!

        //

        if (extradata > NX) {

            amrex::Abort("ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt>::InitFromBinaryFile(): extradata > NX");

        }

        //

        // Figure out whether we're dealing with floats or doubles.

        //

        // First get our current position.

        //

        const std::streamoff CURPOS = ifs.tellg();

        //

        // Seek to end of file.

        //

        ifs.seekg(0,std::ios::end);

        //

        // ENDPOS - CURPOS should bracket the particle data.

        //

        const std::streamoff ENDPOS = ifs.tellg();


        RealSizeInFile = int((ENDPOS - CURPOS) / (NP*(DM+NX)));


        AMREX_ASSERT(RealSizeInFile == sizeof(float) || RealSizeInFile == sizeof(double));

        //

        // Now set stream back to earlier position.

        //

        ifs.seekg(CURPOS, std::ios::beg);

        //

        // Skip to our place in the file.

        //

        int id = 0;

        for ( ; id < NReaders; id++) {

            if (rprocs[id] == MyProc) {

                break;

            }

        }


        AMREX_ASSERT(id >= 0 && id < NReaders);


        const std::streamoff NSKIP = id * (NP/NReaders) * (DM+NX) * RealSizeInFile;


        if (NSKIP > 0)

        {

            ifs.seekg(NSKIP, std::ios::cur);

        }


        if (!ifs.good())

        {

            std::string msg("ParticleContainer::InitFromBinaryFile(");

            msg += file;

            msg += ") failed @ 1";

            amrex::Error(msg.c_str());

        }

    }

    //

    // Everyone needs to know NP -- the number of particles in the file.

    //

    ParallelDescriptor::ReduceLongMax(NP);

    //

    // How many particles each reader gets to read.

    //

    Long MyCnt = NP / NReaders;


    if (MyProc == rprocs[NReaders-1]) {

        //

        // Give any remainder to the last reader.

        //

        MyCnt += NP % NReaders;

    }


    Long how_many_redists = NP / (NPartPerRedist*NReaders), how_many_read = 0;


    if (NP % (NPartPerRedist*NReaders)) { how_many_redists++; }


    Vector<float>  fxtra, fignore;

    Vector<double> dxtra, dignore;


    if (extradata > 0)

    {

        fxtra.resize(extradata);

        dxtra.resize(extradata);

    }


    if ((NX-extradata) > 0)

    {

        fignore.resize(NX-extradata);

        dignore.resize(NX-extradata);

    }


    ParticleLocData pld;


    for (int j = 0; j < how_many_redists; j++)

    {


        Vector<std::map<std::pair<int, int>, Gpu::HostVector<ParticleType> > > host_particles;

        host_particles.reserve(15);

        host_particles.resize(finestLevel()+1);


        if (readers.contains(MyProc))

        {

            ParticleType p;


            AMREX_ASSERT(MyCnt > how_many_read);


            const Long NRead = std::min((MyCnt-how_many_read), NPartPerRedist);


            for (Long i = 0; i < NRead; i++)

            {

                //

                // We don't read in idata.id or idata.cpu.  We'll set those later

                // in a manner to guarantee the global uniqueness of the pair.

                //

                if (RealSizeInFile == sizeof(float))

                {

                    float fpos[AMREX_SPACEDIM];


                    ifs.read((char*)&fpos[0], AMREX_SPACEDIM*sizeof(float));


                    AMREX_D_TERM(p.pos(0) = fpos[0];,

                                 p.pos(1) = fpos[1];,

                                 p.pos(2) = fpos[2];);


                }

                else if (RealSizeInFile == sizeof(double))

                {

                    double dpos[AMREX_SPACEDIM];


                    ifs.read((char*)&dpos[0], AMREX_SPACEDIM*sizeof(double));


                    AMREX_D_TERM(p.pos(0) = static_cast<ParticleReal>(dpos[0]);,

                                 p.pos(1) = static_cast<ParticleReal>(dpos[1]);,

                                 p.pos(2) = static_cast<ParticleReal>(dpos[2]););

                }


                //

                // Read in any "extradata".

                //

                if (extradata > 0)

                {

                    if (RealSizeInFile == sizeof(float))

                    {

                        ifs.read((char*)fxtra.data(), std::streamsize(extradata*sizeof(float)));


                        for (int ii = 0; ii < extradata; ii++) {

                            p.rdata(ii) = static_cast<ParticleReal>(fxtra[ii]);

                        }

                    }

                    else if (RealSizeInFile == sizeof(double))

                    {

                        ifs.read((char*)dxtra.data(), std::streamsize(extradata*sizeof(double)));


                        for (int ii = 0; ii < extradata; ii++) {

                            p.rdata(ii) = static_cast<ParticleReal>(dxtra[ii]);

                        }

                    }

                }

                //

                // Read any remaining data for this particle.

                //

                if ((NX-extradata) > 0)

                {

                    if (RealSizeInFile == sizeof(float))

                    {

                        ifs.read((char*)fignore.data(), std::streamsize((NX-extradata)*sizeof(float)));

                    }

                    else if (RealSizeInFile == sizeof(double))

                    {

                        ifs.read((char*)dignore.data(), std::streamsize((NX-extradata)*sizeof(double)));

                    }

                }


                if (!ifs.good())

                {

                    std::string msg("ParticleContainer::InitFromBinaryFile(");

                    msg += file;

                    msg += ") failed @ 2";

                    amrex::Error(msg.c_str());

                }


                if (!Where(p, pld))

                {

                    PeriodicShift(p);


                    if (!Where(p, pld))

                    {

                        if (m_verbose) {

                            amrex::AllPrint() << "BAD PARTICLE ID WOULD BE " << ParticleType::NextID() << '\n'

                                              << "BAD PARTICLE POS "

                                              << AMREX_D_TERM(   p.pos(0),

                                                              << p.pos(1),

                                                              << p.pos(2))

                                              << "\n";

                        }

                        amrex::Abort("ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt>::InitFromBinaryFile(): invalid particle");

                    }

                }


                p.id()  = ParticleType::NextID();

                p.cpu() = MyProc;


                host_particles[pld.m_lev][std::make_pair(pld.m_grid, pld.m_tile)].push_back(p);

            }


            how_many_read += NRead;

        }


        for (int host_lev = 0; host_lev < std::ssize(host_particles); ++host_lev)

        {

            for (auto& kv : host_particles[host_lev]) {

                auto grid = kv.first.first;

                auto tile = kv.first.second;

                const auto& src_tile = kv.second;


                auto& dst_tile = GetParticles(host_lev)[std::make_pair(grid,tile)];

                auto old_size = dst_tile.GetArrayOfStructs().size();

                auto new_size = old_size + src_tile.size();

                dst_tile.resize(new_size);


                Gpu::copyAsync(Gpu::hostToDevice, src_tile.begin(), src_tile.end(),

                               dst_tile.GetArrayOfStructs().begin() + old_size);

            }

        }

        Gpu::streamSynchronize();


        Redistribute();


        //

        // Move particles in m_particles into tmp_particles so that

        // we don't keep trying to redistribute particles that have

        // already been redistributed correctly.

        //

        for (int lev = 0; lev < std::ssize(m_particles); lev++)

        {

            auto& pmap     = m_particles[lev];

            auto& tmp_pmap = tmp_particles[lev];


            for (auto& kv : pmap) {

                auto& aos = kv.second.GetArrayOfStructs()();

                auto& tmp_aos = tmp_pmap[kv.first].GetArrayOfStructs()();


                tmp_aos.insert(tmp_aos.end(), aos.begin(), aos.end());

                ParticleVector().swap(aos);

            }


            ParticleLevel().swap(pmap);

        }

    }

    //

    // Make tmp_particles the new m_particles.

    //

    tmp_particles.swap(m_particles);

    //

    // Add up all the particles read in to get the total number of particles.

    //

    if (m_verbose > 0)

    {

        Long num_particles_read = how_many_read;


        ParallelDescriptor::ReduceLongSum(num_particles_read, ParallelDescriptor::IOProcessorNumber());


        amrex::Print() << "\nTotal number of particles: " << num_particles_read << '\n';

    }


    AMREX_ASSERT(OK());


    if (m_verbose > 1)

    {

        ByteSpread();


        auto runtime = amrex::second() - strttime;


        ParallelDescriptor::ReduceRealMax(runtime, ParallelDescriptor::IOProcessorNumber());


        amrex::Print() << "InitFromBinaryFile() time: " << runtime << '\n';

    }


    Gpu::streamSynchronize();

}


//

// This function expects to read a file containing the pathnames of

// binary particles files needing to be read in for input.  It expects

// one file name per line.

//


template <typename ParticleType, int NArrayReal, int NArrayInt,

          template<class> class Allocator, class CellAssignor>

void


ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>::

InitFromBinaryMetaFile (const std::string& metafile,

                        int                extradata)

{

    BL_PROFILE("ParticleContainer<NSR, NSI, NAR, NAI>::InitFromBinaryMetaFile()");

    const auto strttime = amrex::second();


    std::ifstream ifs(metafile.c_str(), std::ios::in);


    std::string file;


    for (;;)

    {

        std::getline(ifs,file);


        if (!ifs.good()) { break; }


        if (m_verbose > 1) {

            amrex::Print() << "InitFromBinaryMetaFile: processing file: " << file << '\n';

        }


        InitFromBinaryFile(file, extradata);

    }


    if (m_verbose > 1)

    {

        ByteSpread();


        auto runtime = amrex::second() - strttime;


        ParallelDescriptor::ReduceRealMax(runtime, ParallelDescriptor::IOProcessorNumber());


        amrex::Print() << "InitFromBinaryMetaFile() time: " << runtime << '\n';

    }

}


template <typename ParticleType, int NArrayReal, int NArrayInt,

          template<class> class Allocator, class CellAssignor>

void


ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>::

InitRandom (Long                    icount,

            ULong                   iseed,

            const ParticleInitData& pdata,

            bool                    serialize,

            RealBox                 containing_bx)

{

    BL_PROFILE("ParticleContainer<NSR, NSI, NAR, NAI>::InitRandom()");

    AMREX_ASSERT(iseed  > 0);

    AMREX_ASSERT(icount > 0);


    AMREX_ASSERT(m_gdb != nullptr);


    const int       MyProc   = ParallelDescriptor::MyProc();

    const int       NProcs   = ParallelDescriptor::NProcs();

    const int       IOProc   = ParallelDescriptor::IOProcessorNumber();

    const auto      strttime = amrex::second();

    const Geometry& geom     = Geom(0);


    Real r, x, len[AMREX_SPACEDIM] = { AMREX_D_DECL(geom.ProbLength(0),

                                           geom.ProbLength(1),

                                           geom.ProbLength(2)) };


    // We will enforce that the particles are within the containing_bx.

    // If containing_bx is not passed in, it defaults to the full domain.

    if (!containing_bx.ok()) { containing_bx = geom.ProbDomain(); }


    // containing_bx is assumed to lie within the domain.

    if (!geom.ProbDomain().contains(containing_bx))

    {

        containing_bx.setLo(geom.ProbLo());

        containing_bx.setHi(geom.ProbHi());

    }


    const Real* xlo = containing_bx.lo();

    const Real* xhi = containing_bx.hi();


    amrex::InitRandom(iseed+MyProc);


    if (serialize)

    {

        if(icount*AMREX_SPACEDIM >= std::numeric_limits<int>::max())

        {

            amrex::Abort(

                "InitRandom has serialize=true, but this would cause too much "

                "particle data to be sent from IOProc. Set serialize=false, "

                "or use fewer than " +

                std::to_string(

                    amrex::Math::ceil(

                        amrex::Real(std::numeric_limits<int>::max()) /

                        amrex::Real(AMREX_SPACEDIM)

                    )

                ) + " particles."

            );

        }

        //

        // We'll let IOProc generate the particles so we get the same

        // positions no matter how many CPUs we have.  This is here

        // mainly for debugging purposes.  It's not really useful for

        // very large numbers of particles.

        //

        //

        Vector<typename ParticleType::RealType> pos(icount*AMREX_SPACEDIM);


        if (ParallelDescriptor::IOProcessor())

        {

            for (Long j = 0; j < icount; j++)

            {

                for (int i = 0; i < AMREX_SPACEDIM; i++)

                {

                    do

                    {

                        r = amrex::Random();

                        x = geom.ProbLo(i) + (r * len[i]);

                    }

                    while (static_cast<ParticleReal>(x) < static_cast<ParticleReal>(xlo[i]) || static_cast<ParticleReal>(x) >= static_cast<ParticleReal>(xhi[i]));


                    pos[j*AMREX_SPACEDIM + i] = static_cast<ParticleReal>(x);

                }

            }

        }


        ParallelDescriptor::Bcast(pos.dataPtr(), icount*AMREX_SPACEDIM, IOProc);


        ParticleLocData pld;


        Vector<std::map<std::pair<int, int>, Gpu::HostVector<ParticleType> > > host_particles;

        host_particles.reserve(15);

        host_particles.resize(finestLevel()+1);


        Vector<std::map<std::pair<int, int>, std::array<Gpu::HostVector<ParticleReal>, NArrayReal > > > host_real_attribs;

        host_real_attribs.reserve(15);

        host_real_attribs.resize(finestLevel()+1);


        Vector<std::map<std::pair<int, int>, std::array<Gpu::HostVector<int>, NArrayInt > > > host_int_attribs;

        host_int_attribs.reserve(15);

        host_int_attribs.resize(finestLevel()+1);


        Vector<std::map<std::pair<int, int>, Gpu::HostVector<uint64_t> > > host_idcpu;

        host_idcpu.reserve(15);

        host_idcpu.resize(finestLevel()+1);


        for (Long j = 0; j < icount; j++)

        {

            Particle<0, 0> ptest;


            for (int i = 0; i < AMREX_SPACEDIM; i++) {

                ptest.pos(i) = pos[j*AMREX_SPACEDIM + i];

            }


            if (!Where(ptest, pld)) {

                amrex::Abort("ParticleContainer::InitRandom(): invalid particle");

            }


            AMREX_ASSERT(pld.m_lev >= 0 && pld.m_lev <= finestLevel());

            std::pair<int, int> ind(pld.m_grid, pld.m_tile);


            const int who = ParticleDistributionMap(pld.m_lev)[pld.m_grid];


            if (who == MyProc) {


                if constexpr(!ParticleType::is_soa_particle)

                {

                    ParticleType p;

                    for (int i = 0; i < AMREX_SPACEDIM; i++) {

                        p.pos(i) = pos[j*AMREX_SPACEDIM + i];

                    }


                    // We own it. Add it at the appropriate level.

                    p.id()  = ParticleType::NextID();

                    p.cpu() = MyProc;


                    for (int i = 0; i < NStructInt; i++) {

                        p.idata(i) = pdata.int_struct_data[i];

                    }


                    for (int i = 0; i < NStructReal; i++) {

                        p.rdata(i) = static_cast<ParticleReal>(pdata.real_struct_data[i]);

                    }


                    // add the struct

                    host_particles[pld.m_lev][ind].push_back(p);


                    // add the real...

                    for (int i = 0; i < NArrayReal; i++) {

                        host_real_attribs[pld.m_lev][ind][i].push_back(static_cast<ParticleReal>(pdata.real_array_data[i]));

                    }


                    // ... and int array data

                    for (int i = 0; i < NArrayInt; i++) {

                        host_int_attribs[pld.m_lev][ind][i].push_back(pdata.int_array_data[i]);

                    }

                } else {

                    for (int i = 0; i < AMREX_SPACEDIM; i++) {

                        host_real_attribs[pld.m_lev][ind][i].push_back(pos[j*AMREX_SPACEDIM+i]);

                    }


                    host_idcpu[pld.m_lev][ind].push_back(0);

                    ParticleIDWrapper(host_idcpu[pld.m_lev][ind].back()) = ParticleType::NextID();

                    ParticleCPUWrapper(host_idcpu[pld.m_lev][ind].back()) = ParallelDescriptor::MyProc();


                    host_particles[pld.m_lev][ind];


                    // add the real...

                    for (int i = AMREX_SPACEDIM; i < NArrayReal; i++) {

                        host_real_attribs[pld.m_lev][ind][i].push_back(static_cast<ParticleReal>(pdata.real_array_data[i]));

                    }


                    // ... and int array data

                    for (int i = 2; i < NArrayInt; i++) {

                        host_int_attribs[pld.m_lev][ind][i].push_back(pdata.int_array_data[i]);

                    }

                }

            }

        }


        for (int host_lev = 0; host_lev < std::ssize(host_particles); ++host_lev)

        {

            for (auto& kv : host_particles[host_lev]) {

                auto grid = kv.first.first;

                auto tile = kv.first.second;

                const auto& src_tile = kv.second;


                auto& dst_tile = GetParticles(host_lev)[std::make_pair(grid,tile)];

                auto old_size = dst_tile.size();

                auto new_size = old_size;

                if constexpr(!ParticleType::is_soa_particle)

                {

                    new_size += src_tile.size();

                } else {

                    new_size += host_real_attribs[host_lev][std::make_pair(grid,tile)][0].size();

                }

                dst_tile.resize(new_size);


                if constexpr(!ParticleType::is_soa_particle)

                {

                    Gpu::copyAsync(Gpu::hostToDevice, src_tile.begin(), src_tile.end(),

                                   dst_tile.GetArrayOfStructs().begin() + old_size);

                } else {

                    Gpu::copyAsync(Gpu::hostToDevice,

                                   host_idcpu[host_lev][std::make_pair(grid,tile)].begin(),

                                   host_idcpu[host_lev][std::make_pair(grid,tile)].end(),

                                   dst_tile.GetStructOfArrays().GetIdCPUData().begin() + old_size);

                }


                for (int i = 0; i < NArrayReal; ++i) { // NOLINT(readability-misleading-indentation)

                    Gpu::copyAsync(Gpu::hostToDevice,

                                   host_real_attribs[host_lev][std::make_pair(grid,tile)][i].begin(),

                                   host_real_attribs[host_lev][std::make_pair(grid,tile)][i].end(),

                                   dst_tile.GetStructOfArrays().GetRealData(i).begin() + old_size);

                }


                for (int i = 0; i < NArrayInt; ++i) {

                    Gpu::copyAsync(Gpu::hostToDevice,

                                   host_int_attribs[host_lev][std::make_pair(grid,tile)][i].begin(),

                                   host_int_attribs[host_lev][std::make_pair(grid,tile)][i].end(),

                                   dst_tile.GetStructOfArrays().GetIntData(i).begin() + old_size);

                }

            }

        }

        Gpu::streamSynchronize();


        AMREX_ASSERT(OK());

    }

    else {

        // We'll generate the particles in parallel.

        // Each CPU will key off the given seed to get independent streams of random numbers.

        Long M = icount / NProcs;

        // Processor 0 will get the slop.

        if (MyProc == 0) {

            M += (icount % NProcs);

        }


        ParticleLocData pld;


        Vector<std::map<std::pair<int, int>, Gpu::HostVector<ParticleType> > > host_particles;

        host_particles.reserve(15);

        host_particles.resize(finestLevel()+1);


        Vector<std::map<std::pair<int, int>, std::array<Gpu::HostVector<ParticleReal>, NArrayReal > > > host_real_attribs;

        host_real_attribs.reserve(15);

        host_real_attribs.resize(finestLevel()+1);


        Vector<std::map<std::pair<int, int>, std::array<Gpu::HostVector<int>, NArrayInt > > > host_int_attribs;

        host_int_attribs.reserve(15);

        host_int_attribs.resize(finestLevel()+1);


        Vector<std::map<std::pair<int, int>, Gpu::HostVector<uint64_t> > > host_idcpu;

        host_idcpu.reserve(15);

        host_idcpu.resize(finestLevel()+1);


        for (Long icnt = 0; icnt < M; icnt++) {

            Particle<0, 0> ptest;

            for (int i = 0; i < AMREX_SPACEDIM; i++) {

                do {

                    r = amrex::Random();

                    x = geom.ProbLo(i) + (r * len[i]);

                }

                while (static_cast<ParticleReal>(x) < static_cast<ParticleReal>(xlo[i]) || static_cast<ParticleReal>(x) >= static_cast<ParticleReal>(xhi[i]));


                ptest.pos(i) = static_cast<ParticleReal>(x);


                AMREX_ASSERT(ptest.pos(i) < geom.ProbHi(i));

            }


            ptest.id()  = ParticleType::NextID();

            ptest.cpu() = ParallelDescriptor::MyProc();


            // locate the particle

            if (!Where(ptest, pld))

            {

                amrex::Abort("ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt>::InitRandom(): invalid particle");

            }

            AMREX_ASSERT(pld.m_lev >= 0 && pld.m_lev <= finestLevel());

            std::pair<int, int> ind(pld.m_grid, pld.m_tile);


            if constexpr(!ParticleType::is_soa_particle)

            {

                ParticleType p;

                for (int i = 0; i < AMREX_SPACEDIM; i++) {

                    p.pos(i) = ptest.pos(i);;

                }


                p.id()  = ptest.id();

                p.cpu() = ptest.cpu();


                for (int i = 0; i < NStructReal; i++) {

                    p.rdata(i) = static_cast<ParticleReal>(pdata.real_struct_data[i]);

                }


                for (int i = 0; i < NStructInt; i++) {

                    p.idata(i) = pdata.int_struct_data[i];

                }


                // add the struct

                host_particles[pld.m_lev][ind].push_back(p);


                // add the real...

                for (int i = 0; i < NArrayReal; i++) {

                    host_real_attribs[pld.m_lev][ind][i].push_back(static_cast<ParticleReal>(pdata.real_array_data[i]));

                }


                // ... and int array data

                for (int i = 0; i < NArrayInt; i++) {

                    host_int_attribs[pld.m_lev][ind][i].push_back(pdata.int_array_data[i]);

                }

            } else {

                for (int i = 0; i < AMREX_SPACEDIM; i++) {

                    host_real_attribs[pld.m_lev][ind][i].push_back(ptest.pos(i));

                }


                host_idcpu[pld.m_lev][ind].push_back(0);

                ParticleIDWrapper(host_idcpu[pld.m_lev][ind].back()) = ParticleType::NextID();

                ParticleCPUWrapper(host_idcpu[pld.m_lev][ind].back()) = ParallelDescriptor::MyProc();


                host_particles[pld.m_lev][ind];


                // add the real...

                for (int i = AMREX_SPACEDIM; i < NArrayReal; i++) {

                    host_real_attribs[pld.m_lev][ind][i].push_back(static_cast<ParticleReal>(pdata.real_array_data[i]));

                }


                // ... and int array data

                for (int i = 2; i < NArrayInt; i++) {

                    host_int_attribs[pld.m_lev][ind][i].push_back(pdata.int_array_data[i]);

                }

            }

        }


        for (int host_lev = 0; host_lev < std::ssize(host_particles); ++host_lev)

        {

            for (auto& kv : host_particles[host_lev]) {

                auto grid = kv.first.first;

                auto tile = kv.first.second;

                const auto& src_tile = kv.second;


                auto& dst_tile = GetParticles(host_lev)[std::make_pair(grid,tile)];

                auto old_size = dst_tile.size();

                auto new_size = old_size;

                if constexpr(!ParticleType::is_soa_particle)

                {

                    new_size += src_tile.size();

                } else {

                    new_size += host_real_attribs[host_lev][std::make_pair(grid,tile)][0].size();

                }

                dst_tile.resize(new_size);


                if constexpr(!ParticleType::is_soa_particle)

                {

                    Gpu::copyAsync(Gpu::hostToDevice, src_tile.begin(), src_tile.end(),

                                   dst_tile.GetArrayOfStructs().begin() + old_size);

                } else {

                    Gpu::copyAsync(Gpu::hostToDevice,

                                   host_idcpu[host_lev][std::make_pair(grid,tile)].begin(),

                                   host_idcpu[host_lev][std::make_pair(grid,tile)].end(),

                                   dst_tile.GetStructOfArrays().GetIdCPUData().begin() + old_size);

                }


                for (int i = 0; i < NArrayReal; ++i) { // NOLINT(readability-misleading-indentation)

                    Gpu::copyAsync(Gpu::hostToDevice,

                                   host_real_attribs[host_lev][std::make_pair(grid,tile)][i].begin(),

                                   host_real_attribs[host_lev][std::make_pair(grid,tile)][i].end(),

                                   dst_tile.GetStructOfArrays().GetRealData(i).begin() + old_size);

                }


                for (int i = 0; i < NArrayInt; ++i) {

                    Gpu::copyAsync(Gpu::hostToDevice,

                                   host_int_attribs[host_lev][std::make_pair(grid,tile)][i].begin(),

                                   host_int_attribs[host_lev][std::make_pair(grid,tile)][i].end(),

                                   dst_tile.GetStructOfArrays().GetIntData(i).begin() + old_size);

                }

            }

        }

        Gpu::streamSynchronize();


        // Let Redistribute() sort out where the particles belong.

        Redistribute();

    }


    if (m_verbose > 1)

    {

        auto stoptime = amrex::second() - strttime;


        ParallelDescriptor::ReduceRealMax(stoptime,IOProc);


        amrex::Print() << "ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt>::InitRandom() time: " << stoptime << '\n';

    }


    Gpu::streamSynchronize();

}


template <typename ParticleType, int NArrayReal, int NArrayInt,

          template<class> class Allocator, class CellAssignor>

void

ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>


::InitRandomPerBox (Long                    icount_per_box,

                    ULong                   iseed,

                    const ParticleInitData& pdata)

{

    BL_PROFILE("ParticleContainer<NSR, NSI, NAR, NAI>::InitRandomPerBox()");

    AMREX_ASSERT(iseed  > 0);

    AMREX_ASSERT(icount_per_box > 0);


    AMREX_ASSERT(m_gdb != nullptr);


    const int       IOProc   = ParallelDescriptor::IOProcessorNumber();

    const auto      strttime = amrex::second();

    const Geometry& geom     = Geom(0);


    ParticleLocData pld;

    ParticleType p;


    // This assumes level 0 since geom = m_gdb->Geom(0)

    const Real* dx  = geom.CellSize();


    // We use exactly the same seed for every grid

    std::mt19937 mt(iseed);

    std::uniform_real_distribution<double> dist(0.0, 1.0);


    m_particles.resize(m_gdb->finestLevel()+1);


    for (int lev = 0; lev < std::ssize(m_particles); lev++)

    {

        AMREX_ASSERT(m_particles[lev].empty());

    }


    // We'll generate the particles in parallel -- but no tiling here.

    for (MFIter mfi(*m_dummy_mf[0], false); mfi.isValid(); ++mfi)

    {

        Box grid = m_gdb->ParticleBoxArray(0)[mfi.index()];

        RealBox grid_box = RealBox(grid,dx,geom.ProbLo());


        for (Long icnt = 0; icnt < icount_per_box; icnt++) {

        for (Long jcnt = 0; jcnt < icount_per_box; jcnt++) {

        for (Long kcnt = 0; kcnt < icount_per_box; kcnt++)

        {

            AMREX_D_TERM(

                p.pos(0) = static_cast<ParticleReal>(grid_box.lo(0) + (dist(mt) + double(icnt)) / double(icount_per_box) * grid_box.length(0));,

                p.pos(1) = static_cast<ParticleReal>(grid_box.lo(1) + (dist(mt) + double(jcnt)) / double(icount_per_box) * grid_box.length(1));,

                p.pos(2) = static_cast<ParticleReal>(grid_box.lo(2) + (dist(mt) + double(kcnt)) / double(icount_per_box) * grid_box.length(2));

            );


            for (int i = 0; i < AMREX_SPACEDIM; i++) {

                AMREX_ASSERT(p.pos(i) < grid_box.hi(i));

            }


            // the real struct data

            for (int i = 0; i < NStructReal; i++) {

                p.rdata(i) = static_cast<ParticleReal>(pdata.real_struct_data[i]);

            }


            // the int struct data

            p.id()  = ParticleType::NextID();

            p.cpu() = ParallelDescriptor::MyProc();


            for (int i = 0; i < NStructInt; i++) {

                p.idata(i) = pdata.int_struct_data[i];

            }


            // locate the particle

            if (!Where(p, pld)) {

                amrex::Abort("ParticleContainer::InitRandomPerBox(): invalid particle");

            }

            AMREX_ASSERT(pld.m_lev >= 0 && pld.m_lev <= finestLevel());

            std::pair<int, int> ind(pld.m_grid, pld.m_tile);


            // add the struct

            m_particles[pld.m_lev][ind].push_back(p);


            // add the real...

            for (int i = 0; i < NArrayReal; i++) {

                m_particles[pld.m_lev][ind].push_back_real(i, static_cast<ParticleReal>(pdata.real_array_data[i]));

            }


            // ... and int array data

            for (int i = 0; i < NArrayInt; i++) {

                m_particles[pld.m_lev][ind].push_back_int(i, pdata.int_array_data[i]);

            }


        } } }

    }


    if (m_verbose > 1)

    {

        auto stoptime = amrex::second() - strttime;


        ParallelDescriptor::ReduceRealMax(stoptime,IOProc);


        amrex::Print() << "ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt>::InitRandomPerBox() time: " << stoptime << '\n';

    }

}


template <typename ParticleType, int NArrayReal, int NArrayInt,

          template<class> class Allocator, class CellAssignor>

void


ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>::

InitOnePerCell (Real x_off, Real y_off, Real z_off, const ParticleInitData& pdata)

{

    amrex::ignore_unused(y_off,z_off);


    BL_PROFILE("ParticleContainer<NSR, NSI, NAR, NAI>::InitOnePerCell()");


    AMREX_ASSERT(m_gdb != nullptr);


    // Note that x_off, y_off and z_off are the offsets from the lower left corner

    // in each cell as measured in units of dx, so they must be in [0,1]

    AMREX_ASSERT(x_off >= 0. && y_off >= 0. && z_off >= 0.);

    AMREX_ASSERT(x_off <= 1. && y_off <= 1. && z_off <= 1.);


    const int       IOProc   = ParallelDescriptor::IOProcessorNumber();

    const auto      strttime = amrex::second();

    const Geometry& geom     = Geom(0);  // generates particles on level 0;


    const Real* dx  = geom.CellSize();


    ParticleType p;


    // We'll generate the particles in parallel -- but no tiling of the grid here.

    for (MFIter mfi(*m_dummy_mf[0], false); mfi.isValid(); ++mfi) {

        Box grid = ParticleBoxArray(0)[mfi.index()];

        auto ind = std::make_pair(mfi.index(), mfi.LocalTileIndex());

        RealBox grid_box (grid,dx,geom.ProbLo());

        ParticleTile<ParticleType, NArrayReal, NArrayInt, amrex::PinnedArenaAllocator> ptile_tmp;

        for (IntVect beg = grid.smallEnd(), end=grid.bigEnd(), cell = grid.smallEnd(); cell <= end; grid.next(cell))

        {

            // the real struct data

            AMREX_D_TERM(p.pos(0) = static_cast<ParticleReal>(grid_box.lo(0) + (x_off + cell[0]-beg[0])*dx[0]);,

                         p.pos(1) = static_cast<ParticleReal>(grid_box.lo(1) + (y_off + cell[1]-beg[1])*dx[1]);,

                         p.pos(2) = static_cast<ParticleReal>(grid_box.lo(2) + (z_off + cell[2]-beg[2])*dx[2]););


            for (int d = 0; d < AMREX_SPACEDIM; ++d) {

                AMREX_ASSERT(p.pos(d) < grid_box.hi(d));

            }


            for (int i = 0; i < NStructReal; i++) {

                p.rdata(i) = static_cast<ParticleReal>(pdata.real_struct_data[i]);

            }


            // the int struct data

            p.id()  = ParticleType::NextID();

            p.cpu() = ParallelDescriptor::MyProc();


            for (int i = 0; i < NStructInt; i++) {

                p.idata(i) = pdata.int_struct_data[i];

            }


            // add the struct

            ptile_tmp.push_back(p);


            // add the real...

            for (int i = 0; i < NArrayReal; i++) {

                ptile_tmp.push_back_real(i, static_cast<ParticleReal>(pdata.real_array_data[i]));

            }


            // ... and int array data

            for (int i = 0; i < NArrayInt; i++) {

                ptile_tmp.push_back_int(i, pdata.int_array_data[i]);

            }

        }


        m_particles[0][ind].resize(ptile_tmp.numParticles());

        amrex::copyParticles(m_particles[0][ind], ptile_tmp);

        Gpu::Device::streamSynchronize();

    }


    Redistribute();


    if (m_verbose > 1) {

        auto stoptime = amrex::second() - strttime;


        ParallelDescriptor::ReduceRealMax(stoptime,IOProc);


        amrex::Print() << "ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt>::InitOnePerCell() time: " << stoptime << '\n';

    }

}


template <typename ParticleType, int NArrayReal, int NArrayInt,

          template<class> class Allocator, class CellAssignor>

void


ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>::

InitNRandomPerCell (int n_per_cell, const ParticleInitData& pdata)

{

    BL_PROFILE("ParticleContainer<NSR, NSI, NAR, NAI>::InitNRandomPerCell()");


    AMREX_ASSERT(m_gdb != nullptr);


    const int       IOProc   = ParallelDescriptor::IOProcessorNumber();

    const auto      strttime = amrex::second();

    const Geometry& geom     = Geom(0);


    // This assumes level 0 since geom = Geom(0)

    const Real* dx  = geom.CellSize();


    resizeData();


    for (int lev = 0; lev < std::ssize(m_particles); lev++) {

        AMREX_ASSERT(m_particles[lev].empty());

    }


    ParticleLocData pld;

    ParticleType p;

    Real r;


    // We'll generate the particles in parallel -- but no tiling here.

    for (MFIter mfi(*m_dummy_mf[0], false); mfi.isValid(); ++mfi)

    {

        Box grid = ParticleBoxArray(0)[mfi.index()];

        RealBox grid_box (grid,dx,geom.ProbLo());


        Vector<std::map<std::pair<int, int>, Gpu::HostVector<ParticleType> > > host_particles;

        host_particles.reserve(15);

        host_particles.resize(finestLevel()+1);


        Vector<std::map<std::pair<int, int>, std::array<Gpu::HostVector<ParticleReal>, NArrayReal > > > host_real_attribs;

        host_real_attribs.reserve(15);

        host_real_attribs.resize(finestLevel()+1);


        Vector<std::map<std::pair<int, int>, std::array<Gpu::HostVector<int>, NArrayInt > > > host_int_attribs;

        host_int_attribs.reserve(15);

        host_int_attribs.resize(finestLevel()+1);


        for (IntVect beg = grid.smallEnd(), end=grid.bigEnd(),

                    cell = grid.smallEnd(); cell <= end; grid.next(cell)) {


            for (int n = 0; n < n_per_cell; n++)

            {

                // the real struct data

                for (int i = 0; i < AMREX_SPACEDIM; i++) {

                    constexpr int max_iter = 10;

                    int iter = 0;

                    while (iter < max_iter) {

                        r = amrex::Random();

                        p.pos(i) = static_cast<ParticleReal>(grid_box.lo(i) + (r + Real(cell[i]-beg[i]))*dx[i]);

                        if (p.pos(i) < grid_box.hi(i)) { break; }

                        iter++;

                    }

                    AMREX_ASSERT(p.pos(i) < grid_box.hi(i));

                }


                for (int i = 0; i < NStructReal; i++) {

                    p.rdata(i) = static_cast<ParticleReal>(pdata.real_struct_data[i]);

                }


                // the int struct data

                p.id()  = ParticleType::NextID();

                p.cpu() = ParallelDescriptor::MyProc();


                for (int i = 0; i < NStructInt; i++) {

                    p.idata(i) = pdata.int_struct_data[i];

                }


                // locate the particle

                if (!Where(p, pld)) {

                    amrex::Abort("ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt>::InitNRandomPerCell(): invalid particle");

                }

                AMREX_ASSERT(pld.m_lev >= 0 && pld.m_lev <= finestLevel());

                std::pair<int, int> ind(pld.m_grid, pld.m_tile);


                // add the struct

                host_particles[pld.m_lev][ind].push_back(p);


                // add the real...

                for (int i = 0; i < NArrayReal; i++) {

                    host_real_attribs[pld.m_lev][ind][i].push_back(pdata.real_array_data[i]);

                }


                // ... and int array data

                for (int i = 0; i < NArrayInt; i++) {

                    host_int_attribs[pld.m_lev][ind][i].push_back(pdata.int_array_data[i]);

                }

            }

        }


        for (int host_lev = 0; host_lev < std::ssize(host_particles); ++host_lev)

        {

            for (auto& kv : host_particles[host_lev]) {

                auto gid = kv.first.first;

                auto tid = kv.first.second;

                const auto& src_tid = kv.second;


                auto& dst_tile = GetParticles(host_lev)[std::make_pair(gid,tid)];

                auto old_size = dst_tile.GetArrayOfStructs().size();

                auto new_size = old_size + src_tid.size();

                dst_tile.resize(new_size);


                Gpu::copyAsync(Gpu::hostToDevice, src_tid.begin(), src_tid.end(),

                               dst_tile.GetArrayOfStructs().begin() + old_size);


                for (int i = 0; i < NArrayReal; ++i)

                {

                    Gpu::copyAsync(Gpu::hostToDevice,

                                   host_real_attribs[host_lev][std::make_pair(gid,tid)][i].begin(),

                                   host_real_attribs[host_lev][std::make_pair(gid,tid)][i].end(),

                                   dst_tile.GetStructOfArrays().GetRealData(i).begin() + old_size);

                }


                for (int i = 0; i < NArrayInt; ++i)

                {

                    Gpu::copyAsync(Gpu::hostToDevice,

                                   host_int_attribs[host_lev][std::make_pair(gid,tid)][i].begin(),

                                   host_int_attribs[host_lev][std::make_pair(gid,tid)][i].end(),

                                   dst_tile.GetStructOfArrays().GetIntData(i).begin() + old_size);

                }

            }

        }

        Gpu::streamSynchronize();

    }


    if (m_verbose > 1)

    {

        auto stoptime = amrex::second() - strttime;


        ParallelDescriptor::ReduceRealMax(stoptime,IOProc);


        amrex::Print() << "ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt>::InitNRandomPerCell() time: " << stoptime << '\n';

    }


    Gpu::streamSynchronize();

}


}


#endif /*AMREX_PARTICLEINIT_H*/


BL_PROFILE
#define BL_PROFILE(a)
Definition AMReX_BLProfiler.H:551

AMREX_ASSERT
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38

AMREX_D_TERM
#define AMREX_D_TERM(a, b, c)
Definition AMReX_SPACE.H:172

AMREX_D_DECL
#define AMREX_D_DECL(a, b, c)
Definition AMReX_SPACE.H:171

amrex::AllPrint
Print on all processors of the default communicator.
Definition AMReX_Print.H:113

amrex::BoxND< 3 >

amrex::BoxND::bigEnd
__host__ __device__ const IntVectND< dim > & bigEnd() const &noexcept
Return the inclusive upper bound of the box.
Definition AMReX_Box.H:123

amrex::BoxND::next
__host__ __device__ void next(IntVectND< dim > &) const noexcept
Step through the rectangle. It is a runtime error to give a point not inside rectangle....
Definition AMReX_Box.H:1121

amrex::BoxND::smallEnd
__host__ __device__ const IntVectND< dim > & smallEnd() const &noexcept
Return the inclusive lower bound of the box.
Definition AMReX_Box.H:111

amrex::CoordSys::CellSize
const Real * CellSize() const noexcept
Returns the cellsize for each coordinate direction.
Definition AMReX_CoordSys.H:71

amrex::Geometry
Rectangular problem domain geometry.
Definition AMReX_Geometry.H:75

amrex::Geometry::ProbHi
const Real * ProbHi() const noexcept
Returns the hi end of the problem domain in each dimension.
Definition AMReX_Geometry.H:186

amrex::Geometry::ProbDomain
const RealBox & ProbDomain() const noexcept
Returns the problem domain.
Definition AMReX_Geometry.H:176

amrex::Geometry::ProbLength
Real ProbLength(int dir) const noexcept
Returns length of problem domain in specified dimension.
Definition AMReX_Geometry.H:214

amrex::Geometry::ProbLo
const Real * ProbLo() const noexcept
Returns the lo end of the problem domain in each dimension.
Definition AMReX_Geometry.H:184

amrex::Gpu::Device::streamSynchronize
static void streamSynchronize() noexcept
Definition AMReX_GpuDevice.cpp:856

amrex::IntVectND< 3 >

amrex::MFIter
Iterator for looping ever tiles and boxes of amrex::FabArray based containers.
Definition AMReX_MFIter.H:88

amrex::MFIter::isValid
bool isValid() const noexcept
Is the iterator valid i.e. is it associated with a FAB?
Definition AMReX_MFIter.H:172

amrex::PODVector
Dynamically allocated vector for trivially copyable data.
Definition AMReX_PODVector.H:308

amrex::PODVector::pop_back
void pop_back() noexcept
Definition AMReX_PODVector.H:644

amrex::PODVector::back
T & back() noexcept
Definition AMReX_PODVector.H:662

amrex::PODVector::empty
bool empty() const noexcept
Definition AMReX_PODVector.H:652

amrex::PODVector::push_back
void push_back(const T &a_value)
Definition AMReX_PODVector.H:629

amrex::ParticleContainer_impl
A distributed container for Particles sorted onto the levels, grids, and tiles of a block-structured ...
Definition AMReX_ParticleContainer.H:149

amrex::ParticleContainer_impl::ParticleLevel
std::map< std::pair< int, int >, ParticleTileType > ParticleLevel
Definition AMReX_ParticleContainer.H:196

amrex::ParticleContainer_impl::InitNRandomPerCell
void InitNRandomPerCell(int n_per_cell, const ParticleInitData &pdata)
This initializes the particle container with n_per_cell randomly distributed particles per cell,...
Definition AMReX_ParticleInit.H:1553

amrex::ParticleContainer_impl::ParticleVector
typename AoS::ParticleVector ParticleVector
Definition AMReX_ParticleContainer.H:204

amrex::ParticleContainer_impl::InitOnePerCell
void InitOnePerCell(Real x_off, Real y_off, Real z_off, const ParticleInitData &pdata)
This initializes the particle container with one particle per cell, where the other particle data and...
Definition AMReX_ParticleInit.H:1469

amrex::ParticleContainer_impl::InitRandom
void InitRandom(Long icount, ULong iseed, const ParticleInitData &pdata, bool serialize=false, RealBox bx=RealBox())
This initializes the particle container with icount randomly distributed particles....
Definition AMReX_ParticleInit.H:974

amrex::ParticleContainer_impl::InitFromBinaryMetaFile
void InitFromBinaryMetaFile(const std::string &file, int extradata)
Definition AMReX_ParticleInit.H:935

amrex::ParticleContainer_impl::InitFromBinaryFile
void InitFromBinaryFile(const std::string &file, int extradata)
Definition AMReX_ParticleInit.H:489

amrex::ParticleContainer_impl::ParticleType
T_ParticleType ParticleType
Definition AMReX_ParticleContainer.H:151

amrex::Print
This class provides the user with a few print options.
Definition AMReX_Print.H:35

amrex::RealBox
A Box with real dimensions.
Definition AMReX_RealBox.H:28

amrex::RealBox::contains
__host__ __device__ bool contains(const Real *point, Real eps=0.0) const noexcept
Is the specified point contained in the RealBox?
Definition AMReX_RealBox.H:109

amrex::RealBox::setLo
void setLo(const Real *a_lo) noexcept
Sets lo side.
Definition AMReX_RealBox.H:71

amrex::RealBox::lo
__host__ __device__ const Real * lo() const &noexcept
Returns lo side.
Definition AMReX_RealBox.H:53

amrex::RealBox::hi
__host__ __device__ const Real * hi() const &noexcept
Returns hide side.
Definition AMReX_RealBox.H:58

amrex::RealBox::length
__host__ __device__ Real length(int dir) const noexcept
Returns length in specified direction.
Definition AMReX_RealBox.H:69

amrex::RealBox::setHi
void setHi(const Real *a_hi) noexcept
Sets hi side.
Definition AMReX_RealBox.H:79

amrex::RealBox::ok
__host__ __device__ bool ok() const noexcept
Is the RealBox OK; i.e. does it have non-negative volume?
Definition AMReX_RealBox.H:88

amrex::Vector
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:28

amrex::Vector::dataPtr
T * dataPtr() noexcept
get access to the underlying data pointer
Definition AMReX_Vector.H:49

amrex::Vector::size
Long size() const noexcept
Definition AMReX_Vector.H:53

amrex::VisMFBuffer::IO_Buffer_Size
static constexpr int IO_Buffer_Size
We try to do I/O with buffers of this size.
Definition AMReX_VisMFBuffer.H:16

amrex::Real
amrex_real Real
Floating Point Type for Fields.
Definition AMReX_REAL.H:79

amrex::ULong
amrex_ulong ULong
Unsigned integer type guaranteed to be wider than unsigned int.
Definition AMReX_INT.H:32

amrex::ParticleReal
amrex_particle_real ParticleReal
Floating Point Type for Particles.
Definition AMReX_REAL.H:90

amrex::Long
amrex_long Long
Definition AMReX_INT.H:30

amrex::ParallelDescriptor::MyProc
int MyProc() noexcept
Definition AMReX_ParallelDescriptor.H:128

amrex::ParallelDescriptor::NProcs
int NProcs() noexcept
Definition AMReX_ParallelDescriptor.H:255

amrex::ParallelDescriptor::ReduceLongSum
void ReduceLongSum(Long &)
Definition AMReX_ParallelDescriptor.cpp:1236

amrex::ParallelDescriptor::IOProcessorNumber
int IOProcessorNumber() noexcept
The MPI rank number of the I/O Processor (probably rank 0). This rank is usually used to write to std...
Definition AMReX_ParallelDescriptor.H:279

amrex::ParallelDescriptor::ReduceLongMax
void ReduceLongMax(Long &)
Definition AMReX_ParallelDescriptor.cpp:1237

amrex::ParallelDescriptor::IOProcessor
bool IOProcessor() noexcept
Is this CPU the I/O Processor? To get the rank number, call IOProcessorNumber()
Definition AMReX_ParallelDescriptor.H:289

amrex::InitRandom
void InitRandom(ULong cpu_seed, int nprocs, ULong gpu_seed)
Set the seed of the random number generator.
Definition AMReX_Random.cpp:99

amrex::Random
Real Random()
Generate a psuedo-random real from uniform distribution.
Definition AMReX_Random.cpp:133

amrex::Gpu::copyAsync
void copyAsync(HostToDevice, InIter begin, InIter end, OutIter result) noexcept
A host-to-device copy routine. Note this is just a wrapper around memcpy, so it assumes contiguous st...
Definition AMReX_GpuContainers.H:228

amrex::Gpu::hostToDevice
static constexpr HostToDevice hostToDevice
Definition AMReX_GpuContainers.H:105

amrex::Gpu::streamSynchronize
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:310

amrex::ParallelDescriptor::Bcast
void Bcast(void *, int, MPI_Datatype, int, MPI_Comm)
Definition AMReX_ParallelDescriptor.cpp:1295

amrex::ParallelDescriptor::ReduceRealMax
void ReduceRealMax(Vector< std::reference_wrapper< Real > > const &)
Definition AMReX_ParallelDescriptor.cpp:1228

amrex
Definition AMReX_Amr.cpp:50

amrex::ignore_unused
__host__ __device__ void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:139

amrex::FileOpenFailed
void FileOpenFailed(const std::string &file)
Output a message and abort when couldn't open the file.
Definition AMReX_Utility.cpp:137

amrex::copyParticles
void copyParticles(DstTile &dst, const SrcTile &src) noexcept
Copy particles from src to dst. This version copies all the particles, writing them to the beginning ...
Definition AMReX_ParticleTransformation.H:222

amrex::begin
__host__ __device__ Dim3 begin(BoxND< dim > const &box) noexcept
Definition AMReX_Box.H:2006

amrex::second
double second() noexcept
Definition AMReX_Utility.cpp:940

amrex::Direction::x
@ x

amrex::Error
void Error(const std::string &msg)
Print out message to cerr and exit via amrex::Abort().
Definition AMReX.cpp:235

amrex::Abort
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition AMReX.cpp:241

amrex::int
const int[]
Definition AMReX_BLProfiler.cpp:1664

amrex::end
__host__ __device__ Dim3 end(BoxND< dim > const &box) noexcept
Definition AMReX_Box.H:2015

amrex::ParticleCPUWrapper
Definition AMReX_Particle.H:259

amrex::ParticleIDWrapper
Definition AMReX_Particle.H:154

amrex::ParticleInitType
A struct used to pass initial data into the various Init methods. This struct is used to pass initial...
Definition AMReX_ParticleContainer.H:119

amrex::ParticleInitType::int_struct_data
std::array< int, NStructInt > int_struct_data
Definition AMReX_ParticleContainer.H:121

amrex::ParticleInitType::int_array_data
std::array< int, NArrayInt > int_array_data
Definition AMReX_ParticleContainer.H:123

amrex::ParticleInitType::real_array_data
std::array< double, NArrayReal > real_array_data
Definition AMReX_ParticleContainer.H:122

amrex::ParticleInitType::real_struct_data
std::array< double, NStructReal > real_struct_data
Definition AMReX_ParticleContainer.H:120

amrex::ParticleLocData
A struct used for storing a particle's position in the AMR hierarchy.
Definition AMReX_ParticleContainer.H:93

amrex::ParticleLocData::m_grid
int m_grid
Definition AMReX_ParticleContainer.H:95

amrex::ParticleLocData::m_tile
int m_tile
Definition AMReX_ParticleContainer.H:96

amrex::ParticleLocData::m_lev
int m_lev
Definition AMReX_ParticleContainer.H:94

amrex::ParticleTile
Definition AMReX_ParticleTile.H:764

amrex::ParticleTile::push_back
void push_back(const ParticleType &p)
Definition AMReX_ParticleTile.H:1036

amrex::ParticleTile::numParticles
int numParticles() const
Returns the number of real particles (excluding neighbors)
Definition AMReX_ParticleTile.H:947

amrex::ParticleTile::push_back_int
void push_back_int(int comp, int v)
Definition AMReX_ParticleTile.H:1140

amrex::ParticleTile::push_back_real
void push_back_real(int comp, ParticleReal v)
Definition AMReX_ParticleTile.H:1085

amrex::Particle
The struct used to store particles.
Definition AMReX_Particle.H:405

amrex::Particle::pos
__host__ __device__ RealVect pos() const &
Definition AMReX_Particle.H:456

amrex::Particle::cpu
__host__ __device__ ParticleCPUWrapper cpu() &
Definition AMReX_Particle.H:424

amrex::Particle::id
__host__ __device__ ParticleIDWrapper id() &
Definition AMReX_Particle.H:427