#include <iostream>
#include <sstream>
#include <regex>
#include <time.h>
#include <stdio.h>
#include <stdexcept>
#include <memory>
#include <vector>
#include <future>
#include <thread>
#include <mutex>
#include "bmdbg.h"
using namespace std;
// Writes the program banner and the list of supported command-line options
// (-fa, -j, -timing) to std::cerr.
// NOTE(review): the line carrying the return type, name and parameter list is
// missing from this listing (only `static` is left) - restore it from the
// full source before compiling.
static
{
std::cerr
<< "BitMagic DNA Index Build Sample (c) 2018" << std::endl
<< "-fa file-name -- input FASTA file" << std::endl
<< "-j number -- number of parallel jobs to run" << std::endl
<< "-timing -- collect timings" << std::endl
;
}
// Command-line parser: walks argv[1] .. argv[argc-1] and recognizes
// -h/--help, -fa/--fa, -j/--j and the timing switches (-timing/--timing/-t/--t).
// Returns 1 after printing a message to std::cerr when -fa or -j is the last
// argument (no value follows); returns 0 otherwise.
// NOTE(review): the signature line is missing from this listing; presumably
// `static int parse_args(int argc, char *argv[])`, as named in the symbol
// index at the end of this file - confirm against the full source.
static
{
for (int i = 1; i < argc; ++i)
{
std::string arg = argv[i];
if ((arg == "-h") || (arg == "--help"))
{
// Help request: returns 0 immediately.
// NOTE(review): no help text is printed by the visible code - the call
// that prints it appears to be missing here.
return 0;
}
if (arg == "-fa" || arg == "--fa")
{
if (i + 1 < argc)
{
// NOTE(review): empty in this listing; presumably the file name is taken
// from the next argument here - as shown, the value is never consumed.
}
else
{
std::cerr << "Error: -fa requires file name" << std::endl;
return 1;
}
continue;
}
if (arg == "-j" || arg == "--j")
{
if (i + 1 < argc)
{
// NOTE(review): empty in this listing; presumably the job count is read
// from the next argument here - as shown, the value is never consumed.
}
else
{
std::cerr << "Error: -j requires number of jobs" << std::endl;
return 1;
}
continue;
}
// NOTE(review): this `if` has no statement of its own in the listing, so the
// loop's closing brace follows it directly - the statement that records the
// timing switch appears to be missing.
if (arg == "-timing" || arg == "--timing" || arg == "-t" || arg == "--t")
}
return 0;
}
/// Load the sequence data of a FASTA file into a flat character vector.
///
/// Header lines (first character '>') and empty lines are skipped; the
/// characters of every other line are appended to seq_vect in file order.
///
/// @param fname     path of the FASTA file to read
/// @param seq_vect  output buffer; emptied first (even when the open fails),
///                  then filled with the sequence characters
/// @return 0 on success, -1 if the file could not be opened
static
int load_FASTA(
               const std::string& fname, std::vector<char>& seq_vect)
{
    seq_vect.resize(0);

    std::ifstream fin(fname.c_str(), std::ios::in);
    if (!fin.good())
        return -1;

    std::string line;
    while (std::getline(fin, line))
    {
        // A file with CRLF line endings leaves a trailing '\r' on every line
        // after getline(); drop it so it neither lands in the sequence nor
        // shifts the position of every following letter.
        if (!line.empty() && line.back() == '\r')
            line.pop_back();

        if (line.empty() || line.front() == '>')
            continue;

        // Append the whole line at once instead of one push_back per character.
        seq_vect.insert(seq_vect.end(), line.begin(), line.end());
    }
    return 0;
}
{
public:
void Build(
const vector<char>& sequence)
{
bm::bvector<>::insert_iterator iA = m_FPrintBV[
eA].inserter();
bm::bvector<>::insert_iterator iC = m_FPrintBV[
eC].inserter();
bm::bvector<>::insert_iterator iG = m_FPrintBV[
eG].inserter();
bm::bvector<>::insert_iterator iT = m_FPrintBV[
eT].inserter();
bm::bvector<>::insert_iterator iN = m_FPrintBV[
eN].inserter();
for (size_t i = 0; i < sequence.size(); ++i)
{
unsigned pos = unsigned(i);
switch (sequence[i])
{
case 'A':
iA = pos;
break;
case 'C':
iC = pos;
break;
case 'G':
iG = pos;
break;
case 'T':
iT = pos;
break;
case 'N':
iN = pos;
break;
default:
break;
}
}
}
// NOTE(review): the header of this method is missing from the listing;
// presumably it is `void BuildBulk(const vector<char>& sequence)` from the
// symbol index at the end of this file - confirm against the full source.
// The visible body walks the sequence and assigns each index to iA/iC/iG/iT/iN
// according to the letter found there; other characters are ignored.
// NOTE(review): iA, iC, iG, iT and iN are not declared inside this body as
// shown - their declarations appear to have been dropped as well.
{
for (size_t i = 0; i < sequence.size(); ++i)
{
unsigned pos = unsigned(i);
switch (sequence[i])
{
case 'A':
iA = pos;
break;
case 'C':
iC = pos;
break;
case 'G':
iG = pos;
break;
case 'T':
iT = pos;
break;
case 'N':
iN = pos;
break;
default:
break;
}
}
}
/// Build the fingerprint bit-vectors by scanning slices of the sequence in
/// concurrently running std::async tasks and waiting for all of them.
///
/// The sequence is cut into at most `threads` contiguous slices of equal
/// length (the last one may be shorter); one task is launched per slice.
///
/// @param sequence  input letters; element i is processed as position i
/// @param threads   requested number of parallel jobs; for values <= 1 the
///                  visible code returns without launching any task
void BuildParallel(
                   const vector<char>& sequence,
                   unsigned threads)
{
    // Worker functor: processes the half-open index range [from, to),
    // clipped to the end of the sequence.
    struct Func
    {
        DNA_FingerprintScanner* target_idx;
        const std::vector<char>* src_sequence;

        Func(DNA_FingerprintScanner* idx, const vector<char>& src)
            : target_idx(idx), src_sequence(&src) {}

        void operator() (size_t from, size_t to)
        {
            const vector<char>& sequence = *src_sequence;
            // Task-local result vectors, one per letter.
            bm::bvector<> bvA, bvT, bvG, bvC, bvN;
            {
                // NOTE(review): iA/iC/iG/iT/iN are not declared anywhere in
                // this listing; presumably they are insert iterators over the
                // task-local vectors above - restore from the full source.
                for (size_t i = from; i < sequence.size() && (i < to); ++i)
                {
                    unsigned pos = unsigned(i);
                    switch (sequence[i])
                    {
                    case 'A':
                        iA = pos;
                        break;
                    case 'C':
                        iC = pos;
                        break;
                    case 'G':
                        iG = pos;
                        break;
                    case 'T':
                        iT = pos;
                        break;
                    case 'N':
                        iN = pos;
                        break;
                    default:
                        break;
                    }
                }
                iA.flush();
                iC.flush();
                iT.flush();
                iG.flush();
                iN.flush();
            }
            // NOTE(review): nothing visible here hands bvA..bvN over to
            // target_idx; the statements doing so appear to be missing.
        }
    };

    if (threads <= 1)
    {
        // NOTE(review): returns without building anything in this listing;
        // presumably a single-threaded build call preceded this return.
        return;
    }

    const size_t total = sequence.size();

    // Ceiling division: guarantees a slice length of at least 1 whenever the
    // sequence is non-empty. The previous `size / threads` yielded 0 for a
    // sequence shorter than `threads`, so `k += range` never advanced and the
    // launch loop ran forever. It also caps the task count at `threads`.
    const size_t range = (total + threads - 1) / threads;

    std::vector<std::future<void> > futures;
    futures.reserve(threads);

    // size_t index: a 32-bit counter could wrap for very long sequences.
    for (size_t k = 0; k < total; k += range)
    {
        futures.emplace_back(std::async(std::launch::async,
                             Func(this, sequence), k, k + range));
    }

    // Block until every slice has been processed.
    for (auto& e : futures)
    {
        e.wait();
    }
}
// NOTE(review): the header of this method is missing from the listing;
// presumably it is `void MergeVector(char letter, bm::bvector<> &bv)` from the
// symbol index at the end of this file - confirm against the full source.
// Merges `bv` into the member vector selected by `letter` while holding the
// mutex dedicated to that letter; any other letter is ignored.
{
// Function-local statics: one mutex per letter, shared by every call of this
// function (and therefore by all objects of the class).
static std::mutex mtx_A;
static std::mutex mtx_T;
static std::mutex mtx_G;
static std::mutex mtx_C;
static std::mutex mtx_N;
switch (letter)
{
case 'A':
{
// The inner braces limit the lock to the merge call itself.
std::lock_guard<std::mutex> guard(mtx_A);
m_FPrintBV[
eA].merge(bv);
}
break;
case 'C':
{
std::lock_guard<std::mutex> guard(mtx_C);
m_FPrintBV[
eC].merge(bv);
}
break;
case 'G':
{
std::lock_guard<std::mutex> guard(mtx_G);
m_FPrintBV[
eG].merge(bv);
}
break;
case 'T':
{
std::lock_guard<std::mutex> guard(mtx_T);
m_FPrintBV[
eT].merge(bv);
}
break;
case 'N':
{
std::lock_guard<std::mutex> guard(mtx_N);
m_FPrintBV[
eN].merge(bv);
}
break;
default:
break;
}
}
// Accessor declared to return a const reference to a bit-vector for `letter`.
// Throws std::runtime_error("Error. Invalid letter!") when control reaches the
// end of the switch.
// NOTE(review): as shown, none of the case labels has a return statement, so
// every letter - including 'A', 'C', 'G', 'T' and 'N' - falls through to the
// throw. The per-letter `return` lines appear to be missing from this listing;
// restore them from the full source.
const bm::bvector<>&
GetVector(
char letter)
const
{
switch (letter)
{
case 'A':
case 'C':
case 'G':
case 'T':
case 'N':
default:
break;
}
throw runtime_error("Error. Invalid letter!");
}
private:
// Fingerprint bit-vectors, indexed by the letter codes eA, eC, eG, eT and eN
// used throughout the class; eEnd is the array dimension.
// NOTE(review): the enumeration declaring these codes is not visible in this
// listing.
bm::bvector<> m_FPrintBV[
eEnd];
};
// Loops over the letters 'A', 'T', 'G', 'C' and throws
// std::runtime_error("Fingerprint mismatch for:<letter>") when `cmp` is
// non-zero for a letter. 'N' is not part of the checked list.
// NOTE(review): the signature line is missing from this listing; presumably
// `static void fingerprint_compare(const DNA_FingerprintScanner &idx1,
// const DNA_FingerprintScanner &idx2)`, as named in the symbol index at the
// end of this file. The statements that compute `cmp` are missing as well -
// confirm both against the full source.
static
{
std::vector<char> letters {'A', 'T', 'G', 'C'};
for (char base : letters)
{
if (cmp != 0)
{
throw runtime_error(string("Fingerprint mismatch for:") + string(1, base));
}
}
}
// Program entry point.
// Returns 1 when fewer than two command-line arguments follow the program
// name, or when a std::exception is caught (its message is written to
// std::cerr); returns `ret` / `res` when those are non-zero; returns 0
// otherwise.
// NOTE(review): many statements are missing from this listing - `ret` and
// `res` are used without a visible declaration and several brace blocks are
// empty. Restore them from the full source before compiling.
int main(
int argc,
char *argv[])
{
if (argc < 3)
{
// NOTE(review): exits with 1 without printing anything in the visible code.
return 1;
}
std::vector<char> seq_vect;
try
{
// NOTE(review): `ret` is not declared in this listing; presumably it holds
// the result of parsing the command line.
if (ret != 0)
return ret;
{
// NOTE(review): `res` is not declared in this listing; presumably it holds
// the result of loading the FASTA file into seq_vect.
if (res != 0)
return res;
std::cout << "FASTA sequence size=" << seq_vect.size() << std::endl;
{
}
{
}
}
{
std::cout << std::endl << "Performance:" << std::endl;
}
}
catch (std::exception& ex)
{
std::cerr << "Error:" << ex.what() << std::endl;
return 1;
}
return 0;
}
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
Timing utilities for benchmarking (internal).
Pre-processor un-defines to avoid global namespace pollution (internal).
Utility for keeping all DNA finger print vectors and search using various techniques.
void BuildParallel(const vector< char > &sequence, unsigned threads)
Build fingerprint bit-vectors using bulk insert iterator and parallel processing.
const bm::bvector<> & GetVector(char letter) const
Return fingerprint bit-vector.
void Build(const vector< char > &sequence)
Build fingerprint bit-vectors from the original sequence.
void BuildBulk(const vector< char > &sequence)
Build index using bulk insert iterator.
void MergeVector(char letter, bm::bvector<> &bv)
Thread sync bit-vector merge.
Bitvector: bit-vector container with runtime compression of bits.
int compare(const bvector< Alloc > &bvect) const BMNOEXCEPT
Lexicographical comparison with a bitvector.
Utility class to collect performance measurements and statistics.
std::map< std::string, statistics > duration_map_type
Test name to duration map.
static void print_duration_map(TOut &tout, const duration_map_type &dmap, format fmt=ct_time)
@ BM_SORTED
input set is sorted (ascending order)
bm::chrono_taker ::duration_map_type timing_map
static int parse_args(int argc, char *argv[])
static int load_FASTA(const std::string &fname, std::vector< char > &seq_vect)
static void fingerprint_compare(const DNA_FingerprintScanner &idx1, const DNA_FingerprintScanner &idx2)
Check correctness of indexes constructed using different methods.