BitMagic-C++
xsample03.cpp File Reference

Example: SNP search in human genome. More...

#include <iostream>
#include <sstream>
#include <chrono>
#include <regex>
#include <time.h>
#include <stdio.h>
#include <vector>
#include <map>
#include <utility>
#include "bm.h"
#include "bmalgo.h"
#include "bmserial.h"
#include "bmrandom.h"
#include "bmsparsevec.h"
#include "bmsparsevec_compr.h"
#include "bmsparsevec_algo.h"
#include "bmsparsevec_serial.h"
#include "bmalgo_similarity.h"
#include "bmsparsevec_util.h"
#include "bmdbg.h"
#include "bmtimer.h"
#include "bmundef.h"
Include dependency graph for xsample03.cpp:

Go to the source code of this file.

Typedefs

typedef bm::sparse_vector< unsigned, bm::bvector<> > sparse_vector_u32
typedef bm::rsc_sparse_vector< unsigned, sparse_vector_u32 > rsc_sparse_vector_u32
typedef std::vector< std::pair< unsigned, unsigned > > vector_pairs

Functions

static void show_help ()
static int parse_args (int argc, char *argv[])
static int load_snp_report (const std::string &fname, sparse_vector_u32 &sv)
static void generate_random_subset (const sparse_vector_u32 &sv, std::vector< unsigned > &vect, unsigned count)
static void build_vector_pairs (const sparse_vector_u32 &sv, vector_pairs &vp)
static bool search_vector_pairs (const vector_pairs &vp, unsigned rs_id, unsigned &pos)
static void run_benchmark (const sparse_vector_u32 &sv, const rsc_sparse_vector_u32 &csv)
int main (int argc, char *argv[])

Variables

std::string sv_out_name
std::string rsc_out_name
std::string sv_in_name
std::string rsc_in_name
std::string isnp_name
bool is_diag = false
bool is_timing = false
bool is_bench = false
bm::chrono_taker::duration_map_type timing_map

Detailed Description

Example: SNP search in human genome.

Brief description of used method:

  1. Parse SNP chromosome report and extract information about SNP number and location in the chromosome
  2. Use this information to build a bit-transposed sparse_vector<> where the vector position matches the chromosome position and the SNP ids (aka rsids) are kept as a bit-transposed matrix
  3. Build a rank-select compressed sparse vector, dropping all NULL columns (this data format is pretty sparse, since the number of SNPs is significantly less than the number of chromosome bases: 1:5 or less). Use the memory report to understand the memory footprint of each form of storage
  4. Run benchmarks searching for 500 randomly selected SNPs using bm::sparse_vector<>, bm::rsc_sparse_vector<>, and std::vector<std::pair<unsigned, unsigned> >

This example should be useful for construction of compressed columnar tables with parallel search capabilities.

Definition in file xsample03.cpp.

Typedef Documentation

◆ rsc_sparse_vector_u32

◆ sparse_vector_u32

Definition at line 204 of file xsample03.cpp.

◆ vector_pairs

typedef std::vector<std::pair<unsigned, unsigned> > vector_pairs
Examples
xsample03.cpp.

Definition at line 206 of file xsample03.cpp.

Function Documentation

◆ build_vector_pairs()

void build_vector_pairs ( const sparse_vector_u32 & sv,
vector_pairs & vp )
static

◆ generate_random_subset()

void generate_random_subset ( const sparse_vector_u32 & sv,
std::vector< unsigned > & vect,
unsigned count )
static

◆ load_snp_report()

int load_snp_report ( const std::string & fname,
sparse_vector_u32 & sv )
static

◆ main()

◆ parse_args()

int parse_args ( int argc,
char * argv[] )
static

◆ run_benchmark()

◆ search_vector_pairs()

bool search_vector_pairs ( const vector_pairs & vp,
unsigned rs_id,
unsigned & pos )
static
Examples
xsample03.cpp.

Definition at line 330 of file xsample03.cpp.

Referenced by run_benchmark().

◆ show_help()

void show_help ( )
static
Examples
inv_list.cpp, xsample03.cpp, xsample04.cpp, xsample04a.cpp, and xsample05.cpp.

Definition at line 81 of file xsample03.cpp.

Referenced by main(), and parse_args().

Variable Documentation

◆ is_bench

bool is_bench = false

◆ is_diag

◆ is_timing

◆ isnp_name

std::string isnp_name
Examples
xsample03.cpp.

Definition at line 104 of file xsample03.cpp.

Referenced by main(), and parse_args().

◆ rsc_in_name

std::string rsc_in_name
Examples
xsample03.cpp.

Definition at line 103 of file xsample03.cpp.

Referenced by main(), and parse_args().

◆ rsc_out_name

std::string rsc_out_name
Examples
xsample03.cpp.

Definition at line 101 of file xsample03.cpp.

Referenced by main(), and parse_args().

◆ sv_in_name

std::string sv_in_name
Examples
xsample03.cpp, and xsample05.cpp.

Definition at line 102 of file xsample03.cpp.

Referenced by main(), and parse_args().

◆ sv_out_name

std::string sv_out_name
Examples
xsample03.cpp, and xsample05.cpp.

Definition at line 100 of file xsample03.cpp.

Referenced by main(), and parse_args().

◆ timing_map

bm::chrono_taker::duration_map_type timing_map

Definition at line 210 of file xsample03.cpp.