11 #ifndef RD_FPBREADER_H_DEC2015
12 #define RD_FPBREADER_H_DEC2015
29 #include <boost/shared_ptr.hpp>
30 #include <boost/shared_array.hpp>
34 struct FPBReader_impl;
69 FPBReader(
const char *fname,
bool lazyRead =
false) {
70 _initFromFilename(fname, lazyRead);
73 FPBReader(
const std::string &fname,
bool lazyRead =
false) {
74 _initFromFilename(fname.c_str(), lazyRead);
88 FPBReader(std::istream *inStream,
bool takeOwnership =
true,
89 bool lazyRead =
false)
92 df_owner(takeOwnership),
94 df_lazyRead(lazyRead){};
97 if (df_owner)
delete dp_istrm;
119 if (!df_init)
return;
//! Returns the requested fingerprint as an ExplicitBitVect.
//! \param idx selects which fingerprint is requested
//! \return a boost::shared_ptr holding the fingerprint
124 boost::shared_ptr<ExplicitBitVect>
getFP(
unsigned int idx)
const;
//! Returns the requested fingerprint as an array of bytes.
//! \param idx selects which fingerprint is requested
//! \return a boost::shared_array of std::uint8_t holding the fingerprint bytes
126 boost::shared_array<std::uint8_t>
getBytes(
unsigned int idx)
const;
//! Returns the id of the requested fingerprint.
//! \param idx selects which fingerprint's id is requested
129 std::string
getId(
unsigned int idx)
const;
131 std::pair<boost::shared_ptr<ExplicitBitVect>, std::string>
operator[](
132 unsigned int idx)
const {
133 return std::make_pair(
getFP(idx), getId(idx));
139 unsigned int minCount,
unsigned int maxCount);
//! NOTE(review): presumably returns the Tanimoto similarity between the
//! fingerprint selected by \c idx and the fingerprint bytes in \c bv —
//! confirm against the implementation.
//! \param idx selects the stored fingerprint
//! \param bv  pointer to the query bytes; the required length is not visible
//!            here (presumably it must match the stored fingerprint size —
//!            TODO confirm)
148 double getTanimoto(
unsigned int idx,
const std::uint8_t *bv)
const;
151 boost::shared_array<std::uint8_t> bv)
const {
152 return getTanimoto(idx, bv.get());
170 const std::uint8_t *bv,
double threshold = 0.7,
171 bool usePopcountScreen =
true)
const;
174 boost::shared_array<std::uint8_t> bv,
double threshold = 0.7,
175 bool usePopcountScreen =
true)
const {
176 return getTanimotoNeighbors(bv.get(), threshold, usePopcountScreen);
181 bool usePopcountScreen =
true)
const;
193 double getTversky(
unsigned int idx,
const std::uint8_t *bv,
double ca,
196 double getTversky(
unsigned int idx, boost::shared_array<std::uint8_t> bv,
197 double ca,
double cb)
const {
198 return getTversky(idx, bv.get(), ca, cb);
219 const std::uint8_t *bv,
double ca,
double cb,
double threshold = 0.7,
220 bool usePopcountScreen =
true)
const;
223 boost::shared_array<std::uint8_t> bv,
double ca,
double cb,
224 double threshold = 0.7,
bool usePopcountScreen =
true)
const {
225 return getTverskyNeighbors(bv.get(), ca, cb, threshold, usePopcountScreen);
229 const ExplicitBitVect &ebv,
double ca,
double cb,
double threshold = 0.7,
230 bool usePopcountScreen =
true)
const;
237 const std::uint8_t *bv)
const;
240 boost::shared_array<std::uint8_t> bv)
const {
241 return getContainingNeighbors(bv.get());
//! Input stream used by the reader; _initFromFilename() points it at the
//! std::ifstream it opens. Null by default.
248 std::istream *dp_istrm{
nullptr};
//! Pointer to the detail::FPBReader_impl implementation object. Null by default.
249 detail::FPBReader_impl *dp_impl{
nullptr};
//! Ownership flag: when true, this object deletes dp_istrm
//! (`if (df_owner) delete dp_istrm;`). The stream constructor sets it from
//! its takeOwnership argument.
250 bool df_owner{
false};
//! Lazy-read flag, set from the lazyRead argument of the constructors.
252 bool df_lazyRead{
false};
//! Copy constructor and copy assignment are only declared here (no bodies).
//! NOTE(review): presumably this is meant to make FPBReader non-copyable,
//! since it holds the raw pointers dp_istrm and dp_impl — confirm that no
//! definitions exist elsewhere.
258 FPBReader(
const FPBReader &);
259 FPBReader &operator=(
const FPBReader &);
261 void _initFromFilename(
const char *fname,
bool lazyRead) {
262 std::istream *tmpStream =
static_cast<std::istream *
>(
263 new std::ifstream(fname, std::ios_base::binary));
264 if (!(*tmpStream) || (tmpStream->bad())) {
265 std::ostringstream errout;
266 errout <<
"Bad input file " << fname;
268 throw BadFileException(errout.str());
270 dp_istrm = tmpStream;
274 df_lazyRead = lazyRead;
a class for bit vectors that are densely occupied
class for reading and searching FPB files
double getTversky(unsigned int idx, const std::uint8_t *bv, double ca, double cb) const
std::vector< std::pair< double, unsigned int > > getTanimotoNeighbors(const ExplicitBitVect &ebv, double threshold=0.7, bool usePopcountScreen=true) const
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
std::pair< unsigned int, unsigned int > getFPIdsInCountRange(unsigned int minCount, unsigned int maxCount)
unsigned int length() const
returns the number of fingerprints
double getTanimoto(unsigned int idx, const std::uint8_t *bv) const
boost::shared_ptr< ExplicitBitVect > getFP(unsigned int idx) const
returns the requested fingerprint as an ExplicitBitVect
boost::shared_array< std::uint8_t > getBytes(unsigned int idx) const
returns the requested fingerprint as an array of bytes
double getTanimoto(unsigned int idx, boost::shared_array< std::uint8_t > bv) const
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
double getTversky(unsigned int idx, boost::shared_array< std::uint8_t > bv, double ca, double cb) const
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
std::vector< std::pair< double, unsigned int > > getTverskyNeighbors(const ExplicitBitVect &ebv, double ca, double cb, double threshold=0.7, bool usePopcountScreen=true) const
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
double getTversky(unsigned int idx, const ExplicitBitVect &ebv, double ca, double cb) const
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
FPBReader(std::istream *inStream, bool takeOwnership=true, bool lazyRead=false)
ctor for reading from an open istream
std::vector< unsigned int > getContainingNeighbors(const ExplicitBitVect &ebv) const
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
FPBReader(const char *fname, bool lazyRead=false)
ctor for reading from a named file
FPBReader(const std::string &fname, bool lazyRead=false)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
std::vector< unsigned int > getContainingNeighbors(boost::shared_array< std::uint8_t > bv) const
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
std::vector< std::pair< double, unsigned int > > getTverskyNeighbors(const std::uint8_t *bv, double ca, double cb, double threshold=0.7, bool usePopcountScreen=true) const
returns Tversky neighbors that are within a similarity threshold
unsigned int nBits() const
returns the number of bits in our fingerprints
std::vector< unsigned int > getContainingNeighbors(const std::uint8_t *bv) const
returns indices of all fingerprints that completely contain this one
double getTanimoto(unsigned int idx, const ExplicitBitVect &ebv) const
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
std::string getId(unsigned int idx) const
returns the id of the requested fingerprint
std::pair< boost::shared_ptr< ExplicitBitVect >, std::string > operator[](unsigned int idx) const
returns the fingerprint and id of the requested fingerprint
std::vector< std::pair< double, unsigned int > > getTanimotoNeighbors(boost::shared_array< std::uint8_t > bv, double threshold=0.7, bool usePopcountScreen=true) const
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
void init()
Read the data from the file and initialize internal data structures.
std::vector< std::pair< double, unsigned int > > getTanimotoNeighbors(const std::uint8_t *bv, double threshold=0.7, bool usePopcountScreen=true) const
returns Tanimoto neighbors that are within a similarity threshold
std::vector< std::pair< double, unsigned int > > getTverskyNeighbors(boost::shared_array< std::uint8_t > bv, double ca, double cb, double threshold=0.7, bool usePopcountScreen=true) const
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
#define RDKIT_DATASTRUCTS_EXPORT
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * getFP(const ROMol &mol, FPType fPType)