1 |
/********************************************************************** |
2 |
fingerprint.h - Base class for fingerprints and fast searching |
3 |
|
4 |
Copyright (C) 2005 by Chris Morley |
5 |
|
6 |
This file is part of the Open Babel project. |
7 |
For more information, see <http://openbabel.sourceforge.net/> |
8 |
|
9 |
This program is free software; you can redistribute it and/or modify |
10 |
it under the terms of the GNU General Public License as published by |
11 |
the Free Software Foundation version 2 of the License. |
12 |
|
13 |
This program is distributed in the hope that it will be useful, |
14 |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 |
GNU General Public License for more details. |
17 |
***********************************************************************/ |
18 |
|
19 |
#ifndef OB_FINGERPRINT_H |
20 |
#define OB_FINGERPRINT_H |
21 |
|
22 |
#include <list> |
23 |
#include <map> |
24 |
#include <set> |
25 |
#include <vector> |
26 |
#include <string> |
27 |
|
28 |
namespace OpenBabel |
29 |
{ |
30 |
class OBBase; //Forward declaration; used only as pointer. |
31 |
|
32 |
/// \brief The base class for fingerprints |
33 |
class OBAPI OBFingerprint |
34 |
{ |
35 |
//see end of cpp file for detailed documentation |
36 |
public: |
37 |
/// Sets the nth bit |
38 |
void SetBit(std::vector<unsigned int>& vec, unsigned int n); |
39 |
|
40 |
/// Repeatedly ORs the top half with the bottom half until no smaller than nbits |
41 |
void Fold(std::vector<unsigned int>& vec, unsigned int nbits); |
42 |
|
43 |
/// Returns fingerprint in vector, which may be resized, folded to nbits (if nbits!=0) |
44 |
virtual bool GetFingerprint(OBBase* pOb, std::vector<unsigned int>& fp, int nbits=0)=0; |
45 |
|
46 |
/// Required short description of the fingerprint type. |
47 |
virtual std::string Description()=0; |
48 |
|
49 |
/// Optional flags |
50 |
enum FptFlag{FPT_UNIQUEBITS=1}; |
51 |
virtual unsigned int Flags() { return 0;}; |
52 |
|
53 |
/// Obtain info on available fingerprints |
54 |
static bool GetNextFPrt(std::string& id, OBFingerprint*& pFPrt); |
55 |
|
56 |
/// Returns a pointer to a fingerprint (the default if ID is empty), or NULL if not available |
57 |
static OBFingerprint* FindFingerprint(std::string& ID); |
58 |
|
59 |
/// Returns the Tanimoto coefficient between two vectors (vector<unsigned int>& SeekPositions) |
60 |
static double Tanimoto(const std::vector<unsigned int>& vec1, const std::vector<unsigned int>& vec2); |
61 |
|
62 |
/// Inline version of Tanimoto() taking a pointer for the second vector |
63 |
static double Tanimoto(const std::vector<unsigned int>& vec1, const unsigned int* p2) |
64 |
{ |
65 |
///If used for two vectors, vec1 and vec2, call as Tanimoto(vec1, &vec2[0]); |
66 |
int andbits=0, orbits=0; |
67 |
unsigned int i; |
68 |
for (i=0;i<vec1.size();++i) |
69 |
{ |
70 |
int andfp = vec1[i] & p2[i]; |
71 |
int orfp = vec1[i] | p2[i]; |
72 |
//Count bits |
73 |
for(;andfp;andfp=andfp<<1) |
74 |
if(andfp<0) ++andbits; |
75 |
for(;orfp;orfp=orfp<<1) |
76 |
if(orfp<0) ++orbits; |
77 |
} |
78 |
return((double)andbits/(double)orbits); |
79 |
}; |
80 |
|
81 |
static unsigned int Getbitsperint(){ return bitsperint; } |
82 |
|
83 |
private: |
84 |
///Function object to set bits |
85 |
struct bit_or |
86 |
{ |
87 |
unsigned int operator()(const unsigned int a, const unsigned int b) |
88 |
{ |
89 |
return a | b; |
90 |
} |
91 |
}; |
92 |
|
93 |
typedef std::map<std::string, OBFingerprint*> FPMapType; |
94 |
typedef FPMapType::iterator Fptpos; |
95 |
|
96 |
protected: |
97 |
///This static function returns a reference to the FPtsMap |
98 |
///which, because it is a static local variable is constructed only once. |
99 |
///This fiddle is to avoid the "static initialization order fiasco" |
100 |
///See Marshall Cline's C++ FAQ Lite document, www.parashift.com/c++-faq-lite/". |
101 |
static FPMapType& FPtsMap() |
102 |
{ |
103 |
static FPMapType* fptm = NULL; |
104 |
if (!fptm) |
105 |
fptm = new FPMapType; |
106 |
return *fptm; |
107 |
}; |
108 |
|
109 |
OBFingerprint(std::string ID, bool IsDefault=false) |
110 |
{ |
111 |
FPtsMap()[ID] = this; //registers the derived fingerprint class |
112 |
if(IsDefault || FPtsMap().empty()) |
113 |
_pDefault=this; |
114 |
}; |
115 |
|
116 |
private: |
117 |
static OBFingerprint* _pDefault; |
118 |
static const unsigned int bitsperint;// = 8 * sizeof(unsigned int); |
119 |
static int rubbish; |
120 |
}; |
121 |
|
122 |
|
123 |
|
124 |
|
125 |
//************************************************************* |
126 |
//Fast search routines |
127 |
///Header for fastsearch index file |
128 |
struct OBAPI FptIndexHeader |
129 |
{ |
130 |
unsigned int headerlength;///<offset to data: sizeof(FptIndexHeader) |
131 |
unsigned int nEntries; ///<number of fingerprints |
132 |
unsigned int words; ///<number 32bit words per fingerprint |
133 |
char fpid[16]; ///<ID of the fingerprint type |
134 |
char datafilename[256]; ///<the data that this is an index to |
135 |
}; |
136 |
/// Structure of fastsearch index files |
137 |
struct OBAPI FptIndex |
138 |
{ |
139 |
FptIndexHeader header; |
140 |
std::vector<unsigned int> fptdata; |
141 |
std::vector<unsigned int> seekdata; |
142 |
bool Read(std::istream* pIndexstream); |
143 |
///\brief Returns pointer to FP used or NULL and an error message |
144 |
OBFingerprint* CheckFP(); |
145 |
}; |
146 |
|
147 |
/// \brief Class to search fingerprint index files |
148 |
class OBAPI FastSearch |
149 |
{ |
150 |
//see end of cpp file for detailed documentation |
151 |
public: |
152 |
std::string ReadIndex(std::istream* pIndexstream); |
153 |
virtual ~FastSearch(){}; |
154 |
|
155 |
/// \brief Does substructure search and returns vector of the file positions of matches |
156 |
bool Find(OBBase* pOb, std::vector<unsigned int>& SeekPositions, unsigned int MaxCandidates); |
157 |
|
158 |
/// \brief Returns multimap containing objects whose Tanimoto coefficients with the target |
159 |
/// is greater than the value specified. |
160 |
bool FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap, |
161 |
double MinTani); |
162 |
|
163 |
/// \brief Returns multimap containing the nCandidates objects with largest Tanimoto |
164 |
/// coefficients with the target. |
165 |
bool FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap, |
166 |
int nCandidates=0); |
167 |
|
168 |
/// \brief Returns a pointer to the fingerprint type used to constuct the index |
169 |
OBFingerprint* GetFingerprint() const{ return _pFP;}; |
170 |
|
171 |
private: |
172 |
FptIndex _index; |
173 |
OBFingerprint* _pFP; |
174 |
}; |
175 |
|
176 |
//********************************************** |
177 |
/// \brief Class to prepare fingerprint index files See FastSearch class for details |
178 |
class OBAPI FastSearchIndexer |
179 |
{ |
180 |
//see end of cpp file for detailed documentation |
181 |
public: |
182 |
///\brief Constructor with a new index |
183 |
FastSearchIndexer(std::string& datafilename, std::ostream* os, std::string& fpid, |
184 |
int FptBits=0); |
185 |
|
186 |
///\brief Constructor using existing index |
187 |
FastSearchIndexer(FptIndex* pindex, std::ostream* os); |
188 |
|
189 |
~FastSearchIndexer(); |
190 |
|
191 |
///\brief Called for each object |
192 |
bool Add(OBBase* pOb, std::streampos seekpos); |
193 |
|
194 |
private: |
195 |
std::ostream* _indexstream; |
196 |
FptIndex* _pindex; |
197 |
OBFingerprint* _pFP; |
198 |
int _nbits; |
199 |
}; |
200 |
|
201 |
} //namespace OpenBabel |
202 |
#endif |
203 |
|
204 |
//! \file fingerprint.h |
205 |
//! \brief Declaration of OBFingerprint base class and fastsearch classes |