Open-source cheminformatics and machine learning.
No Matches
Go to the documentation of this file.
2// Copyright (C) 2004-2008 Greg Landrum and Rational Discovery LLC
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
10#include <RDGeneral/export.h>
11#ifndef __RD_VECTOR_H__
12#define __RD_VECTOR_H__
14#include <RDGeneral/Invariant.h>
15#include <RDGeneral/utils.h>
16#include <cmath>
17#include <iostream>
18#include <iomanip>
19#include <cstdlib>
20#include <cstring>
21#include <ctime>
22#include <boost/random.hpp>
23#include <boost/smart_ptr.hpp>
25namespace RDNumeric {
27//! A class to represent vectors of numbers.
28template <class TYPE>
29class Vector {
30 public:
31 typedef boost::shared_array<TYPE> DATA_SPTR;
33 //! Initialize with only a size.
34 explicit Vector(unsigned int N) {
35 d_size = N;
36 TYPE *data = new TYPE[N];
37 memset(static_cast<void *>(data), 0, d_size * sizeof(TYPE));
38 d_data.reset(data);
39 }
41 //! Initialize with a size and default value.
42 Vector(unsigned int N, TYPE val) { //: Vector(N) {
43 d_size = N;
44 TYPE *data = new TYPE[N];
46 unsigned int i;
47 for (i = 0; i < N; i++) {
48 data[i] = val;
49 }
50 d_data.reset(data);
51 }
53 //! Initialize from a smart pointer.
54 /*!
55 <b>NOTE:</b> the data is not copied in this case
56 */
57 Vector(unsigned int N, DATA_SPTR data) { // TYPE *data) {
58 d_size = N;
59 d_data = data;
60 }
62 //! copy constructor
63 /*! We make a copy of the other vector's data.
64 */
65 Vector(const Vector &other) {
66 d_size = other.size();
67 const TYPE *otherData = other.getData();
68 TYPE *data = new TYPE[d_size];
70 memcpy(static_cast<void *>(data), static_cast<const void *>(otherData),
71 d_size * sizeof(TYPE));
72 d_data.reset(data);
73 }
//! Destructor. Nothing is released by hand here; cleanup of the buffer is
//! left to the d_data smart-pointer member.
75 ~Vector() = default;
77 //! return the size (dimension) of the vector
78 unsigned int size() const { return d_size; }
80 //! returns the value at a particular index
81 inline TYPE getVal(unsigned int i) const {
82 PRECONDITION(i < d_size, "bad index");
83 return d_data[i];
84 }
86 //! sets the index at a particular value
87 inline void setVal(unsigned int i, TYPE val) {
88 PRECONDITION(i < d_size, "bad index");
89 d_data[i] = val;
90 }
92 inline TYPE operator[](unsigned int i) const {
93 PRECONDITION(i < d_size, "bad index");
94 return d_data[i];
95 }
97 inline TYPE &operator[](unsigned int i) {
98 PRECONDITION(i < d_size, "bad index");
99 return d_data[i];
100 }
102 //! returns a pointer to our data array
103 inline TYPE *getData() { return d_data.get(); }
105 //! returns a const pointer to our data array
106 inline const TYPE *getData() const {
107 // return dp_data;
108 return d_data.get();
109 }
111 //! Copy operator.
112 /*! We make a copy of the other Vector's data.
113 */
116 PRECONDITION(d_size == other.size(), "Size mismatch in vector copying");
117 const TYPE *otherData = other.getData();
118 memcpy(static_cast<void *>(d_data.get()),
119 static_cast<const void *>(otherData), d_size * sizeof(TYPE));
120 return *this;
121 }
123 //! elementwise addition, vectors must be the same size.
125 PRECONDITION(d_size == other.size(), "Size mismatch in vector addition");
126 const TYPE *otherData = other.getData();
127 TYPE *data = d_data.get();
128 unsigned int i;
129 for (i = 0; i < d_size; i++) {
130 data[i] += otherData[i];
131 }
132 return *this;
133 }
135 //! elementwise subtraction, vectors must be the same size.
137 PRECONDITION(d_size == other.size(), "Size mismatch in vector subtraction");
138 const TYPE *otherData = other.getData();
139 TYPE *data = d_data.get();
140 unsigned int i;
141 for (i = 0; i < d_size; i++) {
142 data[i] -= otherData[i];
143 }
144 return *this;
145 }
147 //! multiplication by a scalar
149 unsigned int i;
150 for (i = 0; i < d_size; i++) {
151 d_data[i] *= scale;
152 }
153 return *this;
154 }
156 //! division by a scalar
158 unsigned int i;
159 for (i = 0; i < d_size; i++) {
160 d_data[i] /= scale;
161 }
162 return *this;
163 }
165 //! L2 norm squared
166 inline TYPE normL2Sq() const {
167 TYPE res = (TYPE)0.0;
168 unsigned int i;
169 TYPE *data = d_data.get();
170 for (i = 0; i < d_size; i++) {
171 res += data[i] * data[i];
172 }
173 return res;
174 }
176 //! L2 norm
177 inline TYPE normL2() const { return sqrt(this->normL2Sq()); }
179 //! L1 norm
180 inline TYPE normL1() const {
181 TYPE res = (TYPE)0.0;
182 unsigned int i;
183 TYPE *data = d_data.get();
184 for (i = 0; i < d_size; i++) {
185 res += fabs(data[i]);
186 }
187 return res;
188 }
190 //! L-infinity norm
191 inline TYPE normLinfinity() const {
192 TYPE res = (TYPE)(-1.0);
193 unsigned int i;
194 TYPE *data = d_data.get();
195 for (i = 0; i < d_size; i++) {
196 if (fabs(data[i]) > res) {
197 res = fabs(data[i]);
198 }
199 }
200 return res;
201 }
203 //! \brief Gets the ID of the entry that has the largest absolute value
204 //! i.e. the entry being used for the L-infinity norm
205 inline unsigned int largestAbsValId() const {
206 TYPE res = (TYPE)(-1.0);
207 unsigned int i, id = d_size;
208 TYPE *data = d_data.get();
209 for (i = 0; i < d_size; i++) {
210 if (fabs(data[i]) > res) {
211 res = fabs(data[i]);
212 id = i;
213 }
214 }
215 return id;
216 }
218 //! \brief Gets the ID of the entry that has the largest value
219 inline unsigned int largestValId() const {
220 TYPE res = (TYPE)(-1.e8);
221 unsigned int i, id = d_size;
222 TYPE *data = d_data.get();
223 for (i = 0; i < d_size; i++) {
224 if (data[i] > res) {
225 res = data[i];
226 id = i;
227 }
228 }
229 return id;
230 }
232 //! \brief Gets the ID of the entry that has the smallest value
233 inline unsigned int smallestValId() const {
234 TYPE res = (TYPE)(1.e8);
235 unsigned int i, id = d_size;
236 TYPE *data = d_data.get();
237 for (i = 0; i < d_size; i++) {
238 if (data[i] < res) {
239 res = data[i];
240 id = i;
241 }
242 }
243 return id;
244 }
246 //! returns the dot product between two Vectors
247 inline TYPE dotProduct(const Vector<TYPE> other) const {
248 PRECONDITION(d_size == other.size(),
249 "Size mismatch in vector doct product");
250 const TYPE *oData = other.getData();
251 unsigned int i;
252 TYPE res = (TYPE)(0.0);
253 TYPE *data = d_data.get();
254 for (i = 0; i < d_size; i++) {
255 res += (data[i] * oData[i]);
256 }
257 return res;
258 }
260 //! Normalize the vector using the L2 norm
261 inline void normalize() {
262 TYPE val = this->normL2();
263 (*this) /= val;
264 }
266 //! Set to a random unit vector
267 inline void setToRandom(unsigned int seed = 0) {
268 // we want to get our own RNG here instead of using the global
269 // one. This is related to Issue285.
270 RDKit::rng_type generator(42u);
271 RDKit::uniform_double dist(0, 1.0);
272 RDKit::double_source_type randSource(generator, dist);
273 if (seed > 0) {
274 generator.seed(seed);
275 } else {
276 // we can't initialize using only clock(), because it's possible
277 // that we'll get here fast enough that clock() will return 0
278 // and generator.seed(0) is an error:
279 generator.seed(clock() + 1);
280 }
282 unsigned int i;
283 TYPE *data = d_data.get();
284 for (i = 0; i < d_size; i++) {
285 data[i] = randSource();
286 }
287 this->normalize();
288 }
290 private:
291 unsigned int d_size; //!< our length
292 DATA_SPTR d_data;
293 Vector<TYPE> &operator=(const Vector<TYPE> &other);
298//! returns the algebraic tanimoto similarity [defn' from JCIM 46:587-96 (2006)]
299template <typename T>
300double TanimotoSimilarity(const Vector<T> &v1, const Vector<T> &v2) {
301 double numer = v1.dotProduct(v2);
302 if (numer == 0.0) {
303 return 0.0;
304 }
305 double denom = v1.normL2Sq() + v2.normL2Sq() - numer;
306 if (denom == 0.0) {
307 return 0.0;
308 }
309 return numer / denom;
311} // end of namespace RDNumeric
313//! ostream operator for Vectors
314template <typename TYPE>
315std::ostream &operator<<(std::ostream &target,
316 const RDNumeric::Vector<TYPE> &vec) {
317 unsigned int siz = vec.size();
318 target << "Size: " << siz << " [";
319 unsigned int i;
320 for (i = 0; i < siz; i++) {
321 target << std::setw(7) << std::setprecision(3) << vec.getVal(i) << ", ";
322 }
323 target << "]\n";
324 return target;
#define PRECONDITION(expr, mess)
Definition Invariant.h:109
std::ostream & operator<<(std::ostream &target, const RDNumeric::Vector< TYPE > &vec)
ostream operator for Vectors
Definition Vector.h:315
A class to represent vectors of numbers.
Definition Vector.h:29
TYPE normL2() const
L2 norm.
Definition Vector.h:177
Vector< TYPE > & operator*=(TYPE scale)
multiplication by a scalar
Definition Vector.h:148
Vector(unsigned int N)
Initialize with only a size.
Definition Vector.h:34
Vector(const Vector &other)
copy constructor
Definition Vector.h:65
Vector< TYPE > & assign(const Vector< TYPE > &other)
Copy operator.
Definition Vector.h:115
TYPE & operator[](unsigned int i)
Definition Vector.h:97
void normalize()
Normalize the vector using the L2 norm.
Definition Vector.h:261
void setVal(unsigned int i, TYPE val)
sets the value at a particular index
Definition Vector.h:87
Vector(unsigned int N, TYPE val)
Initialize with a size and default value.
Definition Vector.h:42
Vector< TYPE > & operator-=(const Vector< TYPE > &other)
elementwise subtraction, vectors must be the same size.
Definition Vector.h:136
Vector< TYPE > & operator+=(const Vector< TYPE > &other)
elementwise addition, vectors must be the same size.
Definition Vector.h:124
void setToRandom(unsigned int seed=0)
Set to a random unit vector.
Definition Vector.h:267
unsigned int size() const
return the size (dimension) of the vector
Definition Vector.h:78
TYPE normL1() const
L1 norm.
Definition Vector.h:180
Vector< TYPE > & operator/=(TYPE scale)
division by a scalar
Definition Vector.h:157
TYPE normLinfinity() const
L-infinity norm.
Definition Vector.h:191
boost::shared_array< TYPE > DATA_SPTR
Definition Vector.h:31
TYPE normL2Sq() const
L2 norm squared.
Definition Vector.h:166
TYPE dotProduct(const Vector< TYPE > other) const
returns the dot product between two Vectors
Definition Vector.h:247
unsigned int largestValId() const
Gets the ID of the entry that has the largest value.
Definition Vector.h:219
unsigned int largestAbsValId() const
Gets the ID of the entry that has the largest absolute value i.e. the entry being used for the L-infinity norm.
Definition Vector.h:205
TYPE * getData()
returns a pointer to our data array
Definition Vector.h:103
unsigned int smallestValId() const
Gets the ID of the entry that has the smallest value.
Definition Vector.h:233
TYPE getVal(unsigned int i) const
returns the value at a particular index
Definition Vector.h:81
const TYPE * getData() const
returns a const pointer to our data array
Definition Vector.h:106
TYPE operator[](unsigned int i) const
Definition Vector.h:92
Vector(unsigned int N, DATA_SPTR data)
Initialize from a smart pointer.
Definition Vector.h:57
boost::minstd_rand rng_type
Definition utils.h:36
boost::variate_generator< rng_type &, uniform_double > double_source_type
Definition utils.h:40
boost::uniform_real uniform_double
Definition utils.h:38
double TanimotoSimilarity(const Vector< T > &v1, const Vector< T > &v2)
returns the algebraic tanimoto similarity [defn' from JCIM 46:587-96 (2006)]
Definition Vector.h:300
Vector< double > DoubleVector
Definition Vector.h:296