ThunderSVM
ThunderSVM: An Open-Source SVM Library on GPUs and CPUs
dataset.h
1 //
2 // Created by jiashuai on 17-9-17.
3 //
4 
5 #ifndef THUNDERSVM_DATASET_H
6 #define THUNDERSVM_DATASET_H
7 
8 #include "thundersvm.h"
9 #include "syncarray.h"
10 
14 class DataSet { // Dataset reader: holds instances and their labels, and can group instances by class.
15 public:
16  struct node{ // One (index, value) pair; presumably a single sparse feature entry -- TODO confirm.
17  node(int index, float_type value) : index(index), value(value) {}
18 
19  int index;
20  float_type value;
21  };
22 
23  typedef vector<vector<DataSet::node>> node2d; // Two-level vector of nodes; presumably one inner vector per instance -- TODO confirm.
24 
25  DataSet();
26 
33  DataSet(const DataSet::node2d &instances, int n_features, const vector<float_type> &y); // Takes instances, a feature count and labels y; exact initialization is in dataset.cpp (not shown here).
34 
36  void load_from_file(string file_name); // Load dataset from file.
37 
39  void load_from_python(float *y, char **x, int len); // Load dataset from python.
40 
42  void group_classes(bool classification = true); // Group instances in the same class.
43 
44  size_t n_instances() const;
45 
46  size_t n_features() const;
47 
48  size_t n_classes() const;
49 
51  const vector<int> &count() const; // The number of instances for each class.
52 
54  const vector<int> &start() const; // The start position of instances for each class.
55 
57  const vector<int> &label() const; // Mapping from logical label (0,1,2,3,...) to real label (maybe 2,4,5,6,...).
58 
60  const vector<float_type> &y() const; // Label for each instance; instances are arranged as they are in the file.
61 
62  const node2d & instances() const; // Returns a reference, unlike the overloads below, which return by value.
63 
65  const node2d instances(int y_i) const; // NOTE(review): presumably the instances of class y_i -- confirm in dataset.cpp.
66 
68  const node2d instances(int y_i, int y_j) const; // NOTE(review): presumably the instances of classes y_i and y_j -- confirm in dataset.cpp.
69 
71  const vector<int> original_index() const; // Mapping from instance index (after grouping) to the original index (in file).
72 
73  const vector<int> original_index(int y_i) const; // NOTE(review): presumably the same mapping restricted to class y_i -- confirm.
74 
75  const vector<int> original_index(int y_i, int y_j) const; // NOTE(review): presumably the same mapping restricted to classes y_i and y_j -- confirm.
76 
77 private:
78  vector<float_type> y_;
79  node2d instances_;
80  size_t total_count_;
81  size_t n_features_;
82  vector<int> start_; //logical start position of each class
83  vector<int> count_; //the number of instances of each class
84  vector<int> label_;
85  vector<int> perm_; // NOTE(review): presumably the permutation produced by grouping (backs original_index) -- confirm.
86 };
87 #endif //THUNDERSVM_DATASET_H
void load_from_file(string file_name)
load dataset from file
Definition: dataset.cpp:56
const vector< int > & label() const
maps each logical label (0,1,2,3,...) to its real label (e.g. 2,4,5,6,...)
Definition: dataset.cpp:254
const vector< int > original_index() const
maps each instance index (after grouping) to its original index (in the file)
Definition: dataset.cpp:335
Dataset reader.
Definition: dataset.h:14
const vector< int > & start() const
the start position of instances for each class
Definition: dataset.cpp:246
const vector< float_type > & y() const
label for each instance; the instances are arranged as they are in the file
Definition: dataset.cpp:353
void group_classes(bool classification=true)
group instances in same class
Definition: dataset.cpp:258
const vector< int > & count() const
the number of instances for each class
Definition: dataset.cpp:242
void load_from_python(float *y, char **x, int len)
load dataset from python
Definition: dataset.cpp:221
Definition: dataset.h:16