43 #include "EST_String.h"
45 #include "EST_TVector.h"
46 #include "EST_TList.h"
47 #include "EST_simplestats.h"
48 #include "EST_Track.h"
// Write WMESS followed by endl to cerr, then terminate with exit(-1).
// WMESS is inserted into the stream expression as written (no added
// parentheses), so an argument built from several << pieces still streams.
#define wagon_error(WMESS) (cerr << WMESS << endl, exit(-1))
// Very large floating-point constant (1.0e20).
// NOTE(review): presumably used as an "effectively infinite" sentinel --
// confirm against the call sites.
#define WGN_HUGE_VAL 1.0e20
60 int get_int_val(
int n)
const {
return (
int)
a_no_check(n); }
61 float get_flt_val(
int n)
const {
return a_no_check(n); }
62 void set_int_val(
int n,
int i) {
a_check(n) = (int)i; }
63 void set_flt_val(
int n,
float f) {
a_check(n) = f; }
71 wndt_binary, wndt_float, wndt_class,
73 wndt_cluster, wndt_vector, wndt_matrix, wndt_trajectory,
85 void load_description(
const EST_String& descfname,LISP ignores);
// Declaration only; defined elsewhere.
// NOTE(review): by its name, marks non-numeric fields as ignored -- confirm.
void ignore_non_numbers();
88 int ftype(
const int &i)
const {
return p_type(i);}
89 int ignore(
int i)
const {
return p_ignore(i); }
90 void set_ignore(
int i,
int value) { p_ignore[i] = value; }
91 const EST_String &feat_name(
const int &i)
const {
return p_name(i);}
92 int samples(
void)
const {
return length();}
93 int width(
void)
const {
return dlength;}
// Operator codes for a question. Enumerators take the default values
// 0..6 in the order listed, so the order must not change.
enum wn_oper {
    wnop_equal,
    wnop_binary,
    wnop_greaterthan,
    wnop_lessthan,
    wnop_is,
    wnop_in,
    wnop_matches
};
110 { feature_pos=s.feature_pos;
111 op=s.op; yes=s.yes; no=s.no; operand1=s.operand1;
112 operandl = s.operandl; score=s.score;}
115 { feature_pos=fp; op=o; operand1=a; }
116 void set_fp(
const int &fp) {feature_pos=fp;}
117 void set_oper(
const wn_oper &o) {op=o;}
118 void set_operand1(
const EST_Val &a) {operand1 = a;}
119 void set_yes(
const int &y) {yes=y;}
120 void set_no(
const int &n) {no=n;}
121 int get_yes(
void)
const {
return yes;}
122 int get_no(
void)
const {
return no;}
123 const int get_fp(
void)
const {
return feature_pos;}
124 const wn_oper get_op(
void)
const {
return op;}
125 const EST_Val get_operand1(
void)
const {
return operand1;}
126 const EST_IList &get_operandl(
void)
const {
return operandl;}
127 const float get_score(
void)
const {
return score;}
128 void set_score(
const float &f) {score=f;}
129 const int ask(
const WVector &w)
const;
130 friend ostream& operator<<(ostream& s,
const WQuestion &q);
133 enum wnim_type {wnim_unset, wnim_float, wnim_class,
134 wnim_cluster, wnim_vector, wnim_matrix, wnim_ols,
// Impurity helpers, declared here and defined elsewhere. Each returns a float.
// NOTE(review): one per impurity kind, judging by the names -- confirm.
float cluster_impurity();
float cluster_member_mean(int i);
float vector_impurity();
float trajectory_impurity();
float ols_impurity();
157 WImpurity() { t=wnim_unset; a.
reset(); trajectory=0; l=0; width=0; data=0;}
163 t=s.t; a=s.a; p=s.p; members=s.members; member_counts = s.member_counts; l=s.l; width=s.width;
172 for (j=0; j<width; j++)
173 trajectory[i][j] = s.trajectory[i][j];
// Declaration only; defined elsewhere. Returns a double.
double samples();
181 wnim_type type(
void)
const {
return t;}
182 void cumulate(
const float pv,
double count=1.0);
185 float cluster_distance(
int i);
186 int in_cluster(
int i);
187 float cluster_ranking(
int i);
188 friend ostream& operator<<(ostream &s,
WImpurity &imp);
201 ~
WDlist() {
if (next != 0)
delete next; }
202 void set_score(
float s) { p_score = s; }
203 void set_question(
const WQuestion &q) { p_question = q; }
204 void set_best(
const EST_String &t,
int freq,
int samples)
205 { p_token = t; p_freq = freq; p_samples = samples;}
206 float score()
const {
return p_score;}
207 const EST_String &token(
void)
const {
return p_token;}
208 const WQuestion &question()
const {
return p_question;}
211 friend ostream &operator<<(ostream &s,
WDlist &d);
221 void print_out(ostream &s,
int margin);
222 int leaf(
void)
const {
return ((left == 0) || (right == 0)); }
225 WNode() { left = right = 0; }
226 ~
WNode() {
if (left != 0) {
delete left; left=0;}
227 if (right != 0) {
delete right; right=0;} }
229 void set_subnodes(
WNode *l,
WNode *r) { left=l; right=r; }
230 void set_impurity(
const WImpurity &imp) {impurity=imp;}
231 void set_question(
const WQuestion &q) {question=q;}
// Declaration only; defined elsewhere.
// NOTE(review): presumably prunes using held-out data -- confirm.
void held_out_prune();
234 WImpurity &get_impurity(
void) {
return impurity;}
235 WQuestion &get_question(
void) {
return question;}
238 int samples(
void)
const {
return data.
n(); }
239 friend ostream& operator<<(ostream &s,
WNode &n);
250 void wgn_load_datadescription(
EST_String fname,LISP ignores);
252 WNode *wgn_build_tree(
float &score);
253 WNode *wgn_build_dlist(
float &score,ostream *output);
254 WNode *wagon_stepwise(
float limit);
258 float summary_results(
WNode &tree,ostream *output);
// Global settings, declared here and defined in another translation unit.
// NOTE(review): the meaning of each is suggested only by its name -- confirm
// against the definitions before relying on it.
extern int wgn_min_cluster_size;
extern int wgn_held_out;
extern int wgn_prune;
extern int wgn_quiet;
extern int wgn_verbose;
extern int wgn_predictee;
extern int wgn_count_field;

extern float wgn_float_range_split;
extern float wgn_balance;
// Accessors for a question held as a LISP list X:
//   wgn_ques_feature  - first element, converted with get_c_string()
//   wgn_ques_oper_str - second element, converted with get_c_string()
//   wgn_ques_operand  - third element, returned as-is
#define wgn_ques_feature(X)  (get_c_string(car(X)))
#define wgn_ques_oper_str(X) (get_c_string(car(cdr(X))))
#define wgn_ques_operand(X)  (car(cdr(cdr(X))))
278 int wagon_ask_question(LISP question, LISP value);