#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include "linear.h"
#include "tron.h"
typedef signed char schar;
template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
#ifndef min
template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
#endif
#ifndef max
template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
#endif
template <class S, class T> static inline void clone(T*& dst, S* src, int n)
{
	dst = new T[n];
	memcpy((void *)dst,(void *)src,sizeof(T)*n);
}
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
#define INF HUGE_VAL

static void print_string_stdout(const char *s)
{
	fputs(s,stdout);
	fflush(stdout);
}

static void (*liblinear_print_string) (const char *) = &print_string_stdout;

#if 1
static void info(const char *fmt,...)
{
	char buf[BUFSIZ];
	va_list ap;
	va_start(ap,fmt);
	vsprintf(buf,fmt,ap);
	va_end(ap);
	(*liblinear_print_string)(buf);
}
#else
static void info(const char *fmt,...) {}
#endif

class l2r_lr_fun : public function
{
public:
	l2r_lr_fun(const problem *prob, double Cp, double Cn);
	~l2r_lr_fun();

	double fun(double *w);
	void grad(double *w, double *g);
	void Hv(double *s, double *Hs);

	int get_nr_variable(void);

private:
	void Xv(double *v, double *Xv);
	void XTv(double *v, double *XTv);

	double *C;
	double *z;
	double *D;
	const problem *prob;
};

l2r_lr_fun::l2r_lr_fun(const problem *prob, double Cp, double Cn)
{
	int i;
	int l=prob->l;
	int *y=prob->y;

	this->prob = prob;

	z = new double[l];
	D = new double[l];
	C = new double[l];

	for (i=0; i<l; i++)
	{
		if (y[i] == 1)
			C[i] = Cp;
		else
			C[i] = Cn;
	}
}

l2r_lr_fun::~l2r_lr_fun()
{
	delete[] z;
	delete[] D;
	delete[] C;
}

double l2r_lr_fun::fun(double *w)
{
	int i;
	double f=0;
	int *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	Xv(w, z);
	for(i=0;i<l;i++)
	{
		double yz = y[i]*z[i];
		if (yz >= 0)
			f += C[i]*log(1 + exp(-yz));
		else
			f += C[i]*(-yz+log(1 + exp(yz)));
	}
	f = 2*f;
	for(i=0;i<w_size;i++)
		f += w[i]*w[i];
	f /= 2.0;

	return(f);
}

void l2r_lr_fun::grad(double *w, double *g)
{
	int i;
	int *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	for(i=0;i<l;i++)
	{
		z[i] = 1/(1 + exp(-y[i]*z[i]));
		D[i] = z[i]*(1-z[i]);
		z[i] = C[i]*(z[i]-1)*y[i];
	}
	XTv(z, g);

	for(i=0;i<w_size;i++)
		g[i] = w[i] + g[i];
}

int l2r_lr_fun::get_nr_variable(void)
{
	return prob->n;
}

void l2r_lr_fun::Hv(double *s, double *Hs)
{
	int i;
	int l=prob->l;
	int w_size=get_nr_variable();
	double *wa = new double[l];

	Xv(s, wa);
	for(i=0;i<l;i++)
		wa[i] = C[i]*D[i]*wa[i];

	XTv(wa, Hs);
	for(i=0;i<w_size;i++)
		Hs[i] = s[i] + Hs[i];
	delete[] wa;
}

void l2r_lr_fun::Xv(double *v, double *Xv)
{
	int i;
	int l=prob->l;
	feature_node **x=prob->x;

	for(i=0;i<l;i++)
	{
		feature_node *s=x[i];
		Xv[i]=0;
		while(s->index!=-1)
		{
			Xv[i]+=v[s->index-1]*s->value;
			s++;
		}
	}
}

void l2r_lr_fun::XTv(double *v, double *XTv)
{
	int i;
	int l=prob->l;
	int w_size=get_nr_variable();
	feature_node **x=prob->x;

	for(i=0;i<w_size;i++)
		XTv[i]=0;
	for(i=0;i<l;i++)
	{
		feature_node *s=x[i];
		while(s->index!=-1)
		{
			XTv[s->index-1]+=v[i]*s->value;
			s++;
		}
	}
}
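// ---------------------------------------------------------------------
// Illustrative sketch (not part of LIBLINEAR): how a TRON-style solver
// consumes the fun/grad/Hv interface implemented above. The real
// trust-region Newton method lives in tron.cpp; this is only a plain
// conjugate gradient solve of H s = -g through the same callbacks, kept
// under #if 0 so it does not affect the build. It assumes the abstract
// class `function` in tron.h declares the virtuals used in this file
// (fun, grad, Hv, get_nr_variable).
#if 0
static void cg_newton_direction_sketch(function *fun_obj, double *w, double *s)
{
	int n = fun_obj->get_nr_variable();
	double *g = new double[n];   // gradient at w
	double *r = new double[n];   // residual -g - H s
	double *d = new double[n];   // search direction
	double *Hd = new double[n];  // Hessian-vector product H d

	fun_obj->grad(w, g);
	for(int i=0;i<n;i++)
	{
		s[i] = 0;
		r[i] = -g[i];
		d[i] = r[i];
	}
	double rTr = 0;
	for(int i=0;i<n;i++)
		rTr += r[i]*r[i];

	for(int cg_iter=0; cg_iter<n && rTr>1e-12; cg_iter++)
	{
		fun_obj->Hv(d, Hd); // only H*v is ever needed, never H itself
		double dHd = 0;
		for(int i=0;i<n;i++)
			dHd += d[i]*Hd[i];
		double alpha = rTr/dHd;
		for(int i=0;i<n;i++)
		{
			s[i] += alpha*d[i];
			r[i] -= alpha*Hd[i];
		}
		double rTr_new = 0;
		for(int i=0;i<n;i++)
			rTr_new += r[i]*r[i];
		for(int i=0;i<n;i++)
			d[i] = r[i] + (rTr_new/rTr)*d[i];
		rTr = rTr_new;
	}
	delete[] g; delete[] r; delete[] d; delete[] Hd;
}
#endif
// ---------------------------------------------------------------------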
class l2r_l2_svc_fun : public function
{
public:
	l2r_l2_svc_fun(const problem *prob, double Cp, double Cn);
	~l2r_l2_svc_fun();

	double fun(double *w);
	void grad(double *w, double *g);
	void Hv(double *s, double *Hs);

	int get_nr_variable(void);

private:
	void Xv(double *v, double *Xv);
	void subXv(double *v, double *Xv);
	void subXTv(double *v, double *XTv);

	double *C;
	double *z;
	double *D;
	int *I;
	int sizeI;
	const problem *prob;
};

l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double Cp, double Cn)
{
	int i;
	int l=prob->l;
	int *y=prob->y;

	this->prob = prob;

	z = new double[l];
	D = new double[l];
	C = new double[l];
	I = new int[l];

	for (i=0; i<l; i++)
	{
		if (y[i] == 1)
			C[i] = Cp;
		else
			C[i] = Cn;
	}
}

l2r_l2_svc_fun::~l2r_l2_svc_fun()
{
	delete[] z;
	delete[] D;
	delete[] C;
	delete[] I;
}

double l2r_l2_svc_fun::fun(double *w)
{
	int i;
	double f=0;
	int *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	Xv(w, z);
	for(i=0;i<l;i++)
	{
		z[i] = y[i]*z[i];
		double d = 1-z[i];
		if (d > 0)
			f += C[i]*d*d;
	}
	f = 2*f;
	for(i=0;i<w_size;i++)
		f += w[i]*w[i];
	f /= 2.0;

	return(f);
}

void l2r_l2_svc_fun::grad(double *w, double *g)
{
	int i;
	int *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	sizeI = 0;
	for (i=0;i<l;i++)
		if (z[i] < 1)
		{
			z[sizeI] = C[i]*y[i]*(z[i]-1);
			I[sizeI] = i;
			sizeI++;
		}
	subXTv(z, g);

	for(i=0;i<w_size;i++)
		g[i] = w[i] + 2*g[i];
}

int l2r_l2_svc_fun::get_nr_variable(void)
{
	return prob->n;
}

void l2r_l2_svc_fun::Hv(double *s, double *Hs)
{
	int i;
	int l=prob->l;
	int w_size=get_nr_variable();
	double *wa = new double[l];

	subXv(s, wa);
	for(i=0;i<sizeI;i++)
		wa[i] = C[I[i]]*wa[i];

	subXTv(wa, Hs);
	for(i=0;i<w_size;i++)
		Hs[i] = s[i] + 2*Hs[i];
	delete[] wa;
}

void l2r_l2_svc_fun::Xv(double *v, double *Xv)
{
	int i;
	int l=prob->l;
	feature_node **x=prob->x;

	for(i=0;i<l;i++)
	{
		feature_node *s=x[i];
		Xv[i]=0;
		while(s->index!=-1)
		{
			Xv[i]+=v[s->index-1]*s->value;
			s++;
		}
	}
}

void l2r_l2_svc_fun::subXv(double *v, double *Xv)
{
	int i;
	feature_node **x=prob->x;

	for(i=0;i<sizeI;i++)
	{
		feature_node *s=x[I[i]];
		Xv[i]=0;
		while(s->index!=-1)
		{
			Xv[i]+=v[s->index-1]*s->value;
			s++;
		}
	}
}

void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
{
	int i;
	int w_size=get_nr_variable();
	feature_node **x=prob->x;

	for(i=0;i<w_size;i++)
		XTv[i]=0;
	for(i=0;i<sizeI;i++)
	{
		feature_node *s=x[I[i]];
		while(s->index!=-1)
		{
			XTv[s->index-1]+=v[i]*s->value;
			s++;
		}
	}
}

// A coordinate descent algorithm for
// multi-class support vector machines by Crammer and Singer
//
//  min_{\alpha}  0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i
//    s.t.  \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
//
//  where e^m_i = 0 if y_i  = m,
//        e^m_i = 1 if y_i != m,
//  C^m_i = C if m  = y_i,
//  C^m_i = 0 if m != y_i,
//  and w_m(\alpha) = \sum_i \alpha^m_i x_i
//
// Given:
// x, y, C
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Appendix of LIBLINEAR paper, Fan et al. (2008)
#define GETI(i) (prob->y[i])
// To support weights for instances, use GETI(i) (i)

class Solver_MCSVM_CS
{
public:
	Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000);
	~Solver_MCSVM_CS();
	void Solve(double *w);
private:
	void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new);
	bool be_shrunk(int i, int m, int yi, double alpha_i, double minG);
	double *B, *C, *G;
	int w_size, l;
	int nr_class;
	int max_iter;
	double eps;
	const problem *prob;
};

Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *weighted_C, double eps, int max_iter)
{
	this->w_size = prob->n;
	this->l = prob->l;
	this->nr_class = nr_class;
	this->eps = eps;
	this->max_iter = max_iter;
	this->prob = prob;
	this->B = new double[nr_class];
	this->G = new double[nr_class];
	this->C = weighted_C;
}

Solver_MCSVM_CS::~Solver_MCSVM_CS()
{
	delete[] B;
	delete[] G;
}

int compare_double(const void *a, const void *b)
{
	if(*(double *)a > *(double *)b)
		return -1;
	if(*(double *)a < *(double *)b)
		return 1;
	return 0;
}

void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
{
	int r;
	double *D;

	clone(D, B, active_i);
	if(yi < active_i)
		D[yi] += A_i*C_yi;
	qsort(D, active_i, sizeof(double), compare_double);

	double beta = D[0] - A_i*C_yi;
	for(r=1;r<active_i && beta<r*D[r];r++)
		beta += D[r];
	beta /= r;

	for(r=0;r<active_i;r++)
	{
		if(r == yi)
			alpha_new[r] = min(C_yi, (beta-B[r])/A_i);
		else
			alpha_new[r] = min((double)0, (beta - B[r])/A_i);
	}
	delete[] D;
}

bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
{
	double bound = 0;
	if(m == yi)
		bound = C[GETI(i)];
	if(alpha_i == bound && G[m] < minG)
		return true;
	return false;
}

void Solver_MCSVM_CS::Solve(double *w)
{
	int i, m, s;
	int iter = 0;
	double *alpha = new double[l*nr_class];
	double *alpha_new = new double[nr_class];
	int *index = new int[l];
	double *QD = new double[l];
	int *d_ind = new int[nr_class];
	double *d_val = new double[nr_class];
	int *alpha_index = new int[nr_class*l];
	int *y_index = new int[l];
	int active_size = l;
	int *active_size_i = new int[l];
	double eps_shrink = max(10.0*eps, 1.0); // stopping tolerance for shrinking
	bool start_from_all = true;

	// initial
	for(i=0;i<l*nr_class;i++)
		alpha[i] = 0;
	for(i=0;i<w_size*nr_class;i++)
		w[i] = 0;
	for(i=0;i<l;i++)
	{
		for(m=0;m<nr_class;m++)
			alpha_index[i*nr_class+m] = m;
		feature_node *xi = prob->x[i];
		QD[i] = 0;
		while(xi->index != -1)
		{
			QD[i] += (xi->value)*(xi->value);
			xi++;
		}
		active_size_i[i] = nr_class;
		y_index[i] = prob->y[i];
		index[i] = i;
	}

	while(iter < max_iter)
	{
		double stopping = -INF;
		for(i=0;i<active_size;i++)
		{
			int j = i+rand()%(active_size-i);
			swap(index[i], index[j]);
		}
		for(s=0;s<active_size;s++)
		{
			i = index[s];
			double Ai = QD[i];
			double *alpha_i = &alpha[i*nr_class];
			int *alpha_index_i = &alpha_index[i*nr_class];

			if(Ai > 0)
			{
				for(m=0;m<active_size_i[i];m++)
					G[m] = 1;
				if(y_index[i] < active_size_i[i])
					G[y_index[i]] = 0;

				feature_node *xi = prob->x[i];
				while(xi->index!= -1)
				{
					double *w_i = &w[(xi->index-1)*nr_class];
					for(m=0;m<active_size_i[i];m++)
						G[m] += w_i[alpha_index_i[m]]*(xi->value);
					xi++;
				}

				double minG = INF;
				double maxG = -INF;
				for(m=0;m<active_size_i[i];m++)
				{
					if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
						minG = G[m];
					if(G[m] > maxG)
						maxG = G[m];
				}
				if(y_index[i] < active_size_i[i])
					if(alpha_i[prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG)
						minG = G[y_index[i]];
				for(m=0;m<active_size_i[i];m++)
				{
					if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
					{
						active_size_i[i]--;
						while(active_size_i[i]>m)
						{
							if(!be_shrunk(i, active_size_i[i], y_index[i],
									alpha_i[alpha_index_i[active_size_i[i]]], minG))
							{
								swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
								swap(G[m], G[active_size_i[i]]);
								if(y_index[i] == active_size_i[i])
									y_index[i] = m;
								else if(y_index[i] == m)
									y_index[i] = active_size_i[i];
								break;
							}
							active_size_i[i]--;
						}
					}
				}

				if(active_size_i[i] <= 1)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}

				if(maxG-minG <= 1e-12)
					continue;
				else
					stopping = max(maxG - minG, stopping);

				for(m=0;m<active_size_i[i];m++)
					B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]] ;

				solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
				int nz_d = 0;
				for(m=0;m<active_size_i[i];m++)
				{
					double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
					alpha_i[alpha_index_i[m]] = alpha_new[m];
					if(fabs(d) >= 1e-12)
					{
						d_ind[nz_d] = alpha_index_i[m];
						d_val[nz_d] = d;
						nz_d++;
					}
				}

				xi = prob->x[i];
				while(xi->index != -1)
				{
					double *w_i = &w[(xi->index-1)*nr_class];
					for(m=0;m<nz_d;m++)
						w_i[d_ind[m]] += d_val[m]*xi->value;
					xi++;
				}
			}
		}

		iter++;
		if(iter % 10 == 0)
		{
			info(".");
		}

		if(stopping < eps_shrink)
		{
			if(stopping < eps && start_from_all == true)
				break;
			else
			{
				active_size = l;
				for(i=0;i<l;i++)
					active_size_i[i] = nr_class;
				info("*");
				eps_shrink = max(eps_shrink/2, eps);
				start_from_all = true;
			}
		}
		else
			start_from_all = false;
	}

	info("\noptimization finished, #iter = %d\n",iter);
	if (iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\n");

	// calculate objective value
	double v = 0;
	int nSV = 0;
	for(i=0;i<w_size*nr_class;i++)
		v += w[i]*w[i];
	v = 0.5*v;
	for(i=0;i<l*nr_class;i++)
	{
		v += alpha[i];
		if(fabs(alpha[i]) > 0)
			nSV++;
	}
	for(i=0;i<l;i++)
		v -= alpha[i*nr_class+prob->y[i]];
	info("Objective value = %lf\n",v);
	info("nSV = %d\n",nSV);

	delete [] alpha;
	delete [] alpha_new;
	delete [] index;
	delete [] QD;
	delete [] d_ind;
	delete [] d_val;
	delete [] alpha_index;
	delete [] y_index;
	delete [] active_size_i;
}
// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
//
//  min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
//    s.t.      0 <= alpha_i <= upper_bound_i,
//
//  where Qij = yi yj xi^T xj and
//  D is a diagonal matrix
//
// In L1-SVM case:
// 	upper_bound_i = Cp if y_i = 1
// 	upper_bound_i = Cn if y_i = -1
// 	D_ii = 0
// In L2-SVM case:
// 	upper_bound_i = INF
// 	D_ii = 1/(2*Cp)	if y_i = 1
// 	D_ii = 1/(2*Cn)	if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Algorithm 3 of Hsieh et al., ICML 2008

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

static void solve_l2r_l1l2_svc(
	const problem *prob, double *w, double eps,
	double Cp, double Cn, int solver_type)
{
	int l = prob->l;
	int w_size = prob->n;
	int i, s, iter = 0;
	double C, d, G;
	double *QD = new double[l];
	int max_iter = 1000;
	int *index = new int[l];
	double *alpha = new double[l];
	schar *y = new schar[l];
	int active_size = l;

	// PG: projected gradient, for shrinking and stopping
	double PG;
	double PGmax_old = INF;
	double PGmin_old = -INF;
	double PGmax_new, PGmin_new;

	// default solver_type: L2R_L2LOSS_SVC_DUAL
	double diag[3] = {0.5/Cn, 0, 0.5/Cp};
	double upper_bound[3] = {INF, 0, INF};
	if(solver_type == L2R_L1LOSS_SVC_DUAL)
	{
		diag[0] = 0;
		diag[2] = 0;
		upper_bound[0] = Cn;
		upper_bound[2] = Cp;
	}

	for(i=0; i<w_size; i++)
		w[i] = 0;
	for(i=0; i<l; i++)
	{
		alpha[i] = 0;
		if(prob->y[i] > 0)
		{
			y[i] = +1;
		}
		else
		{
			y[i] = -1;
		}
		QD[i] = diag[GETI(i)];

		feature_node *xi = prob->x[i];
		while (xi->index != -1)
		{
			QD[i] += (xi->value)*(xi->value);
			xi++;
		}
		index[i] = i;
	}

	while (iter < max_iter)
	{
		PGmax_new = -INF;
		PGmin_new = INF;

		for (i=0; i<active_size; i++)
		{
			int j = i+rand()%(active_size-i);
			swap(index[i], index[j]);
		}

		for (s=0; s<active_size; s++)
		{
			i = index[s];
			G = 0;
			schar yi = y[i];

			feature_node *xi = prob->x[i];
			while(xi->index!= -1)
			{
				G += w[xi->index-1]*(xi->value);
				xi++;
			}
			G = G*yi-1;

			C = upper_bound[GETI(i)];
			G += alpha[i]*diag[GETI(i)];

			PG = 0;
			if (alpha[i] == 0)
			{
				if (G > PGmax_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G < 0)
					PG = G;
			}
			else if (alpha[i] == C)
			{
				if (G < PGmin_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G > 0)
					PG = G;
			}
			else
				PG = G;

			PGmax_new = max(PGmax_new, PG);
			PGmin_new = min(PGmin_new, PG);

			if(fabs(PG) > 1.0e-12)
			{
				double alpha_old = alpha[i];
				alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C);
				d = (alpha[i] - alpha_old)*yi;
				xi = prob->x[i];
				while (xi->index != -1)
				{
					w[xi->index-1] += d*xi->value;
					xi++;
				}
			}
		}

		iter++;
		if(iter % 10 == 0)
			info(".");

		if(PGmax_new - PGmin_new <= eps)
		{
			if(active_size == l)
				break;
			else
			{
				active_size = l;
				info("*");
				PGmax_old = INF;
				PGmin_old = -INF;
				continue;
			}
		}
		PGmax_old = PGmax_new;
		PGmin_old = PGmin_new;
		if (PGmax_old <= 0)
			PGmax_old = INF;
		if (PGmin_old >= 0)
			PGmin_old = -INF;
	}

	info("\noptimization finished, #iter = %d\n",iter);
	if (iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n");

	// calculate objective value

	double v = 0;
	int nSV = 0;
	for(i=0; i<w_size; i++)
		v += w[i]*w[i];
	for(i=0; i<l; i++)
	{
		v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
		if(alpha[i] > 0)
			++nSV;
	}
	info("Objective value = %lf\n",v/2);
	info("nSV = %d\n",nSV);

	delete [] QD;
	delete [] alpha;
	delete [] y;
	delete [] index;
}
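// ---------------------------------------------------------------------
// Illustrative sketch (not part of LIBLINEAR): the projected gradient
// used above for shrinking and for the stopping condition. For the
// box-constrained dual, only feasible directions count, so the gradient
// is clipped at active bounds, exactly as in the PG computation inside
// solve_l2r_l1l2_svc. Kept under #if 0 so it does not affect the build.
#if 0
static double projected_gradient_sketch(double alpha, double G, double C)
{
	if(alpha == 0)
		return min(G, 0.0); // at the lower bound: alpha can only grow
	if(alpha == C)
		return max(G, 0.0); // at the upper bound: alpha can only shrink
	return G;                   // interior point: plain gradient
}
#endif
// ---------------------------------------------------------------------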
// A coordinate descent algorithm for
// the dual of L2-regularized logistic regression problems
//
//  min_\alpha  0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i)
//              + (upper_bound_i - \alpha_i) log (upper_bound_i - \alpha_i),
//    s.t.      0 <= alpha_i <= upper_bound_i,
//
//  where Qij = yi yj xi^T xj and
//  upper_bound_i = Cp if y_i = 1
//  upper_bound_i = Cn if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Algorithm 5 of Yu et al., MLJ 2010

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, double Cn)
{
	int l = prob->l;
	int w_size = prob->n;
	int i, s, iter = 0;
	double *xTx = new double[l];
	int max_iter = 1000;
	int *index = new int[l];
	double *alpha = new double[2*l]; // store alpha and C - alpha
	schar *y = new schar[l];
	int max_inner_iter = 100; // for inner Newton
	double innereps = 1e-2;
	double innereps_min = min(1e-8, eps);
	double upper_bound[3] = {Cn, 0, Cp};

	for(i=0; i<w_size; i++)
		w[i] = 0;
	for(i=0; i<l; i++)
	{
		if(prob->y[i] > 0)
		{
			y[i] = +1;
		}
		else
		{
			y[i] = -1;
		}
		alpha[2*i] = min(0.001*upper_bound[GETI(i)], 1e-8);
		alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];

		xTx[i] = 0;
		feature_node *xi = prob->x[i];
		while (xi->index != -1)
		{
			xTx[i] += (xi->value)*(xi->value);
			w[xi->index-1] += y[i]*alpha[2*i]*xi->value;
			xi++;
		}
		index[i] = i;
	}

	while (iter < max_iter)
	{
		for (i=0; i<l; i++)
		{
			int j = i+rand()%(l-i);
			swap(index[i], index[j]);
		}
		int newton_iter = 0;
		double Gmax = 0;
		for (s=0; s<l; s++)
		{
			i = index[s];
			schar yi = y[i];
			double C = upper_bound[GETI(i)];
			double ywTx = 0, xisq = xTx[i];
			feature_node *xi = prob->x[i];
			while (xi->index != -1)
			{
				ywTx += w[xi->index-1]*xi->value;
				xi++;
			}
			ywTx *= y[i];
			double a = xisq, b = ywTx;

			// Decide to minimize g_1(z) or g_2(z)
			int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
			if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
			{
				ind1 = 2*i+1;
				ind2 = 2*i;
				sign = -1;
			}

			// g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
			double alpha_old = alpha[ind1];
			double z = alpha_old;
			if(C - z < 0.5 * C)
				z = 0.1*z;
			double gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
			Gmax = max(Gmax, fabs(gp));

			// Newton method on the sub-problem
			const double eta = 0.1; // xi in the paper
			int inner_iter = 0;
			while (inner_iter <= max_inner_iter)
			{
				if(fabs(gp) < innereps)
					break;
				double gpp = a + C/(C-z)/z;
				double tmpz = z - gp/gpp;
				if(tmpz <= 0)
					z *= eta;
				else // tmpz in (0, C)
					z = tmpz;
				gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
				newton_iter++;
				inner_iter++;
			}

			if(inner_iter > 0) // update w
			{
				alpha[ind1] = z;
				alpha[ind2] = C-z;
				xi = prob->x[i];
				while (xi->index != -1)
				{
					w[xi->index-1] += sign*(z-alpha_old)*yi*xi->value;
					xi++;
				}
			}
		}

		iter++;
		if(iter % 10 == 0)
			info(".");

		if(Gmax < eps)
			break;

		if(newton_iter <= l/10)
			innereps = max(innereps_min, 0.1*innereps);
	}

	info("\noptimization finished, #iter = %d\n",iter);
	if (iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n");

	// calculate objective value

	double v = 0;
	for(i=0; i<w_size; i++)
		v += w[i] * w[i];
	v *= 0.5;
	for(i=0; i<l; i++)
		v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
			- upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
	info("Objective value = %lf\n", v);

	delete [] xTx;
	delete [] alpha;
	delete [] y;
	delete [] index;
}
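// ---------------------------------------------------------------------
// Illustrative sketch (not part of LIBLINEAR): the damped 1-D Newton
// iteration used in solve_l2r_lr_dual above. It minimizes
//   g(z) = z log z + (C-z) log(C-z) + 0.5*a*(z-alpha_old)^2 + s*b*(z-alpha_old)
// over 0 < z < C, with
//   g'(z)  = a*(z-alpha_old) + s*b + log(z/(C-z))
//   g''(z) = a + C/((C-z)*z)
// Steps that would leave (0, C) are damped by eta, mirroring the inner
// loop above. Kept under #if 0 so it does not affect the build.
#if 0
static double newton_1d_sketch(double a, double b, int s, double C,
				double alpha_old, double z, double innereps)
{
	const double eta = 0.1; // xi in the paper
	double gp = a*(z-alpha_old) + s*b + log(z/(C-z));
	int inner_iter = 0;
	while(inner_iter <= 100 && fabs(gp) >= innereps)
	{
		double gpp = a + C/(C-z)/z;
		double tmpz = z - gp/gpp;
		if(tmpz <= 0)
			z *= eta; // damp instead of leaving the domain
		else
			z = tmpz; // tmpz in (0, C)
		gp = a*(z-alpha_old) + s*b + log(z/(C-z));
		inner_iter++;
	}
	return z;
}
#endif
// ---------------------------------------------------------------------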
// A coordinate descent algorithm for
// L1-regularized L2-loss support vector classification
//
//  min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008)

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

static void solve_l1r_l2_svc(
	problem *prob_col, double *w, double eps,
	double Cp, double Cn)
{
	int l = prob_col->l;
	int w_size = prob_col->n;
	int j, s, iter = 0;
	int max_iter = 1000;
	int active_size = w_size;
	int max_num_linesearch = 20;

	double sigma = 0.01;
	double d, G_loss, G, H;
	double Gmax_old = INF;
	double Gmax_new, Gnorm1_new;
	double Gnorm1_init;
	double d_old, d_diff;
	double loss_old, loss_new;
	double appxcond, cond;

	int *index = new int[w_size];
	schar *y = new schar[l];
	double *b = new double[l]; // b = 1-ywTx
	double *xj_sq = new double[w_size];
	feature_node *x;

	double C[3] = {Cn,0,Cp};

	for(j=0; j<l; j++)
	{
		b[j] = 1;
		if(prob_col->y[j] > 0)
			y[j] = 1;
		else
			y[j] = -1;
	}
	for(j=0; j<w_size; j++)
	{
		w[j] = 0;
		index[j] = j;
		xj_sq[j] = 0;
		x = prob_col->x[j];
		while(x->index != -1)
		{
			int ind = x->index-1;
			double val = x->value;
			x->value *= y[ind]; // x->value stores yi*xij
			xj_sq[j] += C[GETI(ind)]*val*val;
			x++;
		}
	}

	while(iter < max_iter)
	{
		Gmax_new = 0;
		Gnorm1_new = 0;

		for(j=0; j<active_size; j++)
		{
			int i = j+rand()%(active_size-j);
			swap(index[i], index[j]);
		}

		for(s=0; s<active_size; s++)
		{
			j = index[s];
			G_loss = 0;
			H = 0;

			x = prob_col->x[j];
			while(x->index != -1)
			{
				int ind = x->index-1;
				if(b[ind] > 0)
				{
					double val = x->value;
					double tmp = C[GETI(ind)]*val;
					G_loss -= tmp*b[ind];
					H += tmp*val;
				}
				x++;
			}
			G_loss *= 2;

			G = G_loss;
			H *= 2;
			H = max(H, 1e-12);

			double Gp = G+1;
			double Gn = G-1;
			double violation = 0;
			if(w[j] == 0)
			{
				if(Gp < 0)
					violation = -Gp;
				else if(Gn > 0)
					violation = Gn;
				else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(w[j] > 0)
				violation = fabs(Gp);
			else
				violation = fabs(Gn);

			Gmax_new = max(Gmax_new, violation);
			Gnorm1_new += violation;

			// obtain Newton direction d
			if(Gp <= H*w[j])
				d = -Gp/H;
			else if(Gn >= H*w[j])
				d = -Gn/H;
			else
				d = -w[j];

			if(fabs(d) < 1.0e-12)
				continue;

			double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
			d_old = 0;
			int num_linesearch;
			for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
			{
				d_diff = d_old - d;
				cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;

				appxcond = xj_sq[j]*d*d + G_loss*d + cond;
				if(appxcond <= 0)
				{
					x = prob_col->x[j];
					while(x->index != -1)
					{
						b[x->index-1] += d_diff*x->value;
						x++;
					}
					break;
				}

				if(num_linesearch == 0)
				{
					loss_old = 0;
					loss_new = 0;
					x = prob_col->x[j];
					while(x->index != -1)
					{
						int ind = x->index-1;
						if(b[ind] > 0)
							loss_old += C[GETI(ind)]*b[ind]*b[ind];
						double b_new = b[ind] + d_diff*x->value;
						b[ind] = b_new;
						if(b_new > 0)
							loss_new += C[GETI(ind)]*b_new*b_new;
						x++;
					}
				}
				else
				{
					loss_new = 0;
					x = prob_col->x[j];
					while(x->index != -1)
					{
						int ind = x->index-1;
						double b_new = b[ind] + d_diff*x->value;
						b[ind] = b_new;
						if(b_new > 0)
							loss_new += C[GETI(ind)]*b_new*b_new;
						x++;
					}
				}

				cond = cond + loss_new - loss_old;
				if(cond <= 0)
					break;
				else
				{
					d_old = d;
					d *= 0.5;
					delta *= 0.5;
				}
			}

			w[j] += d;

			// recompute b[] if line search takes too many steps
			if(num_linesearch >= max_num_linesearch)
			{
				info("#");
				for(int i=0; i<l; i++)
					b[i] = 1;

				for(int i=0; i<w_size; i++)
				{
					if(w[i]==0)
						continue;
					x = prob_col->x[i];
					while(x->index != -1)
					{
						b[x->index-1] -= w[i]*x->value;
						x++;
					}
				}
			}
		}

		if(iter == 0)
			Gnorm1_init = Gnorm1_new;
		iter++;
		if(iter % 10 == 0)
			info(".");

		if(Gnorm1_new <= eps*Gnorm1_init)
		{
			if(active_size == w_size)
				break;
			else
			{
				active_size = w_size;
				info("*");
				Gmax_old = INF;
				continue;
			}
		}

		Gmax_old = Gmax_new;
	}

	info("\noptimization finished, #iter = %d\n", iter);
	if(iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\n");
	// calculate objective value

	double v = 0;
	int nnz = 0;
	for(j=0; j<w_size; j++)
	{
		x = prob_col->x[j];
		while(x->index != -1)
		{
			x->value *= prob_col->y[x->index-1]; // restore x->value
			x++;
		}
		if(w[j] != 0)
		{
			v += fabs(w[j]);
			nnz++;
		}
	}
	for(j=0; j<l; j++)
		if(b[j] > 0)
			v += C[GETI(j)]*b[j]*b[j];

	info("Objective value = %lf\n", v);
	info("#nonzeros/#features = %d/%d\n", nnz, w_size);

	delete [] index;
	delete [] y;
	delete [] b;
	delete [] xj_sq;
}

// A coordinate descent algorithm for
// L1-regularized logistic regression problems
//
//  min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008)

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

static void solve_l1r_lr(
	const problem *prob_col, double *w, double eps,
	double Cp, double Cn)
{
	int l = prob_col->l;
	int w_size = prob_col->n;
	int j, s, newton_iter=0, iter=0;
	int max_newton_iter = 100;
	int max_iter = 1000;
	int max_num_linesearch = 20;
	int active_size;
	int QP_active_size;

	double nu = 1e-12;
	double inner_eps = 1;
	double sigma = 0.01;
	double w_norm=0, w_norm_new;
	double z, G, H;
	double Gnorm1_init;
	double Gmax_old = INF;
	double Gmax_new, Gnorm1_new;
	double QP_Gmax_old = INF;
	double QP_Gmax_new, QP_Gnorm1_new;
	double delta, negsum_xTd, cond;

	int *index = new int[w_size];
	schar *y = new schar[l];
	double *Hdiag = new double[w_size];
	double *Grad = new double[w_size];
	double *wpd = new double[w_size];
	double *xjneg_sum = new double[w_size];
	double *xTd = new double[l];
	double *exp_wTx = new double[l];
	double *exp_wTx_new = new double[l];
	double *tau = new double[l];
	double *D = new double[l];
	feature_node *x;

	double C[3] = {Cn,0,Cp};

	for(j=0; j<l; j++)
	{
		if(prob_col->y[j] > 0)
			y[j] = 1;
		else
			y[j] = -1;

		// assume initial w is 0
		exp_wTx[j] = 1;
		tau[j] = C[GETI(j)]*0.5;
		D[j] = C[GETI(j)]*0.25;
	}
	for(j=0; j<w_size; j++)
	{
		w[j] = 0;
		wpd[j] = w[j];
		index[j] = j;
		xjneg_sum[j] = 0;
		x = prob_col->x[j];
		while(x->index != -1)
		{
			int ind = x->index-1;
			if(y[ind] == -1)
				xjneg_sum[j] += C[GETI(ind)]*x->value;
			x++;
		}
	}

	while(newton_iter < max_newton_iter)
	{
		Gmax_new = 0;
		Gnorm1_new = 0;
		active_size = w_size;

		for(s=0; s<active_size; s++)
		{
			j = index[s];
			Hdiag[j] = nu;
			Grad[j] = 0;

			double tmp = 0;
			x = prob_col->x[j];
			while(x->index != -1)
			{
				int ind = x->index-1;
				Hdiag[j] += x->value*x->value*D[ind];
				tmp += x->value*tau[ind];
				x++;
			}
			Grad[j] = -tmp + xjneg_sum[j];

			double Gp = Grad[j]+1;
			double Gn = Grad[j]-1;
			double violation = 0;
			if(w[j] == 0)
			{
				if(Gp < 0)
					violation = -Gp;
				else if(Gn > 0)
					violation = Gn;
				//outer-level shrinking
				else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(w[j] > 0)
				violation = fabs(Gp);
			else
				violation = fabs(Gn);

			Gmax_new = max(Gmax_new, violation);
			Gnorm1_new += violation;
		}

		if(newton_iter == 0)
			Gnorm1_init = Gnorm1_new;

		if(Gnorm1_new <= eps*Gnorm1_init)
			break;

		iter = 0;
		QP_Gmax_old = INF;
		QP_active_size = active_size;

		for(int i=0; i<l; i++)
			xTd[i] = 0;

		// optimize QP over wpd
		while(iter < max_iter)
		{
			QP_Gmax_new = 0;
			QP_Gnorm1_new = 0;

			for(j=0; j<QP_active_size; j++)
			{
				int i = j+rand()%(QP_active_size-j);
				swap(index[i], index[j]);
			}

			for(s=0; s<QP_active_size; s++)
			{
				j = index[s];
				H = Hdiag[j];

				x = prob_col->x[j];
				G = Grad[j] + (wpd[j]-w[j])*nu;
				while(x->index != -1)
				{
					int ind = x->index-1;
					G += x->value*D[ind]*xTd[ind];
					x++;
				}

				double Gp = G+1;
				double Gn = G-1;
				double violation = 0;
				if(wpd[j] == 0)
				{
					if(Gp < 0)
						violation = -Gp;
					else if(Gn > 0)
						violation = Gn;
					//inner-level shrinking
					else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l)
					{
						QP_active_size--;
						swap(index[s], index[QP_active_size]);
						s--;
						continue;
					}
				}
				else if(wpd[j] > 0)
					violation = fabs(Gp);
				else
					violation = fabs(Gn);

				QP_Gmax_new = max(QP_Gmax_new, violation);
				QP_Gnorm1_new += violation;

				// obtain solution of one-variable problem
				if(Gp <= H*wpd[j])
					z = -Gp/H;
				else if(Gn >= H*wpd[j])
					z = -Gn/H;
				else
					z = -wpd[j];

				if(fabs(z) < 1.0e-12)
					continue;
				z = min(max(z,-10.0),10.0);

				wpd[j] += z;

				x = prob_col->x[j];
				while(x->index != -1)
				{
					int ind = x->index-1;
					xTd[ind] += x->value*z;
					x++;
				}
			}

			iter++;

			if(QP_Gnorm1_new <= inner_eps*Gnorm1_init)
			{
				//inner stopping
				if(QP_active_size == active_size)
					break;
				//active set reactivation
				else
				{
					QP_active_size = active_size;
					QP_Gmax_old = INF;
					continue;
				}
			}

			QP_Gmax_old = QP_Gmax_new;
		}

		if(iter >= max_iter)
			info("WARNING: reaching max number of inner iterations\n");

		delta = 0;
		w_norm_new = 0;
		for(j=0; j<w_size; j++)
		{
			delta += Grad[j]*(wpd[j]-w[j]);
			if(wpd[j] != 0)
				w_norm_new += fabs(wpd[j]);
		}
		delta += (w_norm_new-w_norm);

		negsum_xTd = 0;
		for(int i=0; i<l; i++)
			if(y[i] == -1)
				negsum_xTd += C[GETI(i)]*xTd[i];

		int num_linesearch;
		for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
		{
			cond = w_norm_new - w_norm + negsum_xTd - sigma*delta;

			for(int i=0; i<l; i++)
			{
				double exp_xTd = exp(xTd[i]);
				exp_wTx_new[i] = exp_wTx[i]*exp_xTd;
				cond += C[GETI(i)]*log((1+exp_wTx_new[i])/(exp_xTd+exp_wTx_new[i]));
			}

			if(cond <= 0)
			{
				w_norm = w_norm_new;
				for(j=0; j<w_size; j++)
					w[j] = wpd[j];
				for(int i=0; i<l; i++)
				{
					exp_wTx[i] = exp_wTx_new[i];
					double tau_tmp = 1/(1+exp_wTx[i]);
					tau[i] = C[GETI(i)]*tau_tmp;
					D[i] = C[GETI(i)]*exp_wTx[i]*tau_tmp*tau_tmp;
				}
				break;
			}
			else
			{
				w_norm_new = 0;
				for(j=0; j<w_size; j++)
				{
					wpd[j] = (w[j]+wpd[j])*0.5;
					if(wpd[j] != 0)
						w_norm_new += fabs(wpd[j]);
				}
				delta *= 0.5;
				negsum_xTd *= 0.5;
				for(int i=0; i<l; i++)
					xTd[i] *= 0.5;
			}
		}

		// Recompute some info due to too many line search steps
		if(num_linesearch >= max_num_linesearch)
		{
			for(int i=0; i<l; i++)
				exp_wTx[i] = 0;

			for(int i=0; i<w_size; i++)
			{
				if(w[i]==0)
					continue;
				x = prob_col->x[i];
				while(x->index != -1)
				{
					exp_wTx[x->index-1] += w[i]*x->value;
					x++;
				}
			}

			for(int i=0; i<l; i++)
				exp_wTx[i] = exp(exp_wTx[i]);
		}

		if(iter == 1)
			inner_eps *= 0.25;

		newton_iter++;
		Gmax_old = Gmax_new;

		info("iter %3d #CD cycles %d\n", newton_iter, iter);
	}

	info("=========================\n");
	info("optimization finished, #iter = %d\n", newton_iter);
	if(newton_iter >= max_newton_iter)
		info("WARNING: reaching max number of iterations\n");

	// calculate objective value

	double v = 0;
	int nnz = 0;
	for(j=0; j<w_size; j++)
		if(w[j] != 0)
		{
			v += fabs(w[j]);
			nnz++;
		}
	for(j=0; j<l; j++)
		if(y[j] == 1)
			v += C[GETI(j)]*log(1+1/exp_wTx[j]);
		else
			v += C[GETI(j)]*log(1+exp_wTx[j]);

	info("Objective value = %lf\n", v);
	info("#nonzeros/#features = %d/%d\n", nnz, w_size);

	delete [] index;
	delete [] y;
	delete [] Hdiag;
	delete [] Grad;
	delete [] wpd;
	delete [] xjneg_sum;
	delete [] xTd;
	delete [] exp_wTx;
	delete [] exp_wTx_new;
	delete [] tau;
	delete [] D;
}

// transpose matrix X from row format to column format
static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col)
{
	int i;
	int l = prob->l;
	int n = prob->n;
	int nnz = 0;
	int *col_ptr = new int[n+1];
	feature_node *x_space;
	prob_col->l = l;
	prob_col->n = n;
	prob_col->y = new int[l];
	prob_col->x = new feature_node*[n];

	for(i=0; i<l; i++)
		prob_col->y[i] = prob->y[i];

	for(i=0; i<n+1; i++)
		col_ptr[i] = 0;
	for(i=0; i<l; i++)
	{
		feature_node *x = prob->x[i];
		while(x->index != -1)
		{
			nnz++;
			col_ptr[x->index]++;
			x++;
		}
	}
	for(i=1; i<n+1; i++)
		col_ptr[i] += col_ptr[i-1] + 1;

	x_space = new feature_node[nnz+n];
	for(i=0; i<n; i++)
		prob_col->x[i] = &x_space[col_ptr[i]];

	for(i=0; i<l; i++)
	{
		feature_node *x = prob->x[i];
		while(x->index != -1)
		{
			int ind = x->index-1;
			x_space[col_ptr[ind]].index = i+1; // starts from 1
			x_space[col_ptr[ind]].value = x->value;
			col_ptr[ind]++;
			x++;
		}
	}
	for(i=0; i<n; i++)
		x_space[col_ptr[i]].index = -1;

	*x_space_ret = x_space;

	delete [] col_ptr;
}

// label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
// perm, length l, must be allocated before calling this subroutine
static void group_classes(const problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
{
	int l = prob->l;
	int max_nr_class = 16;
	int nr_class = 0;
	int *label = Malloc(int,max_nr_class);
	int *count = Malloc(int,max_nr_class);
	int *data_label = Malloc(int,l);
	int i;

	for(i=0;i<l;i++)
	{
		int this_label = prob->y[i];
		int j;
		for(j=0;j<nr_class;j++)
		{
			if(this_label == label[j])
			{
				++count[j];
				break;
			}
		}
		data_label[i] = j;
		if(j == nr_class)
		{
			if(nr_class == max_nr_class)
			{
				max_nr_class *= 2;
				label = (int *)realloc(label,max_nr_class*sizeof(int));
				count = (int *)realloc(count,max_nr_class*sizeof(int));
			}
			label[nr_class] = this_label;
			count[nr_class] = 1;
			++nr_class;
		}
	}

	int *start = Malloc(int,nr_class);
	start[0] = 0;
	for(i=1;i<nr_class;i++)
		start[i] = start[i-1]+count[i-1];
	for(i=0;i<l;i++)
	{
		perm[start[data_label[i]]] = i;
		++start[data_label[i]];
	}
	start[0] = 0;
	for(i=1;i<nr_class;i++)
		start[i] = start[i-1]+count[i-1];

	*nr_class_ret = nr_class;
	*label_ret = label;
	*start_ret = start;
	*count_ret = count;
	free(data_label);
}

static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
{
	double eps=param->eps;
	int pos = 0;
	int neg = 0;
	for(int i=0;i<prob->l;i++)
		if(prob->y[i]==+1)
			pos++;
	neg = prob->l - pos;

	function *fun_obj=NULL;
	switch(param->solver_type)
	{
		case L2R_LR:
		{
			fun_obj=new l2r_lr_fun(prob, Cp, Cn);
			TRON tron_obj(fun_obj, eps*min(pos,neg)/prob->l);
			tron_obj.set_print_string(liblinear_print_string);
			tron_obj.tron(w);
			delete fun_obj;
			break;
		}
		case L2R_L2LOSS_SVC:
		{
			fun_obj=new l2r_l2_svc_fun(prob, Cp, Cn);
			TRON tron_obj(fun_obj, eps*min(pos,neg)/prob->l);
			tron_obj.set_print_string(liblinear_print_string);
			tron_obj.tron(w);
			delete fun_obj;
			break;
		}
		case L2R_L2LOSS_SVC_DUAL:
			solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
			break;
		case L2R_L1LOSS_SVC_DUAL:
			solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
			break;
		case L1R_L2LOSS_SVC:
		{
			problem prob_col;
			feature_node *x_space = NULL;
			transpose(prob, &x_space ,&prob_col);
			solve_l1r_l2_svc(&prob_col, w, eps*min(pos,neg)/prob->l, Cp, Cn);
			delete [] prob_col.y;
			delete [] prob_col.x;
			delete [] x_space;
			break;
		}
		case L1R_LR:
		{
			problem prob_col;
			feature_node *x_space = NULL;
			transpose(prob, &x_space ,&prob_col);
			solve_l1r_lr(&prob_col, w, eps*min(pos,neg)/prob->l, Cp, Cn);
			delete [] prob_col.y;
			delete [] prob_col.x;
			delete [] x_space;
			break;
		}
		case L2R_LR_DUAL:
			solve_l2r_lr_dual(prob, w, eps, Cp, Cn);
			break;
		default:
			fprintf(stderr, "Error: unknown solver_type\n");
			break;
	}
}

//
// Interface functions
//
model* train(const problem *prob, const parameter *param)
{
	int i,j;
	int l = prob->l;
	int n = prob->n;
	int w_size = prob->n;
	model *model_ = Malloc(model,1);

	if(prob->bias>=0)
		model_->nr_feature=n-1;
	else
		model_->nr_feature=n;
	model_->param = *param;
	model_->bias = prob->bias;

	int nr_class;
	int *label = NULL;
	int *start = NULL;
	int *count = NULL;
	int *perm = Malloc(int,l);

	// group training data of the same class
	group_classes(prob,&nr_class,&label,&start,&count,perm);

	model_->nr_class=nr_class;
	model_->label = Malloc(int,nr_class);
	for(i=0;i<nr_class;i++)
		model_->label[i] = label[i];

	// calculate weighted C
	double *weighted_C = Malloc(double, nr_class);
	for(i=0;i<nr_class;i++)
		weighted_C[i] = param->C;
	for(i=0;i<param->nr_weight;i++)
	{
		for(j=0;j<nr_class;j++)
			if(param->weight_label[i] == label[j])
				break;
		if(j == nr_class)
			fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
		else
			weighted_C[j] *= param->weight[i];
	}

	// constructing the subproblem
	feature_node **x = Malloc(feature_node *,l);
	for(i=0;i<l;i++)
		x[i] = prob->x[perm[i]];

	int k;
	problem sub_prob;
	sub_prob.l = l;
	sub_prob.n = n;
	sub_prob.x = Malloc(feature_node *,sub_prob.l);
	sub_prob.y = Malloc(int,sub_prob.l);

	for(k=0; k<sub_prob.l; k++)
		sub_prob.x[k] = x[k];

	// multi-class svm by Crammer and Singer
	if(param->solver_type == MCSVM_CS)
	{
		model_->w=Malloc(double, n*nr_class);
		for(i=0;i<nr_class;i++)
			for(j=start[i];j<start[i]+count[i];j++)
				sub_prob.y[j] = i;
		Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
		Solver.Solve(model_->w);
	}
	else
	{
		if(nr_class == 2)
		{
			model_->w=Malloc(double, w_size);

			int e0 = start[0]+count[0];
			k=0;
			for(; k<e0; k++)
				sub_prob.y[k] = +1;
			for(; k<sub_prob.l; k++)
				sub_prob.y[k] = -1;

			train_one(&sub_prob, param, &model_->w[0], weighted_C[0], weighted_C[1]);
		}
		else
		{
			model_->w=Malloc(double, w_size*nr_class);
			double *w=Malloc(double, w_size);
			for(i=0;i<nr_class;i++)
			{
				int si = start[i];
				int ei = si+count[i];

				k=0;
				for(; k<si; k++)
					sub_prob.y[k] = -1;
				for(; k<ei; k++)
					sub_prob.y[k] = +1;
				for(; k<sub_prob.l; k++)
					sub_prob.y[k] = -1;

				train_one(&sub_prob, param, w, weighted_C[i], param->C);

				for(int j=0;j<w_size;j++)
					model_->w[j*nr_class+i] = w[j];
			}
			free(w);
		}
	}

	free(x);
	free(label);
	free(start);
	free(count);
	free(perm);
	free(sub_prob.x);
	free(sub_prob.y);
	free(weighted_C);
	return model_;
}

void cross_validation(const problem *prob, const parameter *param, int nr_fold, int *target)
{
	int i;
	int *fold_start = Malloc(int,nr_fold+1);
	int l = prob->l;
	int *perm = Malloc(int,l);

	for(i=0;i<l;i++) perm[i]=i;
	for(i=0;i<l;i++)
	{
		int j = i+rand()%(l-i);
		swap(perm[i],perm[j]);
	}
	for(i=0;i<=nr_fold;i++)
		fold_start[i]=i*l/nr_fold;

	for(i=0;i<nr_fold;i++)
	{
		int begin = fold_start[i];
		int end = fold_start[i+1];
		int j,k;
		struct problem subprob;

		subprob.bias = prob->bias;
		subprob.n = prob->n;
		subprob.l = l-(end-begin);
		subprob.x = Malloc(struct feature_node*,subprob.l);
		subprob.y = Malloc(int,subprob.l);

		k=0;
		for(j=0;j<begin;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			++k;
		}
		for(j=end;j<l;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			++k;
		}
		struct model *submodel = train(&subprob,param);
		for(j=begin;j<end;j++)
			target[perm[j]] = predict(submodel,prob->x[perm[j]]);
		free_and_destroy_model(&submodel);
		free(subprob.x);
		free(subprob.y);
	}
	free(fold_start);
	free(perm);
}

int predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
{
	int idx;
	int n;
	if(model_->bias>=0)
		n=model_->nr_feature+1;
	else
		n=model_->nr_feature;
	double *w=model_->w;
	int nr_class=model_->nr_class;
	int i;
	int nr_w;
	if(nr_class==2 && model_->param.solver_type != MCSVM_CS)
		nr_w = 1;
	else
		nr_w = nr_class;

	const feature_node *lx=x;
	for(i=0;i<nr_w;i++)
		dec_values[i] = 0;
	for(; (idx=lx->index)!=-1; lx++)
	{
		// the dimension of testing data may exceed that of training
		if(idx<=n)
			for(i=0;i<nr_w;i++)
				dec_values[i] += w[(idx-1)*nr_w+i]*lx->value;
	}

	if(nr_class==2)
		return (dec_values[0]>0)?model_->label[0]:model_->label[1];
	else
	{
		int dec_max_idx = 0;
		for(i=1;i<nr_class;i++)
		{
			if(dec_values[i] > dec_values[dec_max_idx])
				dec_max_idx = i;
		}
		return model_->label[dec_max_idx];
	}
}

int predict(const model *model_, const feature_node *x)
{
	double *dec_values = Malloc(double, model_->nr_class);
	int label=predict_values(model_, x, dec_values);
	free(dec_values);
	return label;
}

int predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates)
{
	if(check_probability_model(model_))
	{
		int i;
		int nr_class=model_->nr_class;
		int nr_w;
		if(nr_class==2)
			nr_w = 1;
		else
			nr_w = nr_class;

		int label=predict_values(model_, x, prob_estimates);
		for(i=0;i<nr_w;i++)
			prob_estimates[i]=1/(1+exp(-prob_estimates[i]));

		if(nr_class==2) // for binary classification
			prob_estimates[1]=1.-prob_estimates[0];
		else
		{
			double sum=0;
			for(i=0; i<nr_class; i++)
				sum+=prob_estimates[i];

			for(i=0; i<nr_class; i++)
				prob_estimates[i]=prob_estimates[i]/sum;
		}

		return label;
	}
	else
		return 0;
}

static const char *solver_type_table[]=
{
	"L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS",
	"L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL", NULL
};

int save_model(const char *model_file_name, const struct model *model_)
{
	int i;
	int nr_feature=model_->nr_feature;
	int n;
	const parameter& param = model_->param;

	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;
	int w_size = n;
	FILE *fp = fopen(model_file_name,"w");
	if(fp==NULL) return -1;

	int nr_w;
	if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS)
		nr_w=1;
	else
		nr_w=model_->nr_class;

	fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]);
	fprintf(fp, "nr_class %d\n", model_->nr_class);
	fprintf(fp, "label");
	for(i=0; i<model_->nr_class; i++)
		fprintf(fp, " %d", model_->label[i]);
	fprintf(fp, "\n");

	fprintf(fp, "nr_feature %d\n", nr_feature);

	fprintf(fp, "bias %.16g\n", model_->bias);

	fprintf(fp, "w\n");
	for(i=0; i<w_size; i++)
	{
		int j;
		for(j=0; j<nr_w; j++)
			fprintf(fp, "%.16g ", model_->w[i*nr_w+j]);
		fprintf(fp, "\n");
	}

	if (ferror(fp) != 0 || fclose(fp) != 0)
		return -1;
	else
		return 0;
}

struct model *load_model(const char *model_file_name)
{
	FILE *fp = fopen(model_file_name,"r");
	if(fp==NULL) return NULL;

	int i;
	int nr_feature;
	int n;
	int nr_class;
	double bias;
	model *model_ = Malloc(model,1);
	parameter& param = model_->param;

	model_->label = NULL;

	char cmd[81];
	while(1)
	{
		fscanf(fp,"%80s",cmd);
		if(strcmp(cmd,"solver_type")==0)
		{
			fscanf(fp,"%80s",cmd);
			int i;
			for(i=0;solver_type_table[i];i++)
			{
				if(strcmp(solver_type_table[i],cmd)==0)
				{
					param.solver_type=i;
					break;
				}
			}
			if(solver_type_table[i] == NULL)
			{
				fprintf(stderr,"unknown solver type.\n");
				free(model_->label);
				free(model_);
				return NULL;
			}
		}
		else if(strcmp(cmd,"nr_class")==0)
		{
			fscanf(fp,"%d",&nr_class);
			model_->nr_class=nr_class;
		}
		else if(strcmp(cmd,"nr_feature")==0)
		{
			fscanf(fp,"%d",&nr_feature);
			model_->nr_feature=nr_feature;
		}
		else if(strcmp(cmd,"bias")==0)
		{
			fscanf(fp,"%lf",&bias);
			model_->bias=bias;
		}
		else if(strcmp(cmd,"w")==0)
		{
			break;
		}
		else if(strcmp(cmd,"label")==0)
		{
			int nr_class = model_->nr_class;
			model_->label = Malloc(int,nr_class);
			for(int i=0;i<nr_class;i++)
				fscanf(fp,"%d",&model_->label[i]);
		}
		else
		{
			fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
			free(model_);
			return NULL;
		}
	}

	nr_feature=model_->nr_feature;
	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;
	int w_size = n;
	int nr_w;
	if(nr_class==2 && param.solver_type != MCSVM_CS)
		nr_w = 1;
	else
		nr_w = nr_class;

	model_->w=Malloc(double, w_size*nr_w);
	for(i=0; i<w_size; i++)
	{
		int j;
		for(j=0; j<nr_w; j++)
			fscanf(fp, "%lf ", &model_->w[i*nr_w+j]);
		fscanf(fp, "\n");
	}
	if (ferror(fp) != 0 || fclose(fp) != 0)
		return NULL;

	return model_;
}

int get_nr_feature(const model *model_)
{
	return model_->nr_feature;
}

int get_nr_class(const model *model_)
{
	return model_->nr_class;
}

void get_labels(const model *model_, int* label)
{
	if (model_->label != NULL)
		for(int i=0;i<model_->nr_class;i++)
			label[i] = model_->label[i];
}

void free_model_content(struct model *model_ptr)
{
	if(model_ptr->w != NULL)
		free(model_ptr->w);
	if(model_ptr->label != NULL)
		free(model_ptr->label);
}

void free_and_destroy_model(struct model **model_ptr_ptr)
{
	struct model *model_ptr = *model_ptr_ptr;
	if(model_ptr != NULL)
	{
		free_model_content(model_ptr);
		free(model_ptr);
	}
}

void destroy_param(parameter* param)
{
	if(param->weight_label != NULL)
		free(param->weight_label);
	if(param->weight != NULL)
		free(param->weight);
}

const char *check_parameter(const problem *prob, const parameter *param)
{
	if(param->eps <= 0)
		return "eps <= 0";

	if(param->C <= 0)
		return "C <= 0";

	if(param->solver_type != L2R_LR
		&& param->solver_type != L2R_L2LOSS_SVC_DUAL
		&& param->solver_type != L2R_L2LOSS_SVC
		&& param->solver_type != L2R_L1LOSS_SVC_DUAL
		&& param->solver_type != MCSVM_CS
		&& param->solver_type != L1R_L2LOSS_SVC
		&& param->solver_type != L1R_LR
		&& param->solver_type != L2R_LR_DUAL)
		return "unknown solver type";

	return NULL;
}

int check_probability_model(const struct model *model_)
{
	return (model_->param.solver_type==L2R_LR ||
			model_->param.solver_type==L2R_LR_DUAL ||
			model_->param.solver_type==L1R_LR);
}

void set_print_string_function(void (*print_func)(const char*))
{
	if (print_func == NULL)
		liblinear_print_string = &print_string_stdout;
	else
		liblinear_print_string = print_func;
}
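// ---------------------------------------------------------------------
// Illustrative usage sketch (not part of LIBLINEAR): how a separate
// program would drive the interface functions above. Struct fields
// follow their usage in this file; see linear.h for the authoritative
// declarations. Kept under #if 0 since this translation unit is a
// library, not an executable.
#if 0
int main()
{
	// two sparse training instances, each terminated by index -1
	feature_node x1[] = {{1,1.0},{2,0.5},{-1,0}};
	feature_node x2[] = {{1,-1.0},{3,2.0},{-1,0}};
	feature_node *rows[] = {x1, x2};
	int labels[] = {+1, -1};

	problem prob;
	prob.l = 2;     // number of instances
	prob.n = 3;     // number of features
	prob.bias = -1; // no bias term
	prob.x = rows;
	prob.y = labels;

	parameter param;
	param.solver_type = L2R_L2LOSS_SVC_DUAL;
	param.eps = 0.1;
	param.C = 1;
	param.nr_weight = 0;
	param.weight_label = NULL;
	param.weight = NULL;

	if(check_parameter(&prob, &param) == NULL)
	{
		model *model_ = train(&prob, &param);
		int predicted = predict(model_, x1); // expected: +1
		(void)predicted;
		save_model("example.model", model_);
		free_and_destroy_model(&model_);
	}
	destroy_param(&param); // no-op here: weight arrays are NULL
	return 0;
}
#endif
// ---------------------------------------------------------------------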