Commit a6b85d09 by liuzhangyiding

增加参数实现准确率识别,对应修改处理流程

parent d80c569e
...@@ -13,13 +13,13 @@ ...@@ -13,13 +13,13 @@
1.0 1.0
2.0 2.0
2.0 2.0
1.0
2.0 2.0
2.0 2.0
1.0
2.0 2.0
2.0 2.0
1.0
2.0 2.0
2.0 2.0
1.0
2.0 2.0
2.0 2.0
...@@ -2,30 +2,33 @@ svm_type c_svc ...@@ -2,30 +2,33 @@ svm_type c_svc
kernel_type rbf kernel_type rbf
gamma 0.07142857142857142 gamma 0.07142857142857142
nr_class 2 nr_class 2
total_sv 22 total_sv 23
rho 0.3544510261690456 rho 0.4090245608696046
label 1 2 label 1 2
nr_sv 10 12 probA -1.6128218314376728
probB 0.14720533163949925
nr_sv 11 12
SV SV
1.0 9:3.0588936890535687 1.0 8:0.979533151778619 7:0.979533151778619 3:0.4005446203898835
1.0 8:1.5294468445267844 3:0.6609640474436812 1.0 13:1.7617809780285065 9:1.4692997276679285
0.9903141391672972 3:1.3219280948873624 0.3704443587292751 3:1.2016338611696504
1.0 3:1.3219280948873624 1.0 5:3.523561956057013
1.0 3:1.3219280948873624 1.0 3:1.2016338611696504
1.0 3:1.3219280948873624 1.0 10:3.523561956057013
1.0 10:3.6438561897747253 1.0 3:1.2016338611696504
1.0 8:1.019631229684523 7:1.019631229684523 3:0.44064269829578745 1.0 8:1.4692997276679285 3:0.6008169305848252
1.0 13:1.8219280948873626 9:1.5294468445267844 1.0 3:1.2016338611696504
1.0 5:3.6438561897747253 1.0 3:1.2016338611696504
-0.6700922884685501 14:2.643856189774725 1.0 9:2.938599455335857
-0.2563835238113437 11:3.0588936890535687 -0.34763255797023584 11:2.938599455335857
-1.0 14:1.2617809780285065 12:1.7617809780285065
-0.27060537603115437 4:2.523561956057013
-0.9876261200717943 11:2.938599455335857
-1.0 -1.0
-1.0 2:3.6438561897747253 -1.0 1:1.7617809780285065 14:1.2617809780285065
-0.07237134131172171 4:2.643856189774725 -1.0 2:3.523561956057013
-1.0 4:1.3219280948873624 7:1.5294468445267844
-1.0 1:1.8219280948873626 14:1.3219280948873624
-0.9914669855756817 11:3.0588936890535687
-1.0 14:1.3219280948873624 12:1.8219280948873626
-1.0 -1.0
-1.0 4:2.643856189774725 -1.0 4:1.2617809780285065 7:1.4692997276679285
-1.0 6:3.6438561897747253 -0.8144378826386051 14:2.523561956057013
-1.0 6:3.523561956057013
-0.9501424220174856 4:2.523561956057013
1 9:3.0588936890535687 1 8:0.979533151778619 7:0.979533151778619 3:0.4005446203898835
1 8:1.5294468445267844 3:0.6609640474436812 1 3:1.2016338611696504
1 3:1.3219280948873624 1 13:1.7617809780285065 9:1.4692997276679285
1 3:1.3219280948873624 1 3:1.2016338611696504
1 3:1.3219280948873624 1 3:1.2016338611696504
1 3:1.3219280948873624 1 5:3.523561956057013
1 3:1.3219280948873624 1 3:1.2016338611696504
1 3:1.3219280948873624 1 10:3.523561956057013
1 3:1.3219280948873624 1 3:1.2016338611696504
1 10:3.6438561897747253 1 8:1.4692997276679285 3:0.6008169305848252
1 8:1.019631229684523 7:1.019631229684523 3:0.44064269829578745 1 3:1.2016338611696504
1 13:1.8219280948873626 9:1.5294468445267844 1 3:1.2016338611696504
1 5:3.6438561897747253 1 9:2.938599455335857
2 14:2.643856189774725 2 11:2.938599455335857
2 11:3.0588936890535687 2 14:1.2617809780285065 12:1.7617809780285065
2 4:2.523561956057013
2 11:2.938599455335857
2 2
2 2:3.6438561897747253 2 1:1.7617809780285065 14:1.2617809780285065
2 4:2.643856189774725 2 2:3.523561956057013
2 4:1.3219280948873624 7:1.5294468445267844
2 1:1.8219280948873626 14:1.3219280948873624
2 11:3.0588936890535687
2 14:1.3219280948873624 12:1.8219280948873626
2 2
2 4:2.643856189774725 2 4:1.2617809780285065 7:1.4692997276679285
2 6:3.6438561897747253 2 14:2.523561956057013
2 6:3.523561956057013
2 4:2.523561956057013
...@@ -105,21 +105,21 @@ public class IdentifyService { ...@@ -105,21 +105,21 @@ public class IdentifyService {
if(matchedSkills.size()==0){ if(matchedSkills.size()==0){
matchedSkills = matchSkillByRegex(formattext,matchedSkills); matchedSkills = matchSkillByRegex(formattext,matchedSkills);
} }
Skill matchedSkill = null;
List wordList = WordUtil.getNlpSeg(text);
//TODO 匹配到一个的话直接确定意图,否则用模型进行二次识别。识别模型目前必定识别出结果,无概率计算。
if((matchedSkills.size()==1)){
matchedSkill = matchedSkills.get(0);
FakeLog.log("match only one skill:"+matchedSkill.getName() );
}else if(matchedSkills.size()>1){
String identifyBySVMResult = SVMService.predict(wordList);
matchedSkill = StringUtils.isEmpty(identifyBySVMResult)?null:skillList.getSkillByName(identifyBySVMResult);
}
//直接进行模型测试
// Skill matchedSkill = null; // Skill matchedSkill = null;
// List wordList = WordUtil.getNlpSeg(text); // List wordList = WordUtil.getNlpSeg(text);
// String identifyBySVMResult = SVMService.predict(wordList); // //TODO 匹配到一个的话直接确定意图,否则用模型进行二次识别。识别模型目前必定识别出结果,无概率计算。
// matchedSkill = StringUtils.isEmpty(identifyBySVMResult)?null:skillList.getSkillByName(identifyBySVMResult); // if((matchedSkills.size()==1)){
// matchedSkill = matchedSkills.get(0);
// FakeLog.log("match only one skill:"+matchedSkill.getName() );
// }else if(matchedSkills.size()>1){
// String identifyBySVMResult = SVMService.predict(wordList);
// matchedSkill = StringUtils.isEmpty(identifyBySVMResult)?null:skillList.getSkillByName(identifyBySVMResult);
// }
//直接进行模型测试
Skill matchedSkill = null;
List wordList = WordUtil.getNlpSeg(text);
String identifyBySVMResult = SVMService.predict(wordList);
matchedSkill = StringUtils.isEmpty(identifyBySVMResult)?null:skillList.getSkillByName(identifyBySVMResult);
if(matchedSkill == null){ if(matchedSkill == null){
FakeLog.log("无法识别:"+text); FakeLog.log("无法识别:"+text);
......
package com.xoado.domain.service; package com.xoado.domain.service;
import com.xoado.domain.svm.comMain; import com.xoado.domain.svm.comMain;
import com.xoado.domain.util.FakeLog;
import com.xoado.domain.util.WordUtil; import com.xoado.domain.util.WordUtil;
import libsvm.svm; import libsvm.svm;
import libsvm.svm_model; import libsvm.svm_model;
...@@ -70,7 +71,7 @@ public class SVMService { ...@@ -70,7 +71,7 @@ public class SVMService {
labelNumberMap.put(Integer.parseInt(numberAndLabelName[0]),numberAndLabelName[1]); labelNumberMap.put(Integer.parseInt(numberAndLabelName[0]),numberAndLabelName[1]);
} }
br.close(); br.close();
System.out.println(labelNumberMap); FakeLog.log(labelNumberMap.toString());
} catch (Exception e) { } catch (Exception e) {
System.err.println("read errors :" + e); System.err.println("read errors :" + e);
} }
...@@ -83,7 +84,7 @@ public class SVMService { ...@@ -83,7 +84,7 @@ public class SVMService {
String[] numberAndLabelName = lineTxt.split("\\s+"); String[] numberAndLabelName = lineTxt.split("\\s+");
wordAndNumberMap.put(numberAndLabelName[0],numberAndLabelName[1]); wordAndNumberMap.put(numberAndLabelName[0],numberAndLabelName[1]);
} }
System.out.println(wordAndNumberMap); FakeLog.log(wordAndNumberMap.toString());
br.close(); br.close();
} catch (Exception e) { } catch (Exception e) {
System.err.println("read errors :" + e); System.err.println("read errors :" + e);
...@@ -109,11 +110,13 @@ public class SVMService { ...@@ -109,11 +110,13 @@ public class SVMService {
if(!modelIsUseful){ if(!modelIsUseful){
return ""; return "";
} }
System.out.println(wordList); FakeLog.log(wordList.toString());
//System.out.println(labelNumberMap); //FakeLog.log(labelNumberMap);
//System.out.println(wordAndNumberMap); //FakeLog.log(wordAndNumberMap);
StringBuffer sb = new StringBuffer(); StringBuffer sb = new StringBuffer();
sb.append("1 "); //label的值
sb.append("");
int countExistWord = 0;
for(Object word :wordList){ for(Object word :wordList){
String[] words = String.valueOf(word).split("/"); String[] words = String.valueOf(word).split("/");
if(words.length == 2) { if(words.length == 2) {
...@@ -121,11 +124,17 @@ public class SVMService { ...@@ -121,11 +124,17 @@ public class SVMService {
String lexicalCategory = words[1]; String lexicalCategory = words[1];
if(wordAndNumberMap.get(wordWithOutlexicalCategory)!=null) { if(wordAndNumberMap.get(wordWithOutlexicalCategory)!=null) {
sb.append(wordAndNumberMap.get(wordWithOutlexicalCategory)).append(":").append("1").append(" "); sb.append(wordAndNumberMap.get(wordWithOutlexicalCategory)).append(":").append("1").append(" ");
countExistWord++;
} }
} }
} }
//完全不认识的内容直接抛出
if(countExistWord==0){
return null;
}
int i, predict_probability=0; //predict_probability为1时支持准确率识别
int i, predict_probability=1;
if(predict_probability == 1) if(predict_probability == 1)
{ {
if(svm.svm_check_probability_model(model)==0) if(svm.svm_check_probability_model(model)==0)
...@@ -137,23 +146,34 @@ public class SVMService { ...@@ -137,23 +146,34 @@ public class SVMService {
else { else {
if(svm.svm_check_probability_model(model)!=0) if(svm.svm_check_probability_model(model)!=0)
{ {
System.out.println("Model supports probability estimates, but disabled in prediction.\n"); FakeLog.log("Model supports probability estimates, but disabled in prediction.\n");
} }
} }
System.out.println("转码后数据:" +sb.toString()); FakeLog.log("转码后数据:" +sb.toString());
String predictResult = ""; Map.Entry<Integer,Double> predictResult = null;
try { try {
predictResult = predict(sb.toString(), model, predict_probability); predictResult = predict(sb.toString(), model, predict_probability);
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
} }
return predictResult; //TODO 匹配度默认值暂时定为0.5
String result = null;
//不考虑匹配度的话直接返回匹配到的意图
if(predictResult.getValue()==null){
result = labelNumberMap.get(predictResult.getKey());
}else{
if(predictResult.getValue()>0.5){
result = labelNumberMap.get(predictResult.getKey());
}
}
//完全匹配不到的话,返回null
return result;
} }
public static void main(String[] args) throws IOException, InterruptedException { public static void main(String[] args) throws IOException, InterruptedException {
//更新训练模型 //更新训练模型
boolean rebuildModel = true; boolean rebuildModel = false;
if(rebuildModel){ if(rebuildModel){
modelIsUseful = false; modelIsUseful = false;
deleteFile("model\\train.txt","model\\model.txt","model\\TestResult.txt","model\\train.txt","model\\terms.txt","model\\labels.txt"); deleteFile("model\\train.txt","model\\model.txt","model\\TestResult.txt","model\\train.txt","model\\terms.txt","model\\labels.txt");
...@@ -168,15 +188,15 @@ public class SVMService { ...@@ -168,15 +188,15 @@ public class SVMService {
modelIsUseful = true; modelIsUseful = true;
} }
//String testText = "我想打车去大雁塔"; String testText = "我想打车去吃外卖";
//String testText = "我要坐飞机去北京"; //String testText = "我要坐飞机去北京";
String testText = "帮我把空调打开"; //String testText = "帮我把空调打开";
//String testText = "给我叫个外卖"; //String testText = "给我叫个外卖";
FakeLog.log( "待预测语料:"+testText);
List wordList = WordUtil.getNlpSeg(testText); List wordList = WordUtil.getNlpSeg(testText);
System.out.println(wordList); FakeLog.log(wordList.toString());
//System.out.println(labelNumberMap); //FakeLog.log(labelNumberMap);
//System.out.println(wordAndNumberMap); //FakeLog.log(wordAndNumberMap);
StringBuffer sb = new StringBuffer(); StringBuffer sb = new StringBuffer();
sb.append("-1 "); sb.append("-1 ");
for(Object word :wordList){ for(Object word :wordList){
...@@ -191,7 +211,7 @@ public class SVMService { ...@@ -191,7 +211,7 @@ public class SVMService {
} }
String model_file = "model//model.txt"; String model_file = "model//model.txt";
int i, predict_probability=0; int i, predict_probability=1;
try try
{ {
...@@ -213,11 +233,10 @@ public class SVMService { ...@@ -213,11 +233,10 @@ public class SVMService {
{ {
if(svm.svm_check_probability_model(model)!=0) if(svm.svm_check_probability_model(model)!=0)
{ {
System.out.println("Model supports probability estimates, but disabled in prediction.\n"); FakeLog.log("Model supports probability estimates, but disabled in prediction.\n");
} }
} }
System.out.println("待预测语料:"+testText); FakeLog.log("转码后数据:" +sb.toString());
System.out.println("转码后数据:" +sb.toString());
predict(sb.toString(),model,predict_probability); predict(sb.toString(),model,predict_probability);
} }
catch(FileNotFoundException e) catch(FileNotFoundException e)
...@@ -242,7 +261,7 @@ public class SVMService { ...@@ -242,7 +261,7 @@ public class SVMService {
return Integer.parseInt(s); return Integer.parseInt(s);
} }
private static String predict(String context,svm_model model, int predict_probability) throws IOException private static Map.Entry<Integer,Double> predict(String context, svm_model model, int predict_probability) throws IOException
{ {
String predictResult = ""; String predictResult = "";
int correct = 0; int correct = 0;
...@@ -258,7 +277,7 @@ public class SVMService { ...@@ -258,7 +277,7 @@ public class SVMService {
if(svm_type == svm_parameter.EPSILON_SVR || if(svm_type == svm_parameter.EPSILON_SVR ||
svm_type == svm_parameter.NU_SVR) svm_type == svm_parameter.NU_SVR)
{ {
System.out.println("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma="+svm.svm_get_svr_probability(model)+"\n"); FakeLog.log("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma="+svm.svm_get_svr_probability(model)+"\n");
} }
else else
{ {
...@@ -269,9 +288,9 @@ public class SVMService { ...@@ -269,9 +288,9 @@ public class SVMService {
// for(int j=0;j<nr_class;j++) // for(int j=0;j<nr_class;j++)
// output.writeBytes(" "+labels[j]); // output.writeBytes(" "+labels[j]);
// output.writeBytes("\n"); // output.writeBytes("\n");
for(int j=0;j<nr_class;j++){ // for(int j=0;j<nr_class;j++){
System.out.println("labels[j]:"+labels[j]); // FakeLog.log("labels[j]:"+labels[j]);
} // }
} }
} }
...@@ -289,48 +308,54 @@ public class SVMService { ...@@ -289,48 +308,54 @@ public class SVMService {
x[j].value = atof(st.nextToken()); x[j].value = atof(st.nextToken());
} }
Map.Entry<Integer,Double> labelNumberAndEstimate = null;
double predict_label; double predict_label;
if (predict_probability==1 && (svm_type==svm_parameter.C_SVC || svm_type==svm_parameter.NU_SVC)) if (predict_probability==1 && (svm_type==svm_parameter.C_SVC || svm_type==svm_parameter.NU_SVC))
{ {
predict_label = svm.svm_predict_probability(model,x,prob_estimates); predict_label = svm.svm_predict_probability(model,x,prob_estimates);
int key = (int) predict_label; int key = (int) predict_label;
System.out.println("predict_label:"+ labelNumberMap.get(key)); FakeLog.log("predict_label:"+ labelNumberMap.get(key));
labelNumberAndEstimate = new AbstractMap.SimpleEntry<>(key,prob_estimates[key-1]);
predictResult = labelNumberMap.get(key); predictResult = labelNumberMap.get(key);
FakeLog.log("Detailed suitability:");
for(int j=0;j<nr_class;j++) { for(int j=0;j<nr_class;j++) {
System.out.println(prob_estimates[j] + " "); FakeLog.log(labelNumberMap.get(j+1) + ":"+prob_estimates[j]);
} }
} }
else else
{ {
predict_label = svm.svm_predict(model,x); predict_label = svm.svm_predict(model,x);
int key = (int) predict_label; int key = (int) predict_label;
System.out.println("predict_label:"+ labelNumberMap.get(key)); FakeLog.log("predict_label:"+ labelNumberMap.get(key));
predictResult = labelNumberMap.get(key); predictResult = labelNumberMap.get(key);
labelNumberAndEstimate = new AbstractMap.SimpleEntry<>(key,null);
} }
if(predict_label == target_label) // if(predict_label == target_label)
++correct; // ++correct;
error += (predict_label-target_label)*(predict_label-target_label); // error += (predict_label-target_label)*(predict_label-target_label);
sump += predict_label; // sump += predict_label;
sumt += target_label; // sumt += target_label;
sumpp += predict_label*predict_label; // sumpp += predict_label*predict_label;
sumtt += target_label*target_label; // sumtt += target_label*target_label;
sumpt += predict_label*target_label; // sumpt += predict_label*target_label;
if(svm_type == svm_parameter.EPSILON_SVR || //这打的是啥日志啊
svm_type == svm_parameter.NU_SVR) // if(svm_type == svm_parameter.EPSILON_SVR ||
{ // svm_type == svm_parameter.NU_SVR)
System.out.println("Mean squared error = "+error/total+" (regression)\n"); // {
System.out.println("Squared correlation coefficient = "+ // FakeLog.log("Mean squared error = "+error/total+" (regression)\n");
((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/ // FakeLog.log("Squared correlation coefficient = "+
((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))+ // ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
" (regression)\n"); // ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))+
} // " (regression)\n");
else { // }
System.out.println("Accuracy = " + (double) correct / total * 100 + // else {
"% (" + correct + "/" + total + ") (classification)\n"); // FakeLog.log("Accuracy = " + (double) correct / total * 100 +
} // "% (" + correct + "/" + total + ") (classification)\n");
return predictResult; // }
return labelNumberAndEstimate;
} }
} }
...@@ -16,7 +16,7 @@ public class comMain { ...@@ -16,7 +16,7 @@ public class comMain {
// TODO Auto-generated method stub // TODO Auto-generated method stub
String[] arg = { "model\\train.txt", // 存放SVM训练模型用的数据的路径 String[] arg = { "-b","1", "model\\train.txt", // 存放SVM训练模型用的数据的路径
"model\\model.txt" }; // 存放SVM通过训练数据训/ //练出来的模型的路径 "model\\model.txt" }; // 存放SVM通过训练数据训/ //练出来的模型的路径
......
...@@ -116,13 +116,6 @@ class svm_train { ...@@ -116,13 +116,6 @@ class svm_train {
public static void main(String argv[]) throws IOException public static void main(String argv[]) throws IOException
{ {
String model_file = argv[0];
String output_file = argv[1];
argv = new String[4];
argv[0] = model_file;
argv[1] = output_file;
svm_train t = new svm_train(); svm_train t = new svm_train();
t.run(argv); t.run(argv);
} }
......
...@@ -59,7 +59,7 @@ public class DocumentWordsCollector extends AbstractComponent { ...@@ -59,7 +59,7 @@ public class DocumentWordsCollector extends AbstractComponent {
int n = 1; int n = 1;
for(File file : files) { for(File file : files) {
String doc = file.getAbsolutePath(); String doc = file.getAbsolutePath();
//遍历文件里的每一行内容 //遍历文件里的每一行内容
BufferedReader br = null; BufferedReader br = null;
try { try {
br = new BufferedReader( br = new BufferedReader(
......
...@@ -13,8 +13,7 @@ ...@@ -13,8 +13,7 @@
"keyList": ["吃饭","外卖","科","吃"], "keyList": ["吃饭","外卖","科","吃"],
"regexeList": [ "regexeList": [
"^.*(叫|点|吃).*$", "^.*(叫|点|吃).*$",
"^.*来.*碗.*$", "^.*来.*碗.*$"
"^.*(科技).*$"
] ]
} }
] ]
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment