Commit a6b85d09 by liuzhangyiding

增加参数实现准确率识别,对应修改处理流程

parent d80c569e
......@@ -13,13 +13,13 @@
1.0
2.0
2.0
1.0
2.0
2.0
1.0
2.0
2.0
1.0
2.0
2.0
1.0
2.0
2.0
......@@ -2,30 +2,33 @@ svm_type c_svc
kernel_type rbf
gamma 0.07142857142857142
nr_class 2
total_sv 22
rho 0.3544510261690456
total_sv 23
rho 0.4090245608696046
label 1 2
nr_sv 10 12
probA -1.6128218314376728
probB 0.14720533163949925
nr_sv 11 12
SV
1.0 9:3.0588936890535687
1.0 8:1.5294468445267844 3:0.6609640474436812
0.9903141391672972 3:1.3219280948873624
1.0 3:1.3219280948873624
1.0 3:1.3219280948873624
1.0 3:1.3219280948873624
1.0 10:3.6438561897747253
1.0 8:1.019631229684523 7:1.019631229684523 3:0.44064269829578745
1.0 13:1.8219280948873626 9:1.5294468445267844
1.0 5:3.6438561897747253
-0.6700922884685501 14:2.643856189774725
-0.2563835238113437 11:3.0588936890535687
1.0 8:0.979533151778619 7:0.979533151778619 3:0.4005446203898835
1.0 13:1.7617809780285065 9:1.4692997276679285
0.3704443587292751 3:1.2016338611696504
1.0 5:3.523561956057013
1.0 3:1.2016338611696504
1.0 10:3.523561956057013
1.0 3:1.2016338611696504
1.0 8:1.4692997276679285 3:0.6008169305848252
1.0 3:1.2016338611696504
1.0 3:1.2016338611696504
1.0 9:2.938599455335857
-0.34763255797023584 11:2.938599455335857
-1.0 14:1.2617809780285065 12:1.7617809780285065
-0.27060537603115437 4:2.523561956057013
-0.9876261200717943 11:2.938599455335857
-1.0
-1.0 2:3.6438561897747253
-0.07237134131172171 4:2.643856189774725
-1.0 4:1.3219280948873624 7:1.5294468445267844
-1.0 1:1.8219280948873626 14:1.3219280948873624
-0.9914669855756817 11:3.0588936890535687
-1.0 14:1.3219280948873624 12:1.8219280948873626
-1.0 1:1.7617809780285065 14:1.2617809780285065
-1.0 2:3.523561956057013
-1.0
-1.0 4:2.643856189774725
-1.0 6:3.6438561897747253
-1.0 4:1.2617809780285065 7:1.4692997276679285
-0.8144378826386051 14:2.523561956057013
-1.0 6:3.523561956057013
-0.9501424220174856 4:2.523561956057013
1 9:3.0588936890535687
1 8:1.5294468445267844 3:0.6609640474436812
1 3:1.3219280948873624
1 3:1.3219280948873624
1 3:1.3219280948873624
1 3:1.3219280948873624
1 3:1.3219280948873624
1 3:1.3219280948873624
1 3:1.3219280948873624
1 10:3.6438561897747253
1 8:1.019631229684523 7:1.019631229684523 3:0.44064269829578745
1 13:1.8219280948873626 9:1.5294468445267844
1 5:3.6438561897747253
2 14:2.643856189774725
2 11:3.0588936890535687
1 8:0.979533151778619 7:0.979533151778619 3:0.4005446203898835
1 3:1.2016338611696504
1 13:1.7617809780285065 9:1.4692997276679285
1 3:1.2016338611696504
1 3:1.2016338611696504
1 5:3.523561956057013
1 3:1.2016338611696504
1 10:3.523561956057013
1 3:1.2016338611696504
1 8:1.4692997276679285 3:0.6008169305848252
1 3:1.2016338611696504
1 3:1.2016338611696504
1 9:2.938599455335857
2 11:2.938599455335857
2 14:1.2617809780285065 12:1.7617809780285065
2 4:2.523561956057013
2 11:2.938599455335857
2
2 2:3.6438561897747253
2 4:2.643856189774725
2 4:1.3219280948873624 7:1.5294468445267844
2 1:1.8219280948873626 14:1.3219280948873624
2 11:3.0588936890535687
2 14:1.3219280948873624 12:1.8219280948873626
2 1:1.7617809780285065 14:1.2617809780285065
2 2:3.523561956057013
2
2 4:2.643856189774725
2 6:3.6438561897747253
2 4:1.2617809780285065 7:1.4692997276679285
2 14:2.523561956057013
2 6:3.523561956057013
2 4:2.523561956057013
......@@ -105,21 +105,21 @@ public class IdentifyService {
if(matchedSkills.size()==0){
matchedSkills = matchSkillByRegex(formattext,matchedSkills);
}
Skill matchedSkill = null;
List wordList = WordUtil.getNlpSeg(text);
//TODO 匹配到一个的话直接确定意图,否则用模型进行二次识别。识别模型目前必定识别出结果,无概率计算。
if((matchedSkills.size()==1)){
matchedSkill = matchedSkills.get(0);
FakeLog.log("match only one skill:"+matchedSkill.getName() );
}else if(matchedSkills.size()>1){
String identifyBySVMResult = SVMService.predict(wordList);
matchedSkill = StringUtils.isEmpty(identifyBySVMResult)?null:skillList.getSkillByName(identifyBySVMResult);
}
//直接进行模型测试
// Skill matchedSkill = null;
// List wordList = WordUtil.getNlpSeg(text);
// //TODO 匹配到一个的话直接确定意图,否则用模型进行二次识别。识别模型目前必定识别出结果,无概率计算。
// if((matchedSkills.size()==1)){
// matchedSkill = matchedSkills.get(0);
// FakeLog.log("match only one skill:"+matchedSkill.getName() );
// }else if(matchedSkills.size()>1){
// String identifyBySVMResult = SVMService.predict(wordList);
// matchedSkill = StringUtils.isEmpty(identifyBySVMResult)?null:skillList.getSkillByName(identifyBySVMResult);
// }
//直接进行模型测试
Skill matchedSkill = null;
List wordList = WordUtil.getNlpSeg(text);
String identifyBySVMResult = SVMService.predict(wordList);
matchedSkill = StringUtils.isEmpty(identifyBySVMResult)?null:skillList.getSkillByName(identifyBySVMResult);
if(matchedSkill == null){
FakeLog.log("无法识别:"+text);
......
package com.xoado.domain.service;
import com.xoado.domain.svm.comMain;
import com.xoado.domain.util.FakeLog;
import com.xoado.domain.util.WordUtil;
import libsvm.svm;
import libsvm.svm_model;
......@@ -70,7 +71,7 @@ public class SVMService {
labelNumberMap.put(Integer.parseInt(numberAndLabelName[0]),numberAndLabelName[1]);
}
br.close();
System.out.println(labelNumberMap);
FakeLog.log(labelNumberMap.toString());
} catch (Exception e) {
System.err.println("read errors :" + e);
}
......@@ -83,7 +84,7 @@ public class SVMService {
String[] numberAndLabelName = lineTxt.split("\\s+");
wordAndNumberMap.put(numberAndLabelName[0],numberAndLabelName[1]);
}
System.out.println(wordAndNumberMap);
FakeLog.log(wordAndNumberMap.toString());
br.close();
} catch (Exception e) {
System.err.println("read errors :" + e);
......@@ -109,11 +110,13 @@ public class SVMService {
if(!modelIsUseful){
return "";
}
System.out.println(wordList);
//System.out.println(labelNumberMap);
//System.out.println(wordAndNumberMap);
FakeLog.log(wordList.toString());
//FakeLog.log(labelNumberMap);
//FakeLog.log(wordAndNumberMap);
StringBuffer sb = new StringBuffer();
sb.append("1 ");
//label的值
sb.append("");
int countExistWord = 0;
for(Object word :wordList){
String[] words = String.valueOf(word).split("/");
if(words.length == 2) {
......@@ -121,11 +124,17 @@ public class SVMService {
String lexicalCategory = words[1];
if(wordAndNumberMap.get(wordWithOutlexicalCategory)!=null) {
sb.append(wordAndNumberMap.get(wordWithOutlexicalCategory)).append(":").append("1").append(" ");
countExistWord++;
}
}
}
//完全不认识的内容直接抛出
if(countExistWord==0){
return null;
}
int i, predict_probability=0;
//predict_probability为1时支持准确率识别
int i, predict_probability=1;
if(predict_probability == 1)
{
if(svm.svm_check_probability_model(model)==0)
......@@ -137,23 +146,34 @@ public class SVMService {
else {
if(svm.svm_check_probability_model(model)!=0)
{
System.out.println("Model supports probability estimates, but disabled in prediction.\n");
FakeLog.log("Model supports probability estimates, but disabled in prediction.\n");
}
}
System.out.println("转码后数据:" +sb.toString());
String predictResult = "";
FakeLog.log("转码后数据:" +sb.toString());
Map.Entry<Integer,Double> predictResult = null;
try {
predictResult = predict(sb.toString(), model, predict_probability);
} catch (IOException e) {
e.printStackTrace();
}
return predictResult;
//TODO 匹配度默认值暂时定为0.5
String result = null;
//不考虑匹配度的话直接返回匹配到的意图
if(predictResult.getValue()==null){
result = labelNumberMap.get(predictResult.getKey());
}else{
if(predictResult.getValue()>0.5){
result = labelNumberMap.get(predictResult.getKey());
}
}
//完全匹配不到的话,返回null
return result;
}
public static void main(String[] args) throws IOException, InterruptedException {
//更新训练模型
boolean rebuildModel = true;
boolean rebuildModel = false;
if(rebuildModel){
modelIsUseful = false;
deleteFile("model\\train.txt","model\\model.txt","model\\TestResult.txt","model\\train.txt","model\\terms.txt","model\\labels.txt");
......@@ -168,15 +188,15 @@ public class SVMService {
modelIsUseful = true;
}
//String testText = "我想打车去大雁塔";
String testText = "我想打车去吃外卖";
//String testText = "我要坐飞机去北京";
String testText = "帮我把空调打开";
//String testText = "帮我把空调打开";
//String testText = "给我叫个外卖";
FakeLog.log( "待预测语料:"+testText);
List wordList = WordUtil.getNlpSeg(testText);
System.out.println(wordList);
//System.out.println(labelNumberMap);
//System.out.println(wordAndNumberMap);
FakeLog.log(wordList.toString());
//FakeLog.log(labelNumberMap);
//FakeLog.log(wordAndNumberMap);
StringBuffer sb = new StringBuffer();
sb.append("-1 ");
for(Object word :wordList){
......@@ -191,7 +211,7 @@ public class SVMService {
}
String model_file = "model//model.txt";
int i, predict_probability=0;
int i, predict_probability=1;
try
{
......@@ -213,11 +233,10 @@ public class SVMService {
{
if(svm.svm_check_probability_model(model)!=0)
{
System.out.println("Model supports probability estimates, but disabled in prediction.\n");
FakeLog.log("Model supports probability estimates, but disabled in prediction.\n");
}
}
System.out.println("待预测语料:"+testText);
System.out.println("转码后数据:" +sb.toString());
FakeLog.log("转码后数据:" +sb.toString());
predict(sb.toString(),model,predict_probability);
}
catch(FileNotFoundException e)
......@@ -242,7 +261,7 @@ public class SVMService {
return Integer.parseInt(s);
}
private static String predict(String context,svm_model model, int predict_probability) throws IOException
private static Map.Entry<Integer,Double> predict(String context, svm_model model, int predict_probability) throws IOException
{
String predictResult = "";
int correct = 0;
......@@ -258,7 +277,7 @@ public class SVMService {
if(svm_type == svm_parameter.EPSILON_SVR ||
svm_type == svm_parameter.NU_SVR)
{
System.out.println("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma="+svm.svm_get_svr_probability(model)+"\n");
FakeLog.log("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma="+svm.svm_get_svr_probability(model)+"\n");
}
else
{
......@@ -269,9 +288,9 @@ public class SVMService {
// for(int j=0;j<nr_class;j++)
// output.writeBytes(" "+labels[j]);
// output.writeBytes("\n");
for(int j=0;j<nr_class;j++){
System.out.println("labels[j]:"+labels[j]);
}
// for(int j=0;j<nr_class;j++){
// FakeLog.log("labels[j]:"+labels[j]);
// }
}
}
......@@ -289,48 +308,54 @@ public class SVMService {
x[j].value = atof(st.nextToken());
}
Map.Entry<Integer,Double> labelNumberAndEstimate = null;
double predict_label;
if (predict_probability==1 && (svm_type==svm_parameter.C_SVC || svm_type==svm_parameter.NU_SVC))
{
predict_label = svm.svm_predict_probability(model,x,prob_estimates);
int key = (int) predict_label;
System.out.println("predict_label:"+ labelNumberMap.get(key));
FakeLog.log("predict_label:"+ labelNumberMap.get(key));
labelNumberAndEstimate = new AbstractMap.SimpleEntry<>(key,prob_estimates[key-1]);
predictResult = labelNumberMap.get(key);
FakeLog.log("Detailed suitability:");
for(int j=0;j<nr_class;j++) {
System.out.println(prob_estimates[j] + " ");
FakeLog.log(labelNumberMap.get(j+1) + ":"+prob_estimates[j]);
}
}
else
{
predict_label = svm.svm_predict(model,x);
int key = (int) predict_label;
System.out.println("predict_label:"+ labelNumberMap.get(key));
FakeLog.log("predict_label:"+ labelNumberMap.get(key));
predictResult = labelNumberMap.get(key);
}
if(predict_label == target_label)
++correct;
error += (predict_label-target_label)*(predict_label-target_label);
sump += predict_label;
sumt += target_label;
sumpp += predict_label*predict_label;
sumtt += target_label*target_label;
sumpt += predict_label*target_label;
if(svm_type == svm_parameter.EPSILON_SVR ||
svm_type == svm_parameter.NU_SVR)
{
System.out.println("Mean squared error = "+error/total+" (regression)\n");
System.out.println("Squared correlation coefficient = "+
((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))+
" (regression)\n");
}
else {
System.out.println("Accuracy = " + (double) correct / total * 100 +
"% (" + correct + "/" + total + ") (classification)\n");
}
return predictResult;
labelNumberAndEstimate = new AbstractMap.SimpleEntry<>(key,null);
}
// if(predict_label == target_label)
// ++correct;
// error += (predict_label-target_label)*(predict_label-target_label);
// sump += predict_label;
// sumt += target_label;
// sumpp += predict_label*predict_label;
// sumtt += target_label*target_label;
// sumpt += predict_label*target_label;
//这打的是啥日志啊
// if(svm_type == svm_parameter.EPSILON_SVR ||
// svm_type == svm_parameter.NU_SVR)
// {
// FakeLog.log("Mean squared error = "+error/total+" (regression)\n");
// FakeLog.log("Squared correlation coefficient = "+
// ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
// ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))+
// " (regression)\n");
// }
// else {
// FakeLog.log("Accuracy = " + (double) correct / total * 100 +
// "% (" + correct + "/" + total + ") (classification)\n");
// }
return labelNumberAndEstimate;
}
}
......@@ -16,7 +16,7 @@ public class comMain {
// TODO Auto-generated method stub
String[] arg = { "model\\train.txt", // 存放SVM训练模型用的数据的路径
String[] arg = { "-b","1", "model\\train.txt", // 存放SVM训练模型用的数据的路径
"model\\model.txt" }; // 存放SVM通过训练数据训练出来的模型的路径
......
......@@ -116,13 +116,6 @@ class svm_train {
public static void main(String argv[]) throws IOException
{
String model_file = argv[0];
String output_file = argv[1];
argv = new String[4];
argv[0] = model_file;
argv[1] = output_file;
svm_train t = new svm_train();
t.run(argv);
}
......
......@@ -59,7 +59,7 @@ public class DocumentWordsCollector extends AbstractComponent {
int n = 1;
for(File file : files) {
String doc = file.getAbsolutePath();
//遍历文件里的每一行内容
//遍历文件里的每一行内容
BufferedReader br = null;
try {
br = new BufferedReader(
......
......@@ -13,8 +13,7 @@
"keyList": ["吃饭","外卖","科","吃"],
"regexeList": [
"^.*(叫|点|吃).*$",
"^.*来.*碗.*$",
"^.*(科技).*$"
"^.*来.*碗.*$"
]
}
]
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment