C. R. A. P.
(Certain Research Articles & Programs)
I am currently working with neural networks, mainly feed-forward models (FFNNs).
I am trying to put several FFNNs together and construct a network of neural networks (entity).
If you are interested in the subject, I have a report (an account of my work, see below),
some papers and a program to help you create, train and test FFNN entities.
BTW, I got my PhD in 2000.
PhD Thesis
Articles
Other documents
Computer programs
Data
np script language : training and testing a neural network example
top
First, use
this bash script
to produce the training and test data (it simulates a function of two inputs, x/(4y), with x and y each
between 1 and 4, so the output lies between 1/16 and 1; a different function can be used instead, see the line starting with $out):
#!/bin/bash
num_training_lines=2000
num_test_lines=3000
# nothing to change from here on
total_lines=$((num_training_lines+num_test_lines))
perl -e '
srand(1981);
for($i=0;$i<'${total_lines}';$i++){
$x = rand(3) + 1;
$y = rand(3) + 1;
$out = $x / (4*$y);
print "$x\t$y\t$out\n";
}' > data
head -${num_training_lines} data > train
tail -${num_test_lines} data > test
Once the training / test data (in files 'train' and 'test' in your local directory)
have been produced by running the above script, and provided that you have installed
successfully both NNengine and NeuralParser, then run the following
np script
# Open the tab-separated data files written by the bash script
# (columns: x, y, out).
A_TRAIN_FILE = OpenFileObject { Filename = train; }
A_TEST_FILE = OpenFileObject { Filename = test; }
# Columns 1..2 are the two inputs (x and y); the third column, the expected
# output, is left out so these objects can be passed to TestSingle as inputs.
A_TRAIN_INPUTS = ExtractColumnsFromObject { Columns = A_TRAIN_FILE[1..2]; }
A_TEST_INPUTS = ExtractColumnsFromObject { Columns = A_TEST_FILE[1..2]; }
# Create the network object that the TrainSingle / TestSingle commands refer
# to as FFNN.
FFNN = CreateSingle {
SingleType = FFNN;
# presumably the layer sizes: 2 inputs, 5 hidden units, 1 output (this matches
# the 2-input / 1-output data files) -- TODO confirm against the NeuralParser docs
Arch = 2,5,1;
# presumably the name under which the weights are stored -- TODO confirm
Weights = W_SINGLE;
Sigmoid = Yes;
}
# Train the FFNN object on the complete training file (input columns and the
# expected-output column together).
TrainSingle {
Obj = FFNN;
InpFileObj = A_TRAIN_FILE;
Iters = 3000;
# NOTE(review): Beta and Lamda look like a learning rate and a
# momentum/regularisation term -- confirm against the NNengine documentation
Beta = 0.015;
Lamda = 0.0;
# fixed seed, presumably so that a training run can be repeated -- TODO confirm
Seed = 1234;
TrainingType = Continuous;
# weights are saved and progress is reported every 25 iterations
SaveWeightsEveryNIterations = 25;
ShowProgressEveryNIterations = 25;
ProgressFilename = NNengineProgress;
UniqueWeightsFile = no;
Silent = Yes;
PIDfilename = nnengine.pid;
}
# assessment
# known error, on the training set
# Run the trained network on the training inputs and write its outputs to
# final_output.train.
TestSingle {
Obj = FFNN;
InpFileObj = A_TRAIN_INPUTS;
OutFileName = final_output.train;
}
# Column 3 of the training file is the expected output; the network's actual
# output is read back from the file TestSingle has just written.
EXPECTED_OUTPUT_KNOWN = ExtractColumnsFromObject { Columns = A_TRAIN_FILE[3]; }
ACTUAL_OUTPUT_KNOWN = OpenFileObject { Filename = final_output.train; }
# RowExpr is evaluated for every row (here: half the squared difference between
# actual and expected output); ColExpr is then evaluated over all row results.
# NOTE(review): the order in which 'sqrt' and 'sum' are applied is not evident
# from this script -- confirm against the NeuralParser documentation.
ERROR_KNOWN = ColumnsArithmetic {
RowExpr = 0.5*(ACTUAL_OUTPUT_KNOWN[1]-EXPECTED_OUTPUT_KNOWN[1])**2;
ColExpr = sqrt, sum;
OutFileName = error_known;
}
# Relative error per row: abs(act-exp) divided by the larger of abs(act) and
# abs(exp).
# NOTE(review): the test-set counterpart (ERRORPERCENT_UNKNOWN) divides by
# abs(exp) only, so the two percentages are not directly comparable -- confirm
# whether the difference is intended.
ERRORPERCENT_KNOWN = ColumnsArithmetic {
RowExpr = (abs(ACTUAL_OUTPUT_KNOWN[1]-EXPECTED_OUTPUT_KNOWN[1])/(max(abs(ACTUAL_OUTPUT_KNOWN[1]),abs(EXPECTED_OUTPUT_KNOWN[1]))));
# average the per-row relative errors over all rows
# and multiply by 100 to get a percentage
# i.e. 25 means an average relative error of 25 %
ColExpr = average, 100.0 *;
OutFileName = error_known_percent;
}
# unknown error, on the test set
# Run the trained network on the test inputs and write its outputs to
# final_output.test.
TestSingle {
Obj = FFNN;
InpFileObj = A_TEST_INPUTS;
OutFileName = final_output.test;
}
# Column 3 of the test file is the expected output; the network's actual output
# is read back from the file TestSingle has just written.
EXPECTED_OUTPUT_UNKNOWN = ExtractColumnsFromObject { Columns = A_TEST_FILE[3]; }
ACTUAL_OUTPUT_UNKNOWN = OpenFileObject { Filename = final_output.test; }
# below, the RowExpr is evaluated for each row - when all rows are processed, the
# ColExpr is executed over all the resulting numbers.
# NOTE(review): this was described as a root mean square error, but the row term is
# half the squared difference and the ColExpr lists 'sqrt, sum' with no division by
# the number of rows -- confirm what is actually computed.
ERROR_UNKNOWN = ColumnsArithmetic {
RowExpr = 0.5*(ACTUAL_OUTPUT_UNKNOWN[1]-EXPECTED_OUTPUT_UNKNOWN[1])**2;
ColExpr = sqrt, sum;
OutFileName = error_unknown;
}
# this is somewhat controversial, as it tries to calculate a 'percent error' as follows:
# for each expected,actual output pair do:
# abs(exp-act)/abs(exp)
# and then find the average of all these numbers and multiply by 100 to get percent.
ERRORPERCENT_UNKNOWN = ColumnsArithmetic {
# alternative kept for reference: divide by the smaller of abs(act) and abs(exp)
# RowExpr = (abs(ACTUAL_OUTPUT_UNKNOWN[1]-EXPECTED_OUTPUT_UNKNOWN[1])/(min(abs(ACTUAL_OUTPUT_UNKNOWN[1]),abs(EXPECTED_OUTPUT_UNKNOWN[1]))));
RowExpr = (abs(ACTUAL_OUTPUT_UNKNOWN[1]-EXPECTED_OUTPUT_UNKNOWN[1])/abs(EXPECTED_OUTPUT_UNKNOWN[1]));
ColExpr = average, 100.0 *;
OutFileName = error_unknown_percent;
}
# Clean up the intermediate column-extract objects (inputs and expected outputs).
DeleteObjects {
Obj = A_TEST_INPUTS, A_TRAIN_INPUTS, EXPECTED_OUTPUT_KNOWN, EXPECTED_OUTPUT_UNKNOWN;
# presumably also removes the files backing these objects -- TODO confirm
Unlink = Yes;
}
$
np script language : training and testing a Support Vector Machine example
top
Below is an example of how to use a Support Vector Machine (SVM) to learn a boolean function.
First, use
this bash script
to produce the training and test data - the target is a boolean function of 6 inputs (the third input does not affect the output) - this is a stupid example really.
#!/bin/bash
num_training_lines=40
num_test_lines=3000
# nothing to change from here on
total_lines=$((num_training_lines+num_test_lines))
perl -e '
srand(1981);
@inps=(0)x6;
for($i=0;$i<'${total_lines}';$i++){
for($j=0;$j<6;$j++){ $inps[$j] = int(rand()*10000000000) % 2 }
$out = boolean_function(@inps);
print join("\t", @inps)."\t$out\n";
}
sub boolean_function {
return (($_[0] && $_[1]) || ($_[1] && $_[3])) && ($_[4] || $_[5] || $_[0]) ? "1" : "0";
}
' > data
head -${num_training_lines} data > train
tail -${num_test_lines} data > test
Once the training / test data (in files 'train' and 'test' in your local directory) have
been produced by running the above script, and provided that you have installed successfully
both NNengine and NeuralParser, then run the following
np script
# Open the tab-separated files written by the bash script
# (six input columns followed by one output column).
A_TRAIN_FILE_TAB = OpenFileObject { Filename = train; }
A_TEST_FILE_TAB = OpenFileObject { Filename = test; }
# we need to convert the tab-separated (one input per column, input columns followed by output columns)
# data files to libSVM format using this command:
ConvertColumnDataToLIBSVMFormat {
InpFileObj = A_TRAIN_FILE_TAB;
OutFileName = train.svm;
# the output column comes after the inputs in the source file
OutputsFirst = no;
NumInputs = 6;
NumOutputs = 1;
}
# same conversion for the test file
ConvertColumnDataToLIBSVMFormat {
InpFileObj = A_TEST_FILE_TAB;
OutFileName = test.svm;
OutputsFirst = no;
NumInputs = 6;
NumOutputs = 1;
}
# From here on A_TRAIN_FILE / A_TEST_FILE refer to the converted libSVM-format
# files; the *_TAB objects keep the original column layout.
A_TRAIN_FILE = OpenFileObject { Filename = train.svm; }
A_TEST_FILE = OpenFileObject { Filename = test.svm; }
# Create the SVM object that the TrainSVM / TestSVM commands refer to as SVM.
SVM = CreateSVM {
# presumably the file name the trained model is stored under -- TODO confirm
Model = svm_model;
# NOTE(review): this key is spelled 'Proability' -- confirm the spelling the
# parser actually expects before changing it
ProabilityEstimates = no;
}
# Starting training parameters; they are used as the basis of the parameter search.
SVP = CreateSVMTrainingParameters {
# presumably only relevant to a polynomial kernel, as in libSVM -- TODO confirm
Degree = 5;
Kernel = 2; # radial basis
Cachesize = 50;
}
# now, SVM is weird in that the running parameters are CRUCIAL in its performance,
# therefore we are going to spend a lot of time finding these parameters using this command:
# It starts from SVP, searches on the training file and stores the result as
# SVPoptimal, which the TrainSVM command uses afterwards.
FindOptimalSVMTrainingParameters {
ParamsObj = SVP;
InpFileObj = A_TRAIN_FILE;
SaveParamsObjName = SVPoptimal;
NumThreads = 2; # parallelise in 2 threads if you have multi-core cpu
NFoldValidation = 0;
ExploreAtMostNEqualSolutions = 2;
MaxResolution = 0.1;
# min,max,numSteps
# NOTE(review): presumably these are exponents of a logarithmic grid, as in
# libSVM's grid search -- confirm against the NeuralParser documentation
RangeCost = -3,15,2;
RangeGamma = -5,12,2;
# use per-class mean accuracy
PCMCriterion = yes;
MaxDepth = 6;
MaxAccuracy = 100.0;
MinRateOfChangeOfAccuracy = -1;
MinRateOfRateOfChangeOfAccuracy = -1;
OutputFileName = svm.optimal_parameters;
Verbose = yes;
}
# now train without crossvalidation so as to get a model out
# (uses the SVPoptimal parameters saved by the parameter search)
TrainSVM {
Obj = SVM;
InpFileObj = A_TRAIN_FILE;
ParamsObj = SVPoptimal;
Overwrite = Yes;
}
# assessment
# known error, on the training set
# Run the trained SVM on the libSVM-format training file and write its outputs
# to final_output.train.
TestSVM {
Obj = SVM;
InpFileObj = A_TRAIN_FILE;
OutFileName = final_output.train;
}
# Column 7 of the tab-separated training file is the expected output (it follows
# the six inputs); the actual output is read back from the file TestSVM wrote.
EXPECTED_OUTPUT_KNOWN = ExtractColumnsFromObject { Columns = A_TRAIN_FILE_TAB[7]; }
ACTUAL_OUTPUT_KNOWN = OpenFileObject { Filename = final_output.train; }
# inrange(a,b,c,d,e) yields d when b <= a < c and e otherwise, so
# inrange(v,0.0,0.5,0.0,1.0) maps v to 0 when 0 <= v < 0.5 and to 1 otherwise.
# The RowExpr is therefore 1 for a row whose thresholded actual and expected
# outputs differ (an error) and 0 for a row where they agree.
# NOTE(review): the order in which 'sqrt' and 'sum' are applied is not evident
# from this script -- confirm that this yields the number of mismatches.
ERROR_KNOWN = ColumnsArithmetic {
RowExpr = (inrange(ACTUAL_OUTPUT_KNOWN[1],0.0,0.5,0.0,1.0)!=inrange(EXPECTED_OUTPUT_KNOWN[1],0.0,0.5,0.0,1.0));
ColExpr = sqrt, sum;
OutFileName = error_known;
}
ERRORPERCENT_KNOWN = ColumnsArithmetic {
RowExpr = (inrange(ACTUAL_OUTPUT_KNOWN[1],0.0,0.5,0.0,1.0)!=inrange(EXPECTED_OUTPUT_KNOWN[1],0.0,0.5,0.0,1.0));
# count all the mismatches and divide
# by the number of rows in the file to get percent of mismatches
# i.e. 25 means 25 % mismatches
ColExpr = average, 100.0 *;
OutFileName = error_known_percent;
}
# unknown error, on the test set
# Run the trained SVM on the libSVM-format test file and write its outputs to
# final_output.test.
TestSVM {
Obj = SVM;
InpFileObj = A_TEST_FILE;
OutFileName = final_output.test;
}
# Column 7 of the tab-separated test file is the expected output (it follows the
# six inputs); the actual output is read back from the file TestSVM wrote.
EXPECTED_OUTPUT_UNKNOWN = ExtractColumnsFromObject { Columns = A_TEST_FILE_TAB[7]; }
ACTUAL_OUTPUT_UNKNOWN = OpenFileObject { Filename = final_output.test; }
# an error is a discrepancy between expected and actual outputs for the given output column
# if you have binary (two classes 0,1) outputs then the error expression can be
# (ACTUAL_OUTPUT[1]==EXPECTED_OUTPUT[1]) == 0
# (((ACTUAL_OUTPUT[1]<0.5)&&(EXPECTED_OUTPUT[1]<0.5))||((ACTUAL_OUTPUT[1]>=0.5)&&(EXPECTED_OUTPUT[1]>=0.5))) == 0
# or inrange(ACTUAL_OUTPUT[1], 0.0, 0.5, 0.0, 1.0) != inrange(EXPECTED_OUTPUT[1], 0.0, 0.5, 0.0, 1.0)
# (the last form is the one used below), where
# inrange(a,b,c,d,e){if((a>=b)&&(a<c)){return d}return e}
# for more inputs then AND the above
# each error will be a 1 in the output (i.e. note the ==0 / != in the expressions)
# and each success will be a 0, so count all ones to get errors
# NOTE(review): the order in which 'sqrt' and 'sum' are applied is not evident
# from this script -- confirm that this yields the number of mismatches.
ERROR_UNKNOWN = ColumnsArithmetic {
RowExpr = (inrange(ACTUAL_OUTPUT_UNKNOWN[1],0.0,0.5,0.0,1.0)!=inrange(EXPECTED_OUTPUT_UNKNOWN[1],0.0,0.5,0.0,1.0));
ColExpr = sqrt, sum;
OutFileName = error_unknown;
}
ERRORPERCENT_UNKNOWN = ColumnsArithmetic {
RowExpr = (inrange(ACTUAL_OUTPUT_UNKNOWN[1],0.0,0.5,0.0,1.0)!=inrange(EXPECTED_OUTPUT_UNKNOWN[1],0.0,0.5,0.0,1.0));
# count all the mismatches and divide
# by the number of rows in the file to get percent of mismatches
# i.e. 25 means 25 % mismatches
ColExpr = average, 100.0 *;
OutFileName = error_unknown_percent;
}
$