One of the biggest dilemmas faced by decision-making systems is how to produce classifiers from a database efficiently, both in terms of processing time and in terms of obtaining a simple, understandable symbolic representation that facilitates analysis of the problem in question. In this brief report we discuss a very popular tool in the knowledge-discovery-in-databases process, and therefore a useful aid in decision making: decision trees.
13. Chapter 4
Namespace Documentation
4.1
LibraryUtils Namespace Reference
Classes
• class ConversionError
Functions
• template<typename T >
std::string DoubleToStr (T const &myvalue, unsigned int precision)
• bool IsNumeric (const std::string &str)
• long double StrToFloat (const std::string &str)
• string IntToStr (int numero)
• long int StrToInt (const std::string &str)
• string TrimString (const std::string str)
• int SignalNumber (long double value)
• int StrIndexOf (const std::string &str, const std::string &piece)
• bool StrStartsWith (const std::string &str, const std::string &piece)
4.1.1
Function Documentation
4.1.1.1
template<typename T > std::string LibraryUtils::DoubleToStr (T const & myvalue,
unsigned int precision) [inline]
Definition at line 56 of file LibraryUtils.h.
00057 {
00058
std::ostringstream oss;
00059
oss << std::setprecision(precision) << myvalue;
00060
return oss.str();
00061 }
14. 8
4.1.1.2
Namespace Documentation
string LibraryUtils::IntToStr (int numero)
Definition at line 101 of file LibraryUtils.h.
00102 {
00103
std::ostringstream osbuffer;
00104
osbuffer << numero;
00105
return osbuffer.str();
00106 }
4.1.1.3
bool LibraryUtils::IsNumeric (const std::string & str)
Definition at line 65 of file LibraryUtils.h.
00066 {
00067
//Declaração das variáveis de conversão
00068
std::istringstream iss(str);
00069
long double d;
00070
00071
//Movendo o conteudo da string para a variavel do tipo double
00072
iss >> d;
00073
00074
//Fazendo verificações de conversão
00075
if ( !(iss && (iss >> std::ws).eof())) return false;
00076
00077
//Caso haja falha na conversão a função retorna false; caso contrário retorna true
00078
return true;
00079 }
4.1.1.4
int LibraryUtils::SignalNumber (long double value)
Definition at line 136 of file LibraryUtils.h.
00137 {
00138
return (value >= 0)?(1):(-1);
00139 }
4.1.1.5
int LibraryUtils::StrIndexOf (const std::string & str, const std::string & piece)
Retorna posição de uma substring dentro de outra.
Definition at line 144 of file LibraryUtils.h.
00145 {
00146
std::string::size_type loc = str.find(piece, 0);
00147
return (loc != std::string::npos)?(loc):(-1);
00148 }
4.1.1.6
bool LibraryUtils::StrStartsWith (const std::string & str, const std::string & piece)
Verifica se uma cadeia de caracteres começa por uma determinada subcadeia.
Definition at line 154 of file LibraryUtils.h.
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
15. 4.1 LibraryUtils Namespace Reference
9
00155 {
00156
return str.find(piece) == 0;
00157 }
Here is the caller graph for this function:
LibraryUtils::StrStartsWith
4.1.1.7
ID3::readData
main
long double LibraryUtils::StrToFloat (const std::string & str)
Definition at line 83 of file LibraryUtils.h.
00084 {
00085
//Declaração das variáveis de conversão
00086
std::istringstream iss(str);
00087
double d;
00088
00089
//Movendo o conteudo da string para a variavel do tipo double
00090
iss >> d;
00091
00092
//Fazendo verificações de conversão
00093
if ( !(iss && (iss >> std::ws).eof())) throw ConversionError();
00094
00095
//Caso haja falha na conversão uma exceção é lançada; caso contrário o valor é retornado
00096
return d;
00097 }
4.1.1.8
long int LibraryUtils::StrToInt (const std::string & str)
Definition at line 110 of file LibraryUtils.h.
00111 {
00112
//Declaração das variaveis de conversão
00113
std::istringstream iss(str);
00114
int i;
00115
00116
//Movendo o conteudo do stringstream para a variavel
00117
iss >> i;
00118
00119
//Fazendo verificações de conversão
00120
if ( !(iss && (iss >> std::ws).eof())) throw ConversionError();
00121
00122
//Caso haja falha na conversão uma exceção é lançada; caso contrário o valor é retornado
00123
return i;
00124 }
4.1.1.9
string LibraryUtils::TrimString (const std::string str)
Definition at line 128 of file LibraryUtils.h.
00129 {
00130
string result = str;
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
17. Chapter 5
Class Documentation
5.1
LibraryUtils::ConversionError Class Reference
#include <LibraryUtils.h>
Public Member Functions
• const char ∗ what ()
5.1.1
Detailed Description
Definition at line 37 of file LibraryUtils.h.
5.1.2
Member Function Documentation
5.1.2.1
const char∗ LibraryUtils::ConversionError::what () [inline]
Definition at line 40 of file LibraryUtils.h.
00040 { return "Erro de conversão !"; };
The documentation for this class was generated from the following file:
• LibraryUtils.h
18. 12
Class Documentation
5.2
ID3 Class Reference
Collaboration diagram for ID3:
ID3::TreeNode
+ entropy
+ data
+ decompositionAttribute
+ decompositionValue
+ children
+ parent
parent
+ TreeNode()
root
ID3
- number_of_attributes
- attribute_names
- domains
- root
+ ID3()
+ getSymbolValue()
+ getAllValues()
+ getSubset()
+ calculateEntropy()
+ alreadyUsedToDecompose()
+ decomposeNode()
+ readData()
+ printTree()
+ createDecisionTree()
+ printMyLabels()
+ printDomains()
Classes
• class DataPoint
• class TreeNode
Public Member Functions
•
•
•
•
•
ID3 ()
int getSymbolValue (int attribute, std::string symbol)
IntegerList getAllValues (std::vector< DataPoint > data, int attribute)
std::vector< DataPoint > getSubset (std::vector< DataPoint > data, int attribute, int value)
double calculateEntropy (const std::vector< DataPoint > &data)
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
19. 5.2 ID3 Class Reference
•
•
•
•
•
•
•
13
bool alreadyUsedToDecompose (TreeNode ∗node, int attribute)
void decomposeNode (TreeNode ∗node)
bool readData (std::string filename)
void printTree (TreeNode ∗node, std::string tab)
void createDecisionTree ()
void printMyLabels ()
void printDomains ()
5.2.1
Detailed Description
Uma implementação simples do algoritmo ID3 para criação de árvores de decisão
Version
Dec. 01 2011
Author
Michel Alves dos Santos
Definition at line 56 of file MyID3Algorithm.cpp.
5.2.2
Constructor & Destructor Documentation
5.2.2.1
ID3::ID3 () [inline]
Definition at line 152 of file MyID3Algorithm.cpp.
00153
00154
00155
{
5.2.3
Member Function Documentation
5.2.3.1
bool ID3::alreadyUsedToDecompose (TreeNode ∗ node, int attribute) [inline]
root = new TreeNode();
};
Esta função verifica se o atributo especificado é usado para decompor o conjunto de dados em qualquer um
dos pais do nó existente na árvore de decomposição. Verifica recursivamente o nó especificado, bem como
todos os pais.
= null
Definition at line 251 of file MyID3Algorithm.cpp.
00252
00253
00254
00255
00256
00257
00258
00259
00260
{
if (node->children.size() != 0 )
{
if (node->decompositionAttribute == attribute ) return true;
}
if (node->parent == NULL) return false;
return alreadyUsedToDecompose(node->parent, attribute);
}
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
20. 14
Class Documentation
Here is the caller graph for this function:
ID3::alreadyUsedToDecompose
5.2.3.2
ID3::decomposeNode
ID3::createDecisionTree
main
double ID3::calculateEntropy (const std::vector< DataPoint > & data) [inline]
Calcula a entropia do conjunto de dados.
Definition at line 220 of file MyID3Algorithm.cpp.
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
{
int numdata = data.size();
if (numdata == 0) return 0;
int attribute = number_of_attributes - 1;
int numvalues = domains[attribute].size();
double sum = 0;
for (int i = 0; i < numvalues; i++)
{
int count = 0;
for (int j = 0; j < numdata; j++)
{
DataPoint point = data.at(j);
if (point.attributes[attribute] == i) count++;
}
double probability = 1.0*count/numdata;
if (count > 0) sum += -probability*log2(probability);
}
return sum;
};
Here is the caller graph for this function:
ID3::calculateEntropy
5.2.3.3
ID3::decomposeNode
ID3::createDecisionTree
main
void ID3::createDecisionTree () [inline]
Essa função cria a árvore de decisão e a imprime na forma de regras no console.
Definition at line 446 of file MyID3Algorithm.cpp.
00447
00448
00449
00450
{
decomposeNode(root);
printTree(root, "");
};
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
21. 5.2 ID3 Class Reference
15
Here is the call graph for this function:
ID3::alreadyUsedToDecompose
ID3::decomposeNode
ID3::createDecisionTree
ID3::calculateEntropy
ID3::getSubset
ID3::printTree
ID3::getAllValues
VectorIndexOf
Here is the caller graph for this function:
ID3::createDecisionTree
5.2.3.4
main
void ID3::decomposeNode (TreeNode ∗ node) [inline]
Esta função decompõe o nó especificado de acordo com o algoritmo ID3.
Nos dois seguintes laços, o melhor atributo é localizado.
Definition at line 265 of file MyID3Algorithm.cpp.
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275
00276
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300
00301
00302
00303
{
double bestEntropy = 0;
bool
selected = false;
int
selectedAttribute = 0;
int numdata = node->data.size();
int numinputattributes = number_of_attributes - 1;
node->entropy = calculateEntropy(node->data);
if (node->entropy == 0) return;
for (int i = 0; i < numinputattributes; i++)
{
int numvalues = domains[i].size();
if ( alreadyUsedToDecompose(node, i) ) continue;
// Use a variável seguinte para armazenar a entropia para o nó de teste criado com o atributo i
double averageentropy = 0;
for (int j = 0; j < numvalues; j++)
{
std::vector<DataPoint> subset = getSubset(node->data, i, j);
if (subset.size() == 0) continue;
double subentropy = calculateEntropy(subset);
averageentropy += subentropy * subset.size();
}
averageentropy = (double)averageentropy/numdata;
if (selected == false)
{
selected = true;
bestEntropy = averageentropy;
selectedAttribute = i;
}
else
{
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
22. 16
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332
00333
Class Documentation
if (averageentropy < bestEntropy)
{
selected = true;
bestEntropy = averageentropy;
selectedAttribute = i;
}
}
}
if (selected == false) return;
// Agora divide o conjunto de dados usando o atributo selecionado
int numvalues = domains[selectedAttribute].size();
node->decompositionAttribute = selectedAttribute;
for (int j = 0; j < numvalues; j++)
{
node->children.push_back( new TreeNode() );
node->children[j]->parent = node;
node->children[j]->data = getSubset(node->data, selectedAttribute, j);
node->children[j]->decompositionValue = j;
}
// Recursivamente divide nós filhos
for (int j = 0; j < numvalues; j++)
{
decomposeNode(node->children[j]);
}
};
Here is the call graph for this function:
ID3::alreadyUsedToDecompose
ID3::decomposeNode
ID3::calculateEntropy
ID3::getSubset
Here is the caller graph for this function:
ID3::decomposeNode
5.2.3.5
ID3::createDecisionTree
main
IntegerList ID3::getAllValues (std::vector< DataPoint > data, int attribute) [inline]
Retorna todos os valores do atributo especificado no conjunto de dados.
Definition at line 175 of file MyID3Algorithm.cpp.
00176
00177
00178
00179
00180
00181
00182
00183
{
StringList values;
int num = data.size();
for (int i = 0; i < num; i++)
{
DataPoint point = data.at(i);
std::string symbol = domains[attribute].at( point.attributes[attribute] );
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
23. 5.2 ID3 Class Reference
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197
00198
00199
17
int index = VectorIndexOf(values, symbol);
if (index < 0) { values.push_back(symbol); }
}
int array[ values.size() ];
for (unsigned int i = 0; i < values.size(); i++)
{
std::string symbol = values.at(i);
array[i] = VectorIndexOf(domains[attribute], symbol);
}
IntegerList l;
for (unsigned int i = 0; i < values.size(); i++) l.push_back( array[i] );
return l;
}
Here is the call graph for this function:
ID3::getAllValues
VectorIndexOf
Here is the caller graph for this function:
ID3::getAllValues
5.2.3.6
ID3::printTree
ID3::createDecisionTree
main
std::vector<DataPoint> ID3::getSubset (std::vector< DataPoint > data, int attribute, int
value) [inline]
Retorna um subconjunto de dados, no qual o valor do atributo de todos os pontos de dados é o valor
especificado
Definition at line 204 of file MyID3Algorithm.cpp.
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
{
std::vector<DataPoint> subset; //= new Vector();
int num = data.size();
for (int i = 0; i < num; i++)
{
DataPoint point = data.at(i);
if (point.attributes[attribute] == value) subset.push_back(point);
}
return subset;
};
Here is the caller graph for this function:
ID3::getSubset
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
ID3::decomposeNode
ID3::createDecisionTree
main
24. 18
5.2.3.7
Class Documentation
int ID3::getSymbolValue (int attribute, std::string symbol) [inline]
Esta função retorna um inteiro correspondente ao valor simbólico do atributo. Se o símbolo não existe no
domínio, o símbolo é adicionado ao domínio do atributo.
Definition at line 161 of file MyID3Algorithm.cpp.
00162
00163
00164
00165
00166
00167
00168
00169
00170
{
int index = VectorIndexOf(domains.at(attribute), symbol);
if (index < 0)
{
domains[attribute].push_back(symbol);
return domains[attribute].size() - 1;
}
return index;
};
Here is the call graph for this function:
ID3::getSymbolValue
VectorIndexOf
Here is the caller graph for this function:
ID3::getSymbolValue
5.2.3.8
ID3::readData
main
void ID3::printDomains () [inline]
Imprime o conjunto de testes utilizado.
Definition at line 467 of file MyID3Algorithm.cpp.
00468
00469
00470
00471
00472
{
for(unsigned int i = 0; i < domains.size(); i++)
{
std::cerr << "[" << attribute_names.at(i) << "]\t";
for(unsigned int j = 0; j < domains[i].size(); j++){ std::cerr << "[" << (domains[i]).at(j) << "]\t"; }
00473
std::cerr << std::endl;
00474
}
00475
};
Here is the caller graph for this function:
ID3::printDomains
5.2.3.9
main
void ID3::printMyLabels () [inline]
Imprime o nome dos rótulos ou nome dos atributos.
Definition at line 455 of file MyID3Algorithm.cpp.
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
25. 5.2 ID3 Class Reference
00456
00457
00458
00459
00460
00461
00462
5.2.3.10
19
{
for(unsigned int i = 0; i < attribute_names.size(); i++)
{
std::cerr << "[" << attribute_names.at(i) << "]\t";
}
std::cerr << std::endl;
};
void ID3::printTree (TreeNode ∗ node, std::string tab) [inline]
Esta função imprime a árvore de decisão na forma de regras.
Definition at line 408 of file MyID3Algorithm.cpp.
00409
00410
00411
00412
00413
00414
00415
00416
00417
00418
{
int outputattr = number_of_attributes - 1;
if (node->children.size() == 0)
{
IntegerList values = getAllValues(node->data, outputattr);
if (values.size() == 1)
{
std::cerr << tab + " " + attribute_names[outputattr] + " = \"" + domains[outputattr].at(values[0]) + "\";" << std::endl;
00419
return;
00420
}
00421
00422
std::cerr << tab + "\t" + attribute_names[outputattr] + " = {" << std::endl;
00423
for (unsigned int i = 0; i < values.size(); i++)
00424
{
00425
std::cerr << "\"" + domains[outputattr].at(values[i]) + "\" " << std::endl;
00426
if ( i != values.size() - 1 ) std::cerr << " , ";
00427
}
00428
00429
std::cerr << " };" << std::endl;
00430
return;
00431
}
00432
00433
unsigned int numvalues = node->children.size();
00434
for (unsigned int i = 0; i < numvalues; i++)
00435
{
00436
std::cerr << tab + "if( " + attribute_names[node->decompositionAttribute] + " == \"" + domains[node->decompositionAttribute].at(i) + "\")" + "{" << std::endl;
00437
printTree(node->children[i], tab + " ");
00438
if (i != numvalues - 1) std::cerr << tab + "} " + "else ";
00439
else std::cerr << tab + "}" << std::endl;
00440
}
00441
};
Here is the call graph for this function:
ID3::printTree
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
ID3::getAllValues
VectorIndexOf
26. 20
Class Documentation
Here is the caller graph for this function:
ID3::printTree
5.2.3.11
ID3::createDecisionTree
main
bool ID3::readData (std::string filename) [inline]
Função para ler a base de dados.
Definition at line 338 of file MyID3Algorithm.cpp.
00339
00340
00341
00342
00343
00344
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359
00360
00361
{
/*Objetos para manipulação do arquivo*/
std::fstream myfile(filename.c_str());
std::string myline;
/*Verifica se o arquivo pôde ser aberto*/
if (!myfile.is_open())
{
std::cerr << "Unable to open data file: " + filename + "\n" << std::endl;
return EXIT_FAILURE;
}
/*Executa a primeira leitura - leitura do cabeçalho da base de testes*/
getline(myfile, myline);
/*Extração das strings de cabeçalho*/
StringTokenizer mytokenizer = StringTokenizer(myline, "\t");
number_of_attributes = mytokenizer.countTokens();
if (number_of_attributes <= 1)
{
std::cerr << "Read line: " + myline << std::endl;
std::cerr << "Could not obtain the names of attributes in the line" << std::endl;
00362
std::cerr << "Expecting at least one input attribute and one output attribute" << std::endl;
00363
return EXIT_FAILURE;
00364
}
00365
00366
/*Alocando espaço para as listas de valores dos atributos e extraindo rótulos de cada atributo*/
00367
for (int i = 0; i < number_of_attributes; i++)
00368
{
00369
domains.push_back( StringList() );
00370
attribute_names.push_back( mytokenizer.nextToken() );
00371
}
00372
00373
/*Executa leitura das demais linhas da base de dados*/
00374
while (!myfile.eof())
00375
{
00376
/*Extração da linha e atribuição a variável temporária*/
00377
getline(myfile, myline);
00378
00379
/*Caso não encontre nenhum dado então não deve ser feita a quebra da cadeia*/
00380
bool can_tokenize = !(LibraryUtils::StrStartsWith(myline, "//") || (myline == ""));
00381
00382
/*Tokenizando a string*/
00383
if (can_tokenize)
00384
{
00385
/*Estabelecendo o token e o número de substrings*/
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
27. 5.2 ID3 Class Reference
00386
00387
00388
00389
00390
00391
00392
00393
00394
00395
00396
00397
00398
00399
00400
00401
00402
00403
21
mytokenizer = StringTokenizer(myline, "\t");
/*Criação dos datapoints e nós*/
DataPoint point = DataPoint(number_of_attributes);
for(int i = 0; i < number_of_attributes; i++)
{
point.attributes[i] = getSymbolValue(i, mytokenizer.nextToken() );
}
root->data.push_back(point);
}
}
/*Fechando o arquivo*/
myfile.close();
/*Retorno da função*/
return true;
};
Here is the call graph for this function:
StringTokenizer::countTokens
ID3::getSymbolValue
VectorIndexOf
ID3::readData
StringTokenizer::nextToken
LibraryUtils::StrStartsWith
Here is the caller graph for this function:
ID3::readData
main
The documentation for this class was generated from the following file:
• MyID3Algorithm.cpp
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
28. 22
Class Documentation
5.3
LibraryTime Class Reference
#include <LibraryTime.h>
Public Member Functions
•
•
•
•
•
•
LibraryTime ()
void Start (void)
void Stop (void)
double Get (void)
void Delay (const int miliseconds)
virtual ∼LibraryTime ()
5.3.1
Detailed Description
Classe que executa verificação de tempo decorrido para realização de determinadas tarefas. Usada para
fazer análise de complexidade computacional em relação a quanto tempo um determinado trecho de código
consome para ser realizado. O retorno de Get() é dado em segundos, com base no clock da máquina (a diferença em ticks é dividida por CLOCKS_PER_SEC).
Definition at line 19 of file LibraryTime.h.
5.3.2
Constructor & Destructor Documentation
5.3.2.1
LibraryTime::LibraryTime () [inline]
Método construtor da classe.
Definition at line 35 of file LibraryTime.h.
00035 {};
5.3.2.2
virtual LibraryTime::∼LibraryTime () [inline, virtual]
Método destrutor da classe responsável por desalocar quaisquer recursos previamente alocados retornando
os mesmos ao sistema.
Definition at line 66 of file LibraryTime.h.
00066 {};
5.3.3
Member Function Documentation
5.3.3.1
void LibraryTime::Delay (const int miliseconds) [inline]
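Método que impõe uma pausa na execução do fluxo do programa a partir do ponto onde ela foi chamada.
O valor recebido é somado diretamente ao resultado de clock(), portanto a espera é contada em ticks de clock e só corresponde a milissegundos quando CLOCKS_PER_SEC vale 1000.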
Definition at line 59 of file LibraryTime.h.
00060
{ clock_t exit_time = clock() + miliseconds; while(clock() <= exit_time); }
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
29. 5.3 LibraryTime Class Reference
5.3.3.2
23
double LibraryTime::Get (void) [inline]
Método responsável por retornar o intervalo, em segundos, decorrido entre as chamadas dos métodos
Start() e Stop()
Definition at line 53 of file LibraryTime.h.
00053 { return (double( stop - start )/CLOCKS_PER_SEC); }
Here is the caller graph for this function:
LibraryTime::Get
5.3.3.3
main
void LibraryTime::Start (void) [inline]
Método responsável por capturar o tempo ou intervalo inicial de execução de alguma operação.
Definition at line 41 of file LibraryTime.h.
00041 { start = clock(); }
Here is the caller graph for this function:
LibraryTime::Start
5.3.3.4
main
void LibraryTime::Stop (void) [inline]
Método responsável por capturar o tempo ou intervalo de interrupção de execução de alguma operação.
Definition at line 47 of file LibraryTime.h.
00047 { stop
= clock(); }
Here is the caller graph for this function:
LibraryTime::Stop
main
The documentation for this class was generated from the following file:
• LibraryTime.h
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
30. 24
Class Documentation
5.4
StringTokenizer Class Reference
#include <StringTokenizer.h>
Public Member Functions
•
•
•
•
•
•
•
•
•
•
StringTokenizer (const std::string &_str, const std::string &_delim)
∼StringTokenizer ()
int countTokens ()
bool hasMoreTokens ()
std::string nextToken ()
int nextIntToken ()
double nextFloatToken ()
std::string nextToken (const std::string &delim)
std::string remainingString ()
std::string filterNextToken (const std::string &filterStr)
5.4.1
Detailed Description
Definition at line 18 of file StringTokenizer.h.
5.4.2
Constructor & Destructor Documentation
5.4.2.1
StringTokenizer::StringTokenizer (const std::string & _str, const std::string & _delim)
Definition at line 9 of file StringTokenizer.cpp.
00010 {
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
if ((_str.length() == 0) || (_delim.length() == 0)) return;
token_str = _str;
delim
= _delim;
/*
Remove sequential delimiter
*/
unsigned int curr_pos = 0;
while(true)
{
if ((curr_pos = token_str.find(delim,curr_pos)) != std::string::npos)
{
curr_pos += delim.length();
while(token_str.find(delim,curr_pos) == curr_pos)
{
token_str.erase(curr_pos,delim.length());
}
}
else
break;
}
/*
Trim leading delimiter
*/
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
31. 5.4 StringTokenizer Class Reference
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053 }
5.4.2.2
25
if (token_str.find(delim,0) == 0)
{
token_str.erase(0,delim.length());
}
/*
Trim ending delimiter
*/
curr_pos = 0;
if ((curr_pos = token_str.rfind(delim)) != std::string::npos)
{
if (curr_pos != (token_str.length() - delim.length())) return;
token_str.erase(token_str.length() - delim.length(),delim.length());
}
StringTokenizer::∼StringTokenizer () [inline]
Definition at line 22 of file StringTokenizer.h.
00022 {};
5.4.3
Member Function Documentation
5.4.3.1
int StringTokenizer::countTokens ()
Definition at line 55 of file StringTokenizer.cpp.
00056 {
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082 }
unsigned int prev_pos = 0;
int num_tokens
= 0;
if (token_str.length() > 0)
{
num_tokens = 0;
unsigned int curr_pos = 0;
while(true)
{
if ((curr_pos = token_str.find(delim,curr_pos)) != std::string::npos)
{
num_tokens++;
prev_pos = curr_pos;
curr_pos += delim.length();
}
else
break;
}
return ++num_tokens;
}
else
{
return 0;
}
Here is the caller graph for this function:
StringTokenizer::countTokens
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
ID3::readData
main
32. 26
5.4.3.2
Class Documentation
std::string StringTokenizer::filterNextToken (const std::string & filterStr)
Definition at line 147 of file StringTokenizer.cpp.
00148 {
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158 }
std::string tmp_str
= nextToken();
unsigned int currentPos = 0;
while((currentPos = tmp_str.find(filterStr,currentPos)) != std::string::npos)
{
tmp_str.erase(currentPos,filterStr.length());
}
return tmp_str;
Here is the call graph for this function:
StringTokenizer::filterNextToken
5.4.3.3
StringTokenizer::nextToken
bool StringTokenizer::hasMoreTokens ()
Definition at line 84 of file StringTokenizer.cpp.
00085 {
00086
00087 }
5.4.3.4
return (token_str.length() > 0);
double StringTokenizer::nextFloatToken ()
Definition at line 115 of file StringTokenizer.cpp.
00116 {
00117
00118 }
return atof(nextToken().c_str());
Here is the call graph for this function:
StringTokenizer::nextFloatToken
5.4.3.5
StringTokenizer::nextToken
int StringTokenizer::nextIntToken ()
Definition at line 110 of file StringTokenizer.cpp.
00111 {
00112
00113 }
return atoi(nextToken().c_str());
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
33. 5.4 StringTokenizer Class Reference
27
Here is the call graph for this function:
StringTokenizer::nextIntToken
5.4.3.6
StringTokenizer::nextToken
std::string StringTokenizer::nextToken (const std::string & delim)
Definition at line 120 of file StringTokenizer.cpp.
00121 {
00122
if (token_str.length() == 0)
00123
return "";
00124
00125
std::string tmp_str = "";
00126
unsigned int pos
= token_str.find(delimiter,0);
00127
00128
if (pos != std::string::npos)
00129
{
00130
tmp_str
= token_str.substr(0,pos);
00131
token_str = token_str.substr(pos + delimiter.length(),token_str.length() - pos);
00132
}
00133
else
00134
{
00135
tmp_str
= token_str.substr(0,token_str.length());
00136
token_str = "";
00137
}
00138
00139
return tmp_str;
00140 }
5.4.3.7
std::string StringTokenizer::nextToken ()
Definition at line 89 of file StringTokenizer.cpp.
00090 {
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108 }
if (token_str.length() == 0)
return "";
std::string tmp_str = "";
unsigned int pos
= token_str.find(delim,0);
if (pos != std::string::npos)
{
tmp_str
= token_str.substr(0,pos);
token_str = token_str.substr(pos+delim.length(),token_str.length()-pos);
}
else
{
tmp_str
= token_str.substr(0,token_str.length());
token_str = "";
}
return tmp_str;
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
34. 28
Class Documentation
Here is the caller graph for this function:
StringTokenizer::filterNextToken
StringTokenizer::nextFloatToken
StringTokenizer::nextToken
StringTokenizer::nextIntToken
ID3::readData
5.4.3.8
main
std::string StringTokenizer::remainingString ()
Definition at line 142 of file StringTokenizer.cpp.
00143 {
00144
00145 }
return token_str;
The documentation for this class was generated from the following files:
• StringTokenizer.h
• StringTokenizer.cpp
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
35. Chapter 6
File Documentation
6.1
LibraryTime.cxx File Reference
#include "LibraryTime.h"
#include <ctime>
Include dependency graph for LibraryTime.cxx:
LibraryTime.cxx
ctime
This graph shows which files directly or indirectly include this file:
LibraryTime.cxx
36. 30
6.2
File Documentation
LibraryTime.cxx
00001 /*
00002 * TimeInterval.cpp
00003 *
00004 * Created on: 26/10/2009
00005 * Author: Michel Alves dos Santos
00006 */
00007
00008 #include "LibraryTime.h"
00009
00010
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
37. 6.3 LibraryTime.h File Reference
6.3
31
LibraryTime.h File Reference
#include <ctime>
Include dependency graph for LibraryTime.h:
LibraryTime.h
ctime
This graph shows which files directly or indirectly include this file:
LibraryTime.h
MyID3Algorithm.cpp
Classes
• class LibraryTime
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
39. 6.5 LibraryUtils.h File Reference
6.5
33
LibraryUtils.h File Reference
#include <cstdio>
#include <vector>
#include <iomanip>
#include <sstream>
#include <cstdlib>
#include <iostream>
#include <exception>
#include <algorithm>
Include dependency graph for LibraryUtils.h:
LibraryUtils.h
cstdio
vector
iomanip
sstream
cstdlib
iostream
exception
algorithm
Classes
• class LibraryUtils::ConversionError
Namespaces
• namespace LibraryUtils
Functions
• template<typename T >
std::string LibraryUtils::DoubleToStr (T const &myvalue, unsigned int precision)
• bool LibraryUtils::IsNumeric (const std::string &str)
• long double LibraryUtils::StrToFloat (const std::string &str)
• string LibraryUtils::IntToStr (int numero)
• long int LibraryUtils::StrToInt (const std::string &str)
• string LibraryUtils::TrimString (const std::string str)
• int LibraryUtils::SignalNumber (long double value)
• int LibraryUtils::StrIndexOf (const std::string &str, const std::string &piece)
• bool LibraryUtils::StrStartsWith (const std::string &str, const std::string &piece)
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
40. 34
6.6
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
File Documentation
LibraryUtils.h
/*
* LibraryUtils.h
*
* Created on: 05/01/2009
* Author: Michel Alves dos Santos
*/
#ifndef LIBRARYUTILS_H_
#define LIBRARYUTILS_H_
//Area de definicao de cabecalhos utilizados.
#include <cstdio>
#include <vector>
#include <iomanip>
#include <sstream>
#include <cstdlib>
#include <iostream>
#include <exception>
#include <algorithm>
using namespace std;
//Definicao de constantes e macros utilizadas
//const int MAX_LENGTH_CHAR_ARRAY = 255;
//Definicao de tipos usados no projeto
//typedef enum{False, True} Boolean;
//typedef float real;
//typedef char MyCharArrayString[MAX_LENGTH_CHAR_ARRAY];
//-------------------------------------------------------------//
//Implementacao dos prototipos das funcoes
//
//-------------------------------------------------------------//
namespace LibraryUtils
{
//Exception type that reports a failed type conversion.
class ConversionError : public std::exception
{
   public :
      //Returns a fixed description of the error (a string literal, so the
      //pointer stays valid for the lifetime of the program).
      //The const noexcept qualifiers make this an override of
      //std::exception::what(); without them the member only hides the base
      //version, and a handler that catches std::exception& never sees this
      //message.
      const char* what() const noexcept override { return "Erro de conversão !"; }
};
//Funcao para conversao de numeros reais [ponto flutuante] para string.
//@numero : Número que deve ser convertido
//@digitos : Quantidade de dígitos que deve ser convertida.
//string FloatToStr(double numero, int digitos)
//{
// MyCharArrayString str;
// gcvt(numero, (digitos > MAX_LENGTH_CHAR_ARRAY)?(MAX_LENGTH_CHAR_ARRAY):(digitos) , str);
00050 // return string(str);
00051 //}
00052
//Converts a value to its textual representation using a string stream.
//@myvalue   : Value to convert; any type that can be written to a std::ostream.
//@precision : Value handed to std::setprecision. No floating-point notation
//             flag is set here, so the stream keeps its default notation.
//Returns the characters the stream produced for myvalue.
template <typename T>
std::string DoubleToStr(T const& myvalue, unsigned int precision)
{
   std::ostringstream oss;
   oss << std::setprecision(precision) << myvalue;
   return oss.str();
}
00062
00063 //Função que verifica se determinado valor realmente é numérico
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
41. 6.6 LibraryUtils.h
35
//Checks whether a string holds exactly one numeric value.
//@str : String to check.
//Returns true when a long double can be extracted from str and only
//whitespace follows it; returns false otherwise.
//Marked inline because the definition lives in a header.
inline bool IsNumeric(const std::string& str)
{
   std::istringstream iss(str);
   long double d = 0;

   //Formatted extraction: fails (and clears the stream's good state) when no
   //number can be read.
   iss >> d;

   //Numeric only if the extraction succeeded and, after discarding trailing
   //whitespace, the end of the input has been reached.
   return iss && (iss >> std::ws).eof();
}
00080
00081 //Funcao para conversão de strings em numeros de ponto flutuante
00082 //@str : String que deve ser convertida
00083 long double StrToFloat(const std::string& str)
00084 {
00085
//Declaração das variáveis de conversão
00086
std::istringstream iss(str);
00087
double d;
00088
00089
//Movendo o conteudo da string para a variavel do tipo double
00090
iss >> d;
00091
00092
//Fazendo verificações de conversão
00093
if ( !(iss && (iss >> std::ws).eof())) throw ConversionError();
00094
00095
//Caso haja falha na conversão uma exceção é lançada caso não o valor é retorna
do
00096
return d;
00097 }
00098
00099 //Funcao para conversão de numeros inteiros para string.
00100 //@numero : Número que deve ser convertido
00101 string IntToStr(int numero)
00102 {
00103
std::ostringstream osbuffer;
00104
osbuffer << numero;
00105
return osbuffer.str();
00106 }
00107
00108 //Funcao para conversão de strings em numeros inteiros
00109 //@str : String que deve ser convertida
00110 long int StrToInt(const std::string& str)
00111 {
00112
//Declaração das variaveis de conversão
00113
std::istringstream iss(str);
00114
int i;
00115
00116
//Movendo o conteudo do stringstream para a variavel
00117
iss >> i;
00118
00119
//Fazendo verificações de conversão
00120
if ( !(iss && (iss >> std::ws).eof())) throw ConversionError();
00121
00122
//Caso haja falha na conversão uma exceção é lançada caso não o valor é retorna
do
00123
return i;
00124 }
00125
00126 //Função que remove espaços em branco da direita e da esquerda de uma string
00127 //@str : String da qual devem ser retirados os espaços finais e iniciais
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
43. 6.7 MyID3Algorithm.cpp File Reference
6.7
37
MyID3Algorithm.cpp File Reference
#include <cmath>
#include <iomanip>
#include <vector>
#include <fstream>
#include <cstdlib>
#include <iostream>
#include <exception>
#include "LibraryTime.h"
#include "LibraryUtils.h"
#include <cstdio>
#include <sstream>
#include <algorithm>
#include <stdio.h>
#include <stdlib.h>
#include <string>
Include dependency graph for MyID3Algorithm.cpp:
MyID3Algorithm.cpp
cmath
iomanip
vector
fstream
cstdlib
iostream
exception
LibraryTime.h
cstdio
ctime
This graph shows which files directly or indirectly include this file:
MyID3Algorithm.cpp
Classes
• class ID3
• class ID3::DataPoint
• class ID3::TreeNode
Typedefs
• typedef std::vector< std::string > StringList
• typedef std::vector< StringList > StringTable
• typedef std::vector< int > IntegerList
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
sstream
algorithm
stdio.h
stdlib.h
string
44. 38
File Documentation
Functions
• int VectorIndexOf (const StringList v, const std::string element)
• int main (int argc, char ∗argv[ ])
6.7.1
Typedef Documentation
6.7.1.1
typedef std::vector<int> IntegerList
Definição do tipo ’lista de inteiros’. Com esse tipo podemos manter uma lista de números inteiros através
de uma definição mais enxuta.
Definition at line 38 of file MyID3Algorithm.cpp.
6.7.1.2
typedef std::vector<std::string> StringList
Definição do tipo ’lista de cadeias de caracteres’. Com esse tipo é possível manter listas de cadeias na
forma de um vetor.
Definition at line 26 of file MyID3Algorithm.cpp.
6.7.1.3
typedef std::vector<StringList> StringTable
Definição do tipo ’lista de listas de cadeias de caracteres’. Com esse tipo podemos manter uma tabela de
cadeias de caracteres.
Definition at line 32 of file MyID3Algorithm.cpp.
6.7.2
Function Documentation
6.7.2.1
int main (int argc, char ∗ argv[ ])
Definição da função principal do programa.
Definition at line 481 of file MyID3Algorithm.cpp.
00482 {
00483
//Resgata o número de argumentos.
00484
int number_of_arguments = argc;
00485
std::string str_file_path;
00486
00487
//Testando o número de argumentos
00488
if (number_of_arguments < 2 )
00489
{
00490
std::cerr << std::endl << "[Você precisa especificar a base de testes!]" << s
td::endl << std::endl;
00491
return EXIT_FAILURE;
00492
}
00493
else
00494
{
00495
str_file_path = argv[1];
00496
}
00497
00498
//Declaração de um objeto do tipo LibraryTime
00499
LibraryTime t;
00500
00501
//Inicio da captura de tempo
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
45. 6.7 MyID3Algorithm.cpp File Reference
00502
00503
00504
00505
00506
00507
00508
00509
00510
00511
00512
00513
00514
00515
00516
00517
00518
00519
00520
39
t.Start();
//Declarando objeto do tipo ID3
ID3 myid3 = ID3();
//Verificando o status de leitura
if (!myid3.readData( str_file_path )) return EXIT_FAILURE;
//Executa a criação da árvore
myid3.createDecisionTree();
std::cerr << std::endl << "[Dataset]" << std::endl;
myid3.printDomains(); std::cerr << std::endl;
//Término da captura de tempo
t.Stop();
//Tempo consumido para execução do programa
std::cout << std::endl << "Tempo consumido: "<< std::setprecision(5) << t.Get()
<< std::endl;
00521
return EXIT_SUCCESS;
00522 }
Here is the call graph for this function:
ID3::alreadyUsedToDecompose
ID3::decomposeNode
ID3::calculateEntropy
ID3::printTree
ID3::getSubset
StringTokenizer::countTokens
ID3::getAllValues
ID3::createDecisionTree
LibraryTime::Get
ID3::printDomains
VectorIndexOf
main
ID3::getSymbolValue
ID3::readData
StringTokenizer::nextToken
LibraryTime::Start
LibraryUtils::StrStartsWith
LibraryTime::Stop
6.7.2.2
int VectorIndexOf (const StringList v, const std::string element)
Função que verifica se um determinado elemento pertence a uma lista e qual é a sua posição. Caso o
elemento não seja encontrado a função retorna -1.
Definition at line 44 of file MyID3Algorithm.cpp.
00045 {
00046
int index = -1;
00047
for(unsigned int i = 0; i < v.size(); i++) { if(v.at(i) == element){ index = i;
break;} }
00048
return index;
00049 }
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
46. 40
File Documentation
Here is the caller graph for this function:
ID3::getAllValues
ID3::printTree
ID3::createDecisionTree
ID3::getSymbolValue
ID3::readData
main
VectorIndexOf
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
47. 6.8 MyID3Algorithm.cpp
6.8
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00026
00027
00032
00033
00038
00039
00044
00045
00046
00047
41
MyID3Algorithm.cpp
//============================================================================
// Name        : MyID3Algorithm
// Author      : Michel Alves dos Santos
// Version     : 1.0 Beta
// Description : Algoritmo de geração de árvores de decisão baseado no algoritmo
//               original de Ross Quinlan (1986)
//============================================================================
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
<cmath>
<iomanip>
<vector>
<fstream>
<cstdlib>
<iostream>
<exception>
"LibraryTime.h"
"LibraryUtils.h"
"StringTokenizer.h"
using namespace std;
typedef std::vector<std::string> StringList;
typedef std::vector<StringList> StringTable;
typedef std::vector<int> IntegerList;
int VectorIndexOf(const StringList v, const std::string element)
{
int index = -1;
for(unsigned int i = 0; i < v.size(); i++) { if(v.at(i) == element){ index = i;
break;} }
00048
return index;
00049 }
00050
00056 class ID3
00057 {
00061
int number_of_attributes;
00062
00066
StringList attribute_names;
00067
00076
StringTable domains;
00077
00081
class DataPoint
00082
{
00083
public:
00087
IntegerList attributes;
00088
00092
DataPoint(int number_of_attributes)
00093
{
00094
//attributes.reserve(number_of_attributes);
00095
for(int i = 0; i < number_of_attributes; i++) attributes.push_back(0);
00096
};
00097
};
00098
00102
class TreeNode
00103
{
00104
public:
00108
double entropy;
00109
00113
std::vector<DataPoint> data;
00114
00118
int decompositionAttribute;
00119
00123
int decompositionValue;
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
48. 42
00124
00128
00129
00133
00134
00138
00139
00140
00141
00142
00146
00147
00148
00152
00153
00154
00155
00156
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197
00198
00199
00200
00204
File Documentation
std::vector< TreeNode* > children;
TreeNode* parent;
TreeNode()
{
}
};
TreeNode* root;
public:
ID3()
{
root = new TreeNode();
};
int getSymbolValue(int attribute, std::string symbol)
{
int index = VectorIndexOf(domains.at(attribute), symbol);
if (index < 0)
{
domains[attribute].push_back(symbol);
return domains[attribute].size() - 1;
}
return index;
};
// Returns the codes (indices into domains[attribute]) of the distinct values
// that `attribute` takes among the points in `data`, in order of first
// appearance. An empty `data` yields an empty list.
//
// The result is built directly in a vector. The previous intermediate
// `int array[values.size()]` was a variable-length array, which is a compiler
// extension rather than standard C++ and is zero-length for empty input.
// `data` is taken by const reference so the data set is no longer copied.
IntegerList getAllValues(const std::vector<DataPoint>& data, int attribute)
{
    // Distinct symbols seen so far, kept in first-seen order.
    StringList values;
    for (std::vector<DataPoint>::size_type i = 0; i < data.size(); ++i)
    {
        const DataPoint& point = data[i];
        const std::string& symbol = domains[attribute].at( point.attributes[attribute] );
        if (VectorIndexOf(values, symbol) < 0) values.push_back(symbol);
    }

    // Translate each distinct symbol back to its code in the attribute's domain.
    IntegerList codes;
    codes.reserve(values.size());
    for (StringList::size_type i = 0; i < values.size(); ++i)
    {
        codes.push_back( VectorIndexOf(domains[attribute], values[i]) );
    }
    return codes;
}
std::vector<DataPoint> getSubset(std::vector<DataPoint> data, int attribute, in
t value)
00205
{
00206
std::vector<DataPoint> subset; //= new Vector();
00207
int num = data.size();
00208
00209
for (int i = 0; i < num; i++)
00210
{
00211
DataPoint point = data.at(i);
00212
if (point.attributes[attribute] == value) subset.push_back(point);
00213
}
00214
return subset;
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
49. 6.8 MyID3Algorithm.cpp
00215
00216
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00265
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275
00276
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293
00294
43
};
double calculateEntropy(const std::vector<DataPoint> &data)
{
int numdata = data.size();
if (numdata == 0) return 0;
int attribute = number_of_attributes - 1;
int numvalues = domains[attribute].size();
double sum = 0;
for (int i = 0; i < numvalues; i++)
{
int count = 0;
for (int j = 0; j < numdata; j++)
{
DataPoint point = data.at(j);
if (point.attributes[attribute] == i) count++;
}
double probability = 1.0*count/numdata;
if (count > 0) sum += -probability*log2(probability);
}
return sum;
};
// Reports whether `attribute` is the decomposition attribute of `node` or of
// any of its ancestors. A node only counts if it has children. The walk
// follows parent links upward and stops at the node whose parent is NULL.
bool alreadyUsedToDecompose(TreeNode* node, int attribute)
{
    for (TreeNode* current = node; ; current = current->parent)
    {
        const bool hasChildren = (current->children.size() != 0);
        if (hasChildren && current->decompositionAttribute == attribute) return true;

        // Reached the top of the tree without finding the attribute.
        if (current->parent == NULL) return false;
    }
}
void decomposeNode(TreeNode* node)
{
double bestEntropy = 0;
bool
selected = false;
int
selectedAttribute = 0;
int numdata = node->data.size();
int numinputattributes = number_of_attributes - 1;
node->entropy = calculateEntropy(node->data);
if (node->entropy == 0) return;
for (int i = 0; i < numinputattributes; i++)
{
int numvalues = domains[i].size();
if ( alreadyUsedToDecompose(node, i) ) continue;
// Use a variável seguinte para armazenar a entropia para o nó de teste cri
ado com o atributo i
double averageentropy = 0;
for (int j = 0; j < numvalues; j++)
{
std::vector<DataPoint> subset = getSubset(node->data, i, j);
if (subset.size() == 0) continue;
double subentropy = calculateEntropy(subset);
averageentropy += subentropy * subset.size();
}
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
50. 44
00295
00296
00297
00298
00299
00300
00301
00302
00303
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332
00333
00334
00338
00339
00340
00341
00342
00343
00344
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359
00360
00361
File Documentation
averageentropy = (double)averageentropy/numdata;
if (selected == false)
{
selected = true;
bestEntropy = averageentropy;
selectedAttribute = i;
}
else
{
if (averageentropy < bestEntropy)
{
selected = true;
bestEntropy = averageentropy;
selectedAttribute = i;
}
}
}
if (selected == false) return;
// Agora divide o conjunto de dados usando o atributo selecionado
int numvalues = domains[selectedAttribute].size();
node->decompositionAttribute = selectedAttribute;
for (int j = 0; j < numvalues; j++)
{
node->children.push_back( new TreeNode() );
node->children[j]->parent = node;
node->children[j]->data = getSubset(node->data, selectedAttribute, j);
node->children[j]->decompositionValue = j;
}
// Recursivamente divide nós filhos
for (int j = 0; j < numvalues; j++)
{
decomposeNode(node->children[j]);
}
};
bool readData(std::string filename)
{
/*Objetos para manipulação do arquivo*/
std::fstream myfile(filename.c_str());
std::string myline;
/*Verifica se o arquivo pôde ser aberto*/
if (!myfile.is_open())
{
std::cerr << "Unable to open data file: " + filename + "n" << std::endl;
return EXIT_FAILURE;
}
/*Executa a primeira leitura - leitura do cabeçalho da base de testes*/
getline(myfile, myline);
/*Extração das strings de cabeçalho*/
StringTokenizer mytokenizer = StringTokenizer(myline, "t");
number_of_attributes = mytokenizer.countTokens();
if (number_of_attributes <= 1)
{
std::cerr << "Read line: " + myline << std::endl;
std::cerr << "Could not obtain the names of attributes in the line" << std:
:endl;
00362
std::cerr << "Expecting at least one input attribute and one output attribu
te" << std::endl;
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
51. 6.8 MyID3Algorithm.cpp
00363
00364
00365
00366
00367
00368
00369
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379
45
return EXIT_FAILURE;
}
/*Alocando espaço para as listas de valores dos atributos e extraindo rótulos
de cada atributo*/
for (int i = 0; i < number_of_attributes; i++)
{
domains.push_back( StringList() );
attribute_names.push_back( mytokenizer.nextToken() );
}
/*Executa leitura das demais linhas da base de dados*/
while (!myfile.eof())
{
/*Extração da linha e atribuição a variável temporária*/
getline(myfile, myline);
/*Caso não encontre nenhum dado então não deve ser feita a quebra da cadeia
*/
00380
00381
00382
00383
00384
00385
00386
00387
00388
00389
00390
00391
00392
00393
00394
00395
00396
00397
00398
00399
00400
00401
00402
00403
00404
00408
00409
00410
00411
00412
00413
00414
00415
00416
00417
00418
bool can_tokenize = !(LibraryUtils::StrStartsWith(myline, "//") || (myline
== ""));
/*Tokenizando a string*/
if (can_tokenize)
{
/*Estabelecendo o token e o número de substrings*/
mytokenizer = StringTokenizer(myline, "t");
/*Criação dos datapoints e nós*/
DataPoint point = DataPoint(number_of_attributes);
for(int i = 0; i < number_of_attributes; i++)
{
point.attributes[i] = getSymbolValue(i, mytokenizer.nextToken() );
}
root->data.push_back(point);
}
}
/*Fechando o arquivo*/
myfile.close();
/*Retorno da função*/
return true;
};
void printTree(TreeNode* node, std::string tab)
{
int outputattr = number_of_attributes - 1;
if (node->children.size() == 0)
{
IntegerList values = getAllValues(node->data, outputattr);
if (values.size() == 1)
{
std::cerr << tab + " " + attribute_names[outputattr] + " = "" + domains[
outputattr].at(values[0]) + "";" << std::endl;
00419
return;
00420
}
00421
00422
std::cerr << tab + "t" + attribute_names[outputattr] + " = {" << std::endl
;
00423
for (unsigned int i = 0; i < values.size(); i++)
00424
{
00425
std::cerr << """ + domains[outputattr].at(values[i]) + "" " << std::end
l;
00426
if ( i != values.size() - 1 ) std::cerr << " , ";
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
52. 46
00427
00428
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439
00440
00441
00442
00446
00447
00448
00449
00450
00451
00455
00456
00457
00458
00459
00460
00461
00462
00463
00467
00468
00469
00470
00471
00472
File Documentation
}
std::cerr << " };" << std::endl;
return;
}
unsigned int numvalues = node->children.size();
for (unsigned int i = 0; i < numvalues; i++)
{
std::cerr << tab + "if( " + attribute_names[node->decompositionAttribute] +
" == "" + domains[node->decompositionAttribute].at(i) + "")" + "{" << std::end
l;
printTree(node->children[i], tab + " ");
if (i != numvalues - 1) std::cerr << tab + "} " + "else ";
else std::cerr << tab + "}" << std::endl;
}
};
// Decomposes the tree starting at the root node, then prints the resulting
// tree (printTree writes to std::cerr) with an empty initial indentation.
void createDecisionTree()
{
decomposeNode(root);
printTree(root, "");
};
// Writes every attribute name to std::cerr as "[name]" followed by a tab,
// then ends the line.
// The separator is written as "\t". The listing showed "]t", which would
// print a literal 't' after each label.
void printMyLabels()
{
    for (unsigned int i = 0; i < attribute_names.size(); i++)
    {
        std::cerr << "[" << attribute_names.at(i) << "]\t";
    }
    std::cerr << std::endl;
};
// Writes one line per attribute to std::cerr: the attribute name followed by
// every symbol in its domain, each as "[text]" followed by a tab.
// The separator is written as "\t". The listing showed "]t", which would
// print a literal 't' after each entry.
void printDomains()
{
    for (unsigned int i = 0; i < domains.size(); i++)
    {
        std::cerr << "[" << attribute_names.at(i) << "]\t";
        for (unsigned int j = 0; j < domains[i].size(); j++)
        {
            std::cerr << "[" << (domains[i]).at(j) << "]\t";
        }
        std::cerr << std::endl;
    }
};
};
00473
00474
00475
00476
00477
00481 int main(int argc, char* argv[])
00482 {
00483
//Resgata o número de argumentos.
00484
int number_of_arguments = argc;
00485
std::string str_file_path;
00486
00487
//Testando o número de argumentos
00488
if (number_of_arguments < 2 )
00489
{
00490
std::cerr << std::endl << "[Você precisa especificar a base de testes!]" << s
td::endl << std::endl;
00491
return EXIT_FAILURE;
00492
}
00493
else
00494
{
00495
str_file_path = argv[1];
00496
}
00497
00498
//Declaração de um objeto do tipo LibraryTime
00499
LibraryTime t;
00500
00501
//Inicio da captura de tempo
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
53. 6.8 MyID3Algorithm.cpp
00502
00503
00504
00505
00506
00507
00508
00509
00510
00511
00512
00513
00514
00515
00516
00517
00518
00519
00520
47
t.Start();
//Declarando objeto do tipo ID3
ID3 myid3 = ID3();
//Verificando o status de leitura
if (!myid3.readData( str_file_path )) return EXIT_FAILURE;
//Executa a criação da árvore
myid3.createDecisionTree();
std::cerr << std::endl << "[Dataset]" << std::endl;
myid3.printDomains(); std::cerr << std::endl;
//Término da captura de tempo
t.Stop();
//Tempo consumido para execução do programa
std::cout << std::endl << "Tempo consumido: "<< std::setprecision(5) << t.Get()
<< std::endl;
00521
return EXIT_SUCCESS;
00522 }
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
54. 48
6.9
File Documentation
StringTokenizer.cpp File Reference
#include "StringTokenizer.h"
Include dependency graph for StringTokenizer.cpp:
StringTokenizer.cpp
StringTokenizer.h
stdio.h
stdlib.h
iostream
string
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
55. 6.10 StringTokenizer.cpp
6.10
00001
00002
00003
00004
00005
00006
00007
00008
00009
49
StringTokenizer.cpp
/*
***********************************************************************
* Note: This library has been deprecated in favour of the C++ String *
***********************************************************************
*/
#include "StringTokenizer.h"
/*
  Prepares _str for tokenisation on the delimiter _delim.

  The string is stored with runs of consecutive delimiters collapsed into one
  and with a single leading and trailing delimiter removed. If either argument
  is empty, nothing is stored and the members keep their default state.

  Positions are held in std::string::size_type. Storing the result of find()
  in an unsigned int truncates std::string::npos where size_type is wider
  than unsigned int, so the "not found" test never succeeds and the collapse
  loop never terminates.
*/
StringTokenizer::StringTokenizer(const std::string& _str, const std::string& _delim)
{
    if ((_str.length() == 0) || (_delim.length() == 0)) return;

    token_str = _str;
    delim     = _delim;

    /*
      Remove sequential delimiter
    */
    std::string::size_type curr_pos = 0;
    while ((curr_pos = token_str.find(delim, curr_pos)) != std::string::npos)
    {
        // Step past the delimiter just found, then drop any copies that follow it.
        curr_pos += delim.length();
        while (token_str.find(delim, curr_pos) == curr_pos)
        {
            token_str.erase(curr_pos, delim.length());
        }
    }

    /*
      Trim leading delimiter
    */
    if (token_str.find(delim, 0) == 0)
    {
        token_str.erase(0, delim.length());
    }

    /*
      Trim ending delimiter (only when the last occurrence sits at the very end)
    */
    curr_pos = token_str.rfind(delim);
    if (curr_pos != std::string::npos &&
        curr_pos == token_str.length() - delim.length())
    {
        token_str.erase(curr_pos, delim.length());
    }
}
00054
00055 int StringTokenizer::countTokens()
00056 {
00057
unsigned int prev_pos = 0;
00058
int num_tokens
= 0;
00059
00060
if (token_str.length() > 0)
00061
{
00062
num_tokens = 0;
00063
00064
unsigned int curr_pos = 0;
Generated on Fri Dec 16 16:50:04 2011 by Doxygen
58. 52
File Documentation
6.11
StringTokenizer.h File Reference
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <string>
Include dependency graph for StringTokenizer.h:
StringTokenizer.h
stdio.h
stdlib.h
iostream
string
This graph shows which files directly or indirectly include this file:
StringTokenizer.h
StringTokenizer.cpp
Classes
• class StringTokenizer
Generated on Fri Dec 16 16:50:04 2011 by Doxygen