| File name: | Introduction to Information Retrieval.pdf |
| Description: | Kapitola 13 (Text classification and naive Bayes), strana 253 – 288 v knize.
Nějaký úvod, využití v různých oblastech (detekce spamu, sentimentu atd.), definice problému klasifikace dokumentů,
Bayesovský klasifikátor - detailní popis včetně pseudoalgoritmu, výsledků a možných vylepšení. Zajímavá strana
je od 271 dál, popisuje metodu výběru příznaků, metriku "Mutual Information" udávající, jak konkrétní příznak přispívá
ke správné klasifikaci, zase včetně ukázek. Dále popisuje různé statistiky jako příznakový vektor (například chi kvadrát) a různé
další metody použitelné pro klasifikaci. Následuje vyhodnocení jednotlivých přístupů a nakonec spousta literatury.
|
| Bibtex: |
@book{manning08,
author = {Manning, Christopher D. and Raghavan, Prabhakar and Sch{\"u}tze, Hinrich},
title = {Introduction to Information Retrieval},
publisher = {Cambridge University Press},
edition = {1},
year = {2008},
month = jul,
day = {07},
isbn = {0521865719},
url = {http://www.amazon.com/exec/obidos/redirect?tag=citeulike07-20\&path=ASIN/0521865719},
howpublished = {Hardcover},
keywords = {information-retrieval},
posted-at = {2008-08-06 22:01:26},
priority = {3}
} |
| |
| File name: | Feature selection on hierarchy of web documents.pdf |
| Description: | Spousta užitečných vzorečků pro zjištění, jak hodně příznak ovlivňuje klasifikaci, včetně
postupu, jak vybrat ty nejlepší, včetně srovnání výsledků. Podrobný popis problému klasifikace dokumentů.
|
| Bibtex: |
@article{citeulike:4983972,
author = {Mladeni{\'c}, D.},
title = {Feature selection on hierarchy of web documents},
journal = {Decision Support Systems},
volume = {35},
number = {1},
pages = {45--87},
year = {2003},
month = apr,
doi = {10.1016/S0167-9236(02)00097-0},
issn = {0167-9236},
url = {http://dx.doi.org/10.1016/S0167-9236(02)00097-0},
keywords = {datamining, ontology},
posted-at = {2009-06-27 02:15:42},
priority = {3},
internal-note = {NOTE(review): the published record may list M. Grobelnik as a second author -- confirm via the DOI before citing}
}
|
| |
| File name: | Exploiting structural information for semi-structured document categorization.pdf |
| Description: | Stručně Bayesovský klasifikátor a SVM klasifikátor. Uvádím ho jen kvůli SVM klasifikátoru a referencím na něj.
|
| Bibtex: |
@article{Bratko04exploitingstructural,
author = {Bratko, Andrej and Filipi{\v{c}}, Bogdan},
title = {Exploiting structural information for semi-structured document categorization},
journal = {Information Processing \& Management},
year = {2004},
pages = {679--694},
internal-note = {NOTE(review): the venue is a journal, so the entry type was changed from inproceedings to article; volume and number are missing and the year may be that of a preprint -- confirm against the publisher record}
}
|
| |
| File name: | Multiple sets of features for automatic genre classification of web documents.pdf |
| Description: | Využití syntaktických a lexikálních informací pro klasifikaci
|
| Bibtex: |
@article{Lim05Genre,
author = {Lim, C. and Lee, K. and Kim, G.},
title = {Multiple sets of features for automatic genre classification of web documents},
journal = {Information Processing \& Management},
volume = {41},
number = {5},
pages = {1263--1276},
year = {2005},
month = sep,
doi = {10.1016/j.ipm.2004.06.004},
issn = {0306-4573},
url = {http://dblab.mgt.ncu.edu.tw/\%E6\%95\%99\%E6\%9D\%90/2005\%20DM/57.pdf},
comment = {features include: URL features, HTML tags, token statistics (word, POS, symbols), selected term features,
linguistically-motivated structural information. Target is web},
keywords = {genre},
posted-at = {2011-01-26 05:36:10},
priority = {0}
}
|
| |
| File name: | Intelligent document classification.pdf |
| Description: | Spíš postup obecné úlohy klasifikace dokumentů, dobrý návod jak začít. Na závěr srovnání
klasifikátorů SVM, kNN, NNnet a NB (bayes)
|
| Bibtex: |
@article{Calvo2000,
author = {Calvo, R. A. and Ceccatto, H. A.},
title = {Intelligent document classification},
journal = {Journal of Intelligent Data Analysis},
volume = {4},
number = {5},
pages = {411--420},
year = {2000},
citeulike-article-id = {1530595},
keywords = {bibtex-import},
posted-at = {2007-08-02 14:05:22},
priority = {0},
internal-note = {NOTE(review): the journal may be titled just "Intelligent Data Analysis" -- confirm the exact name with the publisher}
}
|
| |
| File name: | Feature selection with dynamic mutual information.pdf |
| Description: | Popis dynamické metody výběru příznaků.
|
| Bibtex: |
@article{Liu_Sun_Liu_Zhang_2009,
author = {Liu, Huawen and Sun, Jigui and Liu, Lei and Zhang, Huijie},
title = {Feature selection with dynamic mutual information},
journal = {Pattern Recognition},
volume = {42},
number = {7},
pages = {1330--1339},
year = {2009},
url = {http://linkinghub.elsevier.com/retrieve/pii/S0031320308004615}
}
|
| |
| File name: | A two-stage feature selection method for text categorization.pdf |
| Description: | Jiný popis výběru příznaků včetně srovnání jednotlivých přístupů.
|
| Bibtex: |
@article{DBLP:journals/kbs/Uguz11,
author = {Uguz, Harun},
title = {A two-stage feature selection method for text categorization
by using information gain, principal component analysis
and genetic algorithm},
journal = {Knowledge-Based Systems},
volume = {24},
number = {7},
year = {2011},
pages = {1024--1032},
doi = {10.1016/j.knosys.2011.04.014},
ee = {http://dx.doi.org/10.1016/j.knosys.2011.04.014},
bibsource = {DBLP, http://dblp.uni-trier.de}
}
|
| |
| File name: | Automatic classification using supervised learning in a medical document filtering application.pdf |
| Description: | Dobrý popis klasifikační úlohy včetně podrobného návodu.
|
| Bibtex: |
@article{journals/ipm/MostafaL00,
author = {Mostafa, Javed and Lam, Wai},
title = {Automatic classification using supervised learning in a medical document filtering application},
journal = {Information Processing \& Management},
volume = 36,
number = 3,
pages = {415--444},
year = 2000,
doi = {10.1016/S0306-4573(99)00033-3},
url = {http://dblp.uni-trier.de/db/journals/ipm/ipm36.html#MostafaL00},
ee = {http://dx.doi.org/10.1016/S0306-4573(99)00033-3},
keywords = {dblp},
interhash = {70f81efd57e5c13aa6fbd401f27ea819},
intrahash = {eae677fbb1abde3617534fe69e791cfb},
timestamp = {2011-07-08T00:00:00.000+0200},
added-at = {2011-07-08T00:00:00.000+0200},
biburl = {http://www.bibsonomy.org/bibtex/2eae677fbb1abde3617534fe69e791cfb/dblp}
}
|
| |
| |
| File name: | An Extensive Empirical Study of Feature Selection Metrics for Text Classification.pdf |
| Description: | Různé metriky pro ověření správnosti vybraných příznaků a jejich porovnání.
|
| Bibtex: |
@article{Forman03anextensive,
author = {Forman, George},
title = {An extensive empirical study of feature selection metrics for text classification},
journal = {Journal of Machine Learning Research},
year = {2003},
volume = {3},
pages = {1289--1305},
internal-note = {NOTE(review): the original export also listed Isabelle Guyon and Andr{\'e} Elisseeff as authors; they appear to be the editors of the special issue rather than co-authors -- confirm against the journal's volume 3 contents}
}
|
| |
| File name: | Hierarchically SVM classification based on support vector clustering method and its application to document categorization.pdf |
| Description: | Dobrý popis SVM klasifikátoru
|
| Bibtex: |
@article{Hao:2007:HSC:1230143.1230212,
author = {Hao, Pei-Yi and Chiang, Jung-Hsien and Tu, Yi-Kun},
title = {Hierarchically {SVM} classification based on support vector clustering method and its application to document categorization},
journal = {Expert Systems with Applications},
volume = {33},
number = {3},
month = oct,
year = {2007},
issn = {0957-4174},
pages = {627--635},
numpages = {9},
url = {http://dx.doi.org/10.1016/j.eswa.2006.06.009},
doi = {10.1016/j.eswa.2006.06.009},
acmid = {1230212},
publisher = {Pergamon Press, Inc.},
address = {Tarrytown, NY, USA}
}
|
| |
| File name: | Automatically computed document dependent weighting factor facility for Naïve Bayes classification.pdf |
| Description: | Další metrika pro výpočet kvality vybraných příznaků pro bayesovský klasifikátor
|
| Bibtex: |
@article{citeulike:7278202,
author = {Lee, Lam H. and Isa, Dino},
title = {Automatically computed document dependent weighting factor facility for {Na{\"\i}ve} {Bayes} classification},
journal = {Expert Systems with Applications},
volume = {37},
number = {12},
pages = {8471--8478},
year = {2010},
month = dec,
day = {02},
doi = {10.1016/j.eswa.2010.05.030},
issn = {0957-4174},
url = {http://dx.doi.org/10.1016/j.eswa.2010.05.030},
keywords = {bayes, classification, dependent, document, naive, weighting},
posted-at = {2011-02-27 07:40:42},
priority = {3}
}
|
| |
| File name: | .pdf |
| Description: |
|
| Bibtex: |
|
| |