% Bibliography database in classic BibTeX syntax: one field per line,
% month given via the predefined three-letter macros, journal names that
% recur are held in string macros defined before their first use.
% Text outside an entry (such as these lines) is ignored by BibTeX.

@misc{New1676,
  author       = {Isaac Newton},
  howpublished = {Letter to Hooke},
  month        = {5~} # feb,
  year         = {1676},
}

@misc{FIT1998,
  author       = {Dan Eaves and John Hurst},
  title        = {Standards for the Presentation of Written Assignments},
  howpublished = {School of Computer Science and Software Engineering, Faculty of Information Technology, Monash University, Australia; On-line resource},
  year         = {1998},
  url          = {http://www.csse.monash.edu.au/\~{}ajh/teaching/resources/studentguide.pdf},
  note         = {(last accessed June 8, 2005)},
}

% The logo macro is wrapped in braces so that sentence-casing styles do not
% lowercase the control sequence name.
@book{Lam1994,
  author    = {Leslie Lamport},
  title     = {{\LaTeX}: a document preparation system},
  edition   = {Second},
  year      = {1994},
  publisher = {Addison-Wesley Publishing Company, Inc.},
  address   = {Reading, MA, USA},
}

@inproceedings{AgS1995,
  author    = {Rakesh Agrawal and Ramakrishnan Srikant},
  title     = {Mining Sequential Patterns},
  booktitle = {Proceedings of the 11th International Conference on Data Engineering},
  editor    = {Yu, P. S. and Chen, A. L. P.},
  address   = {Taipei, Taiwan},
  pages     = {3--14},
  month     = mar,
  year      = {1995},
}

% Single spelling for the journal shared by BoL2000 and Fre2000.
@string{SIGKDDEXP = {ACM SIGKDD Explorations}}

@article{BoL2000,
  author  = {Jos{\'e} Borges and Mark Levene},
  title   = {A Fine Grained Heuristic to Capture Web Navigation Patterns},
  journal = SIGKDDEXP,
  volume  = {2},
  number  = {1},
  pages   = {40--50},
  month   = jun,
  year    = {2000},
}

@book{WeB1998,
  author    = {C. R. Westphal and T. A. Blaxton},
  title     = {Data Mining Solutions: Methods and Tools for Solving Real-World Problems},
  year      = {1998},
  publisher = {John Wiley \& Sons},
  address   = {New York, USA},
}

@misc{HCH2000,
  author       = {John L. Houle and Wanda Cadigan and Sylvain Henry and Anu Pinnamaneni and Sonny Lundahl},
  title        = {Database Mining in the Human Genome Initiative},
  howpublished = {Bio-databases.com white paper, Amita Corporation, 1420 Blair Place, Suite 500, Ottawa, Ontario, Canada, K1J 9L8},
  year         = {2000},
  url          = {http://www.biodatabases.com/whitepaper01.html},
}

% Journal-name macros, referenced unquoted in the journal fields below.
@string{SP = {Signal Processing}}
@string{NC = {Neural Computation}}

@article{Ade1983,
  author  = {F. Ade},
  title   = {Characterisation of textures by ``eigenfilters''},
  journal = SP,
  volume  = {5},
  pages   = {451--457},
  year    = {1983},
}

@book{AaK1989,
  author    = {E. Aarts and J. Korst},
  title     = {Simulated annealing and {Boltzmann} machines},
  publisher = {John Wiley and Sons},
  year      = {1989},
  address   = {New York},
}

@article{AGR1996,
  author   = {Joseph J. Atick and Paul A. Griffin and Redlich, A. Norman},
  title    = {The vocabulary of shape: principal shapes for probing perception and neural response},
  journal  = NC,
  volume   = {7},
  number   = {1},
  pages    = {1--5},
  month    = feb,
  year     = {1996},
  keywords = {principal components, shape perception, eigenmode, eigenhead},
  abstract = {Humans perceive shape rapidly and effortlessly but have great
              difficulties describing what they perceive. This suggests that
              the representation of shape in the brain is abstract and very
              unlike that used in conscious thought. Here we explore the
              proposal that this representation is matched to the statistical
              properties of objects in the environment. From an ensemble of
              several hundred laser-scanned three-dimensional (3D) human heads
              we extract the principal components which provide a compact
              basis for head shape. We show that, with good accuracy, a given
              head can be represented by linear combination of a few dozen
              primary shapes just as colours can be synthesized by combining
              the three principal colours. We suggest new perceptual
              adaptation experiments for testing the brain's shape
              representation system. The principal head shapes can also be
              used to probe response properties of `face-cells' in the
              inferior temporal cortex.},
}

@misc{WiS1994,
  author       = {Peter Williams and Thorsten Schnier},
  title        = {The {Harvard} Family of Bibliography Styles},
  howpublished = {{\LaTeXe} package documentation},
  month        = jun # {~21},
  year         = {1994},
  url          = {http://www.ctan.org/tex-archive/macros/latex/contrib/supported/harvard/},
  note         = {(last accessed July 24, 2002)},
}

% DeV1998 inherits its missing fields from CVPR98 through crossref; classic
% BibTeX requires the parent entry to come after the child, as it does here.
@inproceedings{DeV1998,
  vgproject = {cbir},
  author    = {Jeremy S. {De~Bonet} and Paul Viola},
  title     = {Texture Recognition Using a Non-parametric Multi-Scale Statistical Model},
  crossref  = {CVPR98},
  year      = {1998},
  url       = {http://www.ai.mit.edu/\~{}jsd/research/publications/1998/DeBonet-CVPR98.pdf},
  abstract  = {We describe a technique for using the joint occurrence of local
               features at multiple resolutions to measure the similarity
               between texture images. Though superficially similar to a
               number of ``Gabor'' style techniques, which recognize textures
               through the extraction of multi-scale feature vectors, our
               approach is derived from an accurate generative model of
               texture, which is explicitly multi-scale and non-parametric.
               The resulting recognition procedure is similarly
               non-parametric, and can model complex non-homogeneous textures.
               We report results on publicly available texture databases. In
               addition, experiments indicate that this approach may have
               sufficient discrimination power to perform target detection in
               synthetic aperture radar images (SAR).},
}

@proceedings{CVPR98,
  title     = {Proceedings of the 1998 IEEE Conference on Computer Vision and Pattern Recognition (CVPR'98)},
  booktitle = {Proceedings of the 1998 IEEE Conference on Computer Vision and Pattern Recognition (CVPR'98)},
  key       = {CVPR98},
  month     = jun,
  year      = {1998},
  address   = {Santa Barbara, California, USA},
}

% NOTE(review): SNS2002, SNS2002a and SNS2002b carry the same title and year
% with three different encodings of the author list (braced compound surname,
% comma form, unprotected form) and of the acronym in the title. Presumably a
% deliberate demonstration of name parsing and brace protection, so they are
% kept exactly as written -- confirm before merging or "correcting" them.
@misc{SNS2002,
  author = {David McG. Squire and Fred Quentin Nurke and Karen {Sparck Jones}},
  title  = {Something about {XML}},
  year   = {2002},
}

@misc{SNS2002a,
  author = {Squire, David McG. and Nurke, Fred Quentin and Sparck Jones, Karen},
  title  = {Something about {XML}},
  year   = {2002},
}

@misc{SNS2002b,
  author = {David McG. Squire and Fred Quentin Nurke and Karen Sparck Jones},
  title  = {Something about XML},
  year   = {2002},
}

@inproceedings{DGV2000,
  author    = {Anjali Dhond and Amar Gupta and Sanjeev Vadhavkar},
  title     = {Data mining techniques for optimizing inventories for electronic commerce},
  booktitle = {Proceedings of the Sixth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
  address   = {Boston, Massachusetts, USA},
  month     = {20--23~} # aug,
  year      = {2000},
  pages     = {480--486},
  url       = {http://doi.acm.org/10.1145/347090.347188},
  keywords  = {Inventory Optimization, Temporal Data Mining, Data Massaging},
  abstract  = {As part of their strategy for incorporating electronic commerce
               capabilities, many organizations are involved in the
               development of information systems that will establish
               effective linkages with their suppliers, customers, and other
               channel partners involved in transportation, distribution,
               warehousing and maintenance activities. These linkages have
               given birth to comprehensive data warehouses that integrate
               operational data with supplier, customer, channel partners and
               market information. Data mining techniques can now provide the
               technological leap needed to structure and prioritize
               information from these data warehouses to address specific
               end-user problems. Emerging data mining techniques permit the
               semi-automatic discovery of patterns, associations, changes,
               anomalies, rules, and statistically significant structures and
               events in data. Very significant business benefits have been
               attained through the integration of data mining techniques with
               current information systems aiding electronic commerce. This
               paper explains key data mining principles that can play a
               pivotal role in an electronic commerce environment. The paper
               also highlights two case studies in which neural network-based
               data mining techniques were used for inventory optimization.
               The results from the data mining prototype in a large medical
               distribution company provided the rationale for the strategy to
               reduce the total level of inventory by 50\% (from a billion
               dollars to half a billion dollars) in the particular
               organization, while maintaining the same level of probability
               that a particular customer's demand will be satisfied. The
               second case study highlights the use of neural network based
               data mining techniques for forecasting hot metal temperatures
               in a steel mill blast furnace.},
}

@article{Fre2000,
  author   = {Alex A. Freitas},
  title    = {Understanding the Crucial Differences Between Classification and Discovery of Association Rules---A Position Paper},
  journal  = SIGKDDEXP,
  volume   = {2},
  number   = {1},
  pages    = {65--69},
  month    = jun,
  year     = {2000},
  url      = {http://www.acm.org/sigs/sigkdd/explorations/issue2-1/contents.htm\#Freitas},
  keywords = {Classification, association rules, induction, prediction},
  abstract = {The goal of this position paper is to contribute to a clear
              understanding of the profound differences between the
              association-rule discovery and the classification task. We argue
              that the classification task can be considered an ill-defined,
              non-deterministic task, which is unavoidable given the fact that
              it involved prediction; while the standard association task can
              be considered a well-defined, deterministic, relatively simple
              task, which does \emph{not} involve prediction in the same sense
              as the classification task does.},
}

@article{The1999,
  author   = {Kurt Thearling},
  title    = {Data Mining and {CRM}: Zeroing in on Your Best Customers},
  journal  = {DM Direct},
  month    = {20~} # jun,
  year     = {1999},
  note     = {(on-line newsletter)},
  url      = {http://www.dmreview.com/master.cfm?NavID=198\&EdID=1744},
  abstract = {To be successful, database marketers must first identify market
              segments containing customers or prospects with high profit
              potential and, second, build and execute campaigns that
              favorably impact the behavior of these individuals. The first
              task, identifying the market segments, requires significant data
              about the prospective customers and their buying behaviors. In
              theory, the more data the better. In practice, however, massive
              data stores often impede marketers who struggle to sift through
              the minutiae to find the nuggets of valuable
              information\ldots},
}

@techreport{ABS1999,
  author      = {Rakesh Agrawal and Roberto Bayardo and Ramakrishnan Srikant},
  title       = {Athena: Mining-based Interactive Management of Text Databases},
  type        = {IBM Research Report},
  number      = {RJ10153},
  institution = {IBM Almaden Research Center},
  address     = {650 Harry Road, K55/B1 San Jose, CA 95120, USA},
  month       = jul,
  year        = {1999},
  url         = {http://www.almaden.ibm.com/cs/people/srikant/papers/edbt00\_rj.pdf},
  abstract    = {We describe Athena: a system for creating, exploiting, and
                 maintaining a hierarchy of textual documents through
                 interactive mining-based operations. Requirements of any such
                 system include speed and minimal end-user effort. Athena
                 satisfies these requirements through linear-time
                 classification and clustering engines which are applied
                 interactively to speed the development of accurate models.
                 Naive Bayes classifiers are recognized to be among the best
                 for classifying text. We show that our specialization of the
                 Naive Bayes classifier is considerably more accurate (7 to
                 29\% absolute increase in accuracy) than a standard
                 implementation. Our enhancements include using Lidstone's law
                 of succession instead of Laplace's law, under-weighting long
                 documents, and over-weighting author and subject. We also
                 present a new interactive clustering algorithm, C-Evolve, for
                 topic discovery. C-Evolve first finds highly accurate cluster
                 digests (partial clusters), gets user feedback to merge and
                 correct these digests, and then uses the classification
                 algorithm to complete the partitioning of the data. By
                 allowing this interactivity in the clustering process,
                 C-Evolve achieves considerably higher clustering accuracy (10
                 to 20\% absolute increase in our experiments) than the
                 popular K-Means and agglomerative clustering methods.},
}

@inproceedings{ABS2000,
  author    = {Rakesh Agrawal and Roberto Bayardo and Ramakrishnan Srikant},
  title     = {Athena: Mining-based Interactive Management of Text Databases},
  booktitle = {Proceedings of the Seventh International Conference on Extending Database Technology (EDBT00)},
  address   = {Konstanz, Germany},
  month     = {20--23~} # mar,
  year      = {2000},
  pages     = {365--379},
  url       = {http://www.almaden.ibm.com/software/quest/Publications/papers/edbt00.pdf},
  abstract  = {We describe Athena: a system for creating, exploiting, and
               maintaining a hierarchy of textual documents through
               interactive mining-based operations. Requirements of any such
               system include speed and minimal end-user effort. Athena
               satisfies these requirements through linear-time classification
               and clustering engines which are applied interactively to speed
               the development of accurate models. Naive Bayes classifiers are
               recognized to be among the best for classifying text. We show
               that our specialization of the Naive Bayes classifier is
               considerably more accurate (7 to 29\% absolute increase in
               accuracy) than a standard implementation. Our enhancements
               include using Lidstone's law of succession instead of Laplace's
               law, under-weighting long documents, and over-weighting author
               and subject. We also present a new interactive clustering
               algorithm, C-Evolve, for topic discovery. C-Evolve first finds
               highly accurate cluster digests (partial clusters), gets user
               feedback to merge and correct these digests, and then uses the
               classification algorithm to complete the partitioning of the
               data. By allowing this interactivity in the clustering process,
               C-Evolve achieves considerably higher clustering accuracy (10
               to 20\% absolute increase in our experiments) than the popular
               K-Means and agglomerative clustering methods.},
}

@mastersthesis{Ves1997,
  author   = {Juha Vesanto},
  title    = {Data Mining Techniques Based on the Self-Organizing Map},
  month    = {26~} # may,
  year     = {1997},
  school   = {Helsinki University of Technology},
  address  = {Laboratory of Computer and Information Science, P.O. Box 5400, FIN-02015 HUT, Finland},
  url      = {http://www.cis.hut.fi/projects/ide/publications/html/mastersJV97/},
  keywords = {neural network, Self-Organizing Map, data mining, knowledge discovery, pulp and paper industry},
  abstract = {Data mining is a part of a larger area of recent research in
              artificial intelligence and information management: knowledge
              discovery in databases (KDD). The purpose of KDD is to find new
              knowledge from databases in which the dimension, complexity or
              the amount of data has so far been prohibitively large for human
              observation alone. Data mining refers to the exploratory phase
              of knowledge discovery. The Self-Organizing Map (SOM) is one of
              the most popular neural network models. The SOM quantizes the
              data space formed by the training data and simultaniously
              performs a topology-preserving projecting of the data space on a
              regular two-dimensional grid. The SOM also has excellent
              visualization capabilities including techniques to give an
              informative picture of the data space, and techniques to compare
              data vectors or whole data sets with each other. The SOM can
              also be used for clustering, classification and modeling. The
              versatile properties of the SOM make it a valuable tool in data
              mining and knowledge discovery. As part of this work a SOM-based
              data mining tool was implemented. The methods and tools
              presented in the work were used to analyze the pulp and paper
              industry worldwide and the Scandinavian industry in more detail
              with encouraging results. The analysis of technological data
              resulted in 20 major types of pulp and paper mills. Regarding
              Scandinavian industry a hierarchical structure of SOMs was used
              to combine technological, environmental and economical data. The
              work has been done in the Laboratory of Computer and Information
              Science at the Helsinki University of Technology as part of the
              corporate project Entire in the technology program ``Adaptive
              and Intelligent Systems Applications''. The project was financed
              by Jaakko P{\"o}yry Consulting and the Technology Development
              center of Finland (TEKES).},
}

@misc{Dal1999,
  author       = {Patrick W. Daly},
  title        = {Natural Sciences Citations and References (Author--Year and Numerical Schemes)},
  howpublished = {{\LaTeXe} package documentation},
  month        = may # {~28},
  year         = {1999},
  url          = {http://www.ctan.org/tex-archive/macros/latex/contrib/natbib/},
  note         = {(last accessed February 7, 2004)},
}

@article{Car2005,
  author   = {Scott Carlson},
  title    = {Scholars Note `Decay' of Citations to Online References},
  journal  = {The Chronicle of Higher Education},
  month    = {18~} # mar,
  year     = {2005},
  volume   = {51},
  number   = {28},
  pages    = {A.30},
  abstract = {Michael Bugeja says that when he got his doctorate in English,
              he studied the difference between ``fair'' and ``foul'' copies
              of Shakespeare's plays -- a foul copy being rife with
              inaccuracies. ``That's because the medium of printing was
              unstable back then,'' says Mr. Bugeja, a professor of journalism
              and communication at Iowa State University. Now that the
              Internet is the new unstable publishing medium, he and a
              colleague have studied how Web links stop working, or
              ``decay,'' as those sites change addresses or shut down. They
              focused on links used by scholars in footnotes that cite Web
              materials. After analyzing more than 1,126 citations that make
              reference to Web addresses, taken from online versions of five
              prestigious communication-studies journals, 373 of the links, or
              33 percent, were found to be dead. Of the 753 of the links that
              worked, only 424 pointed to information pertinent to the
              citation. Mr. Bugeja and Daniela Dimitrova, an assistant
              professor of communication at Iowa State, looked at footnotes
              from 2000 to 2003 in Human Communication Research, the Journal
              of Broadcasting \& Electronic Media, the Journal of
              Communication, Journalism \& Mass Communication Quarterly, and
              New Media \& Society. ``The erosion of footnotes,'' Mr. Bugeja
              says, ``might put us back to a curious situation, wondering
              whether we have a fair copy of a journal article or a foul copy
              of a journal article.'' In some journals, the decay rate was
              particularly high. For example, of the 265 citations in New
              Media \& Society articles that included links, 167 did not work.
              Steve Jones, a professor of communication at the University of
              Illinois at Chicago who is an editor of ``New Media \&
              Society,'' called the decay of online citations ``a real
              issue'' that the journal has begun to examine. He wonders
              whether copyright law might someday allow scholars to copy and
              archive online articles that they used as sources. But he says
              such a solution is ``pie in the sky.'' Anthony T. Grafton, a
              professor of history at Princeton University who has written a
              book about footnotes, has read a draft of the study and agrees
              that citation decay is ``a real problem.'' ``I'm looking at a
              world in which documentation and verification melt into air,''
              he says. He sees this problem growing, as today's students rely
              more on online sources. ``My students come to college less and
              less able to negotiate a book landscape and more and more adept
              at negotiating the Web.'' Mr. Bugeja and Ms. Dimitrova are
              preparing their findings for publication and are coming up with
              a list of recommendations to stop the decay of online citations.
              Their findings and recommendations will be presented at the
              International Communication Association conference in May.},
}