% Reading list: parallel K-Means, MapReduce, cloud storage and cloud benchmarks.
%
% Conventions used by the entries below:
%   - Author lists are written "Last, First" and joined with " and ".
%   - "note*" is not a standard field name, so bibliography styles skip it;
%     it appears to be used for private reading notes (some are in French).
%   - Web resources carry their address as \url{...} inside "note", which
%     requires a package providing \url (url or hyperref) in the document.
%
% NOTE(review): several article entries still have placeholder values
% ("?" or "-") or no value at all for journal and year. They are kept as
% found because the real data is not available here -- fill them in from
% the publications themselves.

@inproceedings{Dhillon,
  author    = {Dhillon, Inderjit S. and Modha, Dharmendra S.},
  title     = {A Data-Clustering Algorithm on Distributed Memory Multiprocessors},
  booktitle = {Large-scale Parallel KDD Systems Workshop, ACM SIGKDD},
  year      = {1999},
  month     = aug,
  note*     = {document "fondateur" du KMeans distribué via MPI}
}

% NOTE(review): year 1996 looks wrong for a MapReduce paper (presumably
% 2006) -- confirm against the publication; journal is a placeholder.
@article{MapReduceForMachineLearning,
  author  = {Chu, Cheng-Tao and Kim, Sang Kyun and Lin, Yi-An and Yu, YuanYuan and Bradski, Gary and Ng, Andrew Y. and Olukotun, Kunle},
  title   = {{MapReduce} For Machine Learning on Multicore},
  journal = {-},
  year    = {1996},
  note*   = {document intéressant pour se donner une idée des perfs de scaling sur du 64 processeurs}
}

@article{KMeansOnNOWs,
  author  = {Kantabutra, Sanpawat and Couch, Alva L.},
  title   = {Parallel K-Means Clustering Algorithm on {NOWs}},
  journal = {NecTec Technical Journal},
  year    = {2000},
  month   = jan,
  note*   = {Only article I read where parallelisation is 1 worker per cluster instead of 1 worker for N/P points}
}

@article{Pollard,
  author  = {Pollard, David},
  title   = {Strong consistency of K-Means clustering},
  journal = {The Annals of Statistics},
  year    = {1981},
  month   = jan,
  note*   = {Historical article on the consistency of Online K-Means algorithm}
}

% NOTE(review): journal and year are placeholders -- fill in.
@article{BiauKMeans,
  author  = {Biau, G{\'e}rard and Devroye, Luc and Lugosi, G{\'a}bor},
  title   = {On the performance of clustering in {Hilbert} spaces},
  journal = {?},
  year    = {-},
  note*   = {K-Means theoretical performances}
}

% NOTE(review): journal and year are placeholders -- fill in.
@article{KMeansIsNPhard,
  author  = {Mahajan, Meena and Nimbhorkar, Prajakta and Varadarajan, Kasturi},
  title   = {The Planar k-means Problem is {NP}-hard},
  journal = {?},
  year    = {-},
  note*   = {2D K-Means is NP-Hard}
}

% NOTE(review): journal is a placeholder; "spring" is not a month macro and
% is printed verbatim.
@article{KMeansJoshi,
  author  = {Joshi, Manasi N.},
  title   = {Parallel K-means Algorithm on Distributed Memory Multiprocessors},
  journal = {?},
  year    = {2003},
  month   = {spring},
  note*   = {un papier sur un K-Means distribué}
}

@article{ParaKMeans,
  author  = {Kraj, Piotr and Sharma, Ashok and Garge, Nikhil and Podolsky, Robert and McIndoe, Richard A.},
  title   = {{ParaKMeans} : implementation of a parallelised K-Means algorithm suitable for laboratory use},
  journal = {BMC BioInformatics},
  year    = {2008},
  month   = apr,
  note*   = {ParaKMeans en C#}
}

% NOTE(review): no journal given; the author's full given name is unknown here.
@article{KMeansParallelInC,
  author = {Liao, W.},
  title  = {Parallel K-Means Data Clustering},
  year   = {2005},
  note*  = {Code en C}
}

@misc{HPS_1,
  author = {Cheng, Gang and Podgorny, Marek},
  title  = {The High Performance Switch and Programming Interfaces on {IBM} {SP2}},
  year   = {1995}
}

% NOTE(review): article entry without journal or year -- fill in.
@article{HPS_2,
  author = {Georgitsis, Vasilios and Sobolewski, John},
  title  = {Performance of {MPL} and {MPICH} on the {SP2} System}
}

@misc{SortBenchmark,
  title = {Sort Benchmark Home Page},
  note  = {\url{http://sortbenchmark.org/}}
}

@misc{TeraSortWinnerYahoo,
  author = {O'Malley, Owen},
  title  = {{TeraByte} Sort on {Apache} {Hadoop}},
  note   = {\url{http://www.hpl.hp.com/hosted/sortbenchmark/YahooHadoop.pdf}}
}

@misc{AzureStorageResources,
  title = {Azure Storage Resources},
  note  = {\url{http://blogs.msdn.com/b/windowsazurestorage/archive/2010/03/28/windows-azure-storage-resources.aspx}}
}

@misc{AzureScope,
  title = {Azure Scope},
  note  = {\url{http://azurescope.cloudapp.net/}}
}

@techreport{Commodity_Grid_With_Amazon,
  author      = {Garfinkel, Simson},
  title       = {Commodity grid computing with {Amazon} {S3} and {EC2}},
  institution = {Harvard University}
}

@techreport{The_Impact_Of_Virtualization,
  author      = {Wang, Guohui and Ng, T. S. Eugene},
  title       = {The Impact of Virtualization on Network Performance of {Amazon} {EC2} Data Center},
  institution = {Dept. of Computer Science, Rice University},
  note*       = {technical paper about benchmarking UDP and TCP layers performance in communication, and evaluating of Virtualization is impacting theses layers.}
}

@inproceedings{Lin09bruteforce,
  author    = {Lin, Jimmy},
  title     = {Brute force and indexed approaches to pairwise document similarity comparisons with mapreduce},
  booktitle = {Proceedings of the 32nd Annual International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR 2009)},
  year      = {2009},
  pages     = {155--162},
  note*     = { Bio-Statistics research article, on queries run on a 240 machines cluster using Hadoop}
}

% NOTE(review): institution is missing -- fill in.
@techreport{Saini96nasparallel,
  author = {Saini, Subhash and Bailey, David H.},
  title  = {{NAS} Parallel Benchmark (Version 1.0) Results 11-96},
  year   = {1996}
}

@article{BridgingtheGap,
  author = {Simmhan, Yogesh and van Ingen, Catharine and Subramanian, Girish and Li, Jie},
  title  = {Bridging the Gap between the Cloud and an {eScience} Application Platform},
  note*  = {Investigates different issues of porting an app into the cloud. Mostly Azure oriented.}
}

@article{Eucalyptus,
  author = {Nurmi, Daniel and Wolski, Rich and Grzegorczyk, Chris and Obertelli, Graziano and Soman, Sunil and Youseff, Lamia and Zagorodnov, Dmitrii},
  title  = {The {Eucalyptus} Open-source Cloud-computing System},
  note*  = {The Eucalyptus Open-Source Cloud-Computing System is an opensource software framework for cloud computing that implements Infrastructure as a Service (IaaS). Describes the basic principles of the EUCALYPTUS design, and discuss architectural trade-offs they made}
}

@article{StragglerIssuesInMapReduce,
  author = {Lin, Jimmy},
  title  = {The Curse of {Zipf} and Limits to Parallelization: A Look at the Stragglers Problem in {MapReduce}},
  note*  = {Discussion on the straggler issue, good description of why is there an issue, etc... Very naive solution but the interest is explaining rather than proposing a solution}
}

@techreport{BenchmarkingAmazonEC2,
  author = {Walker, Edward},
  title  = {Benchmarking {Amazon} {EC2} for high-performance scientific computing},
  note*  = {EC2 agains a HPC cluster : Abe. Performance in term of runtime, bandwidth, latency etc...}
}

@article{Early_Observations_On_Azure,
  author = {Hill, Zach and Li, Jie and Mao, Ming and Ruiz-Alvarez, Arkaitz and Humphrey, Marty},
  title  = {Early Observations on the Performance of {Windows} {Azure}},
  note*  = {benchmark on Azure performances. Good graphics about bandwidth limitations, good explanations of VM boot latencies, etc...}
}

@misc{Azure_Pricing,
  title = {Azure Pricing},
  note  = {\url{http://www.microsoft.com/windowsazure/pricing/}}
}

% The key was written with a space ("Azure SLA"), which is not a legal
% citation key; it now uses an underscore like Azure_Pricing.
@misc{Azure_SLA,
  title = {Azure Service Level Agreement},
  note  = {\url{http://www.microsoft.com/windowsazure/sla/}}
}

@article{CAP_Theorem,
  author    = {Gilbert, Seth and Lynch, Nancy},
  title     = {{Brewer's} conjecture and the feasibility of consistent, available, partition-tolerant web services},
  journal   = {SIGACT News},
  volume    = {33},
  number    = {2},
  year      = {2002},
  issn      = {0163-5700},
  pages     = {51--59},
  doi       = {10.1145/564585.564601},
  publisher = {ACM},
  address   = {New York, NY, USA},
  note*     = {Demonstration of the CAP Theorem}
}

@article{BenchmarkingCloudServices,
  author = {Cooper, Brian F. and Silberstein, Adam and Tam, Erwin and Ramakrishnan, Raghu and Sears, Russell},
  title  = {Benchmarking Cloud Serving Systems with {YCSB}},
  note*  = {excellent description of different tradeoffs in storage design}
}

@misc{CLAP_Presentation,
  author = {Abadi, Daniel},
  title  = {Problems with {CAP}, and {Yahoo's} little known {NoSQL} System},
  note   = {\url{http://dbmsmusings.blogspot.com/2010/04/problems-with-cap-and-yahoos-little.html}},
  note*  = {excellent analyse of what's missing in CAP theorem}
}

@article{BASE,
  author = {Pritchett, Dan},
  title  = {{BASE} : an {ACID} alternative}
}

@misc{NoSQL,
  title = {http://nosql-database.org/},
  note  = {\url{http://nosql-database.org/} censored NoSQL databases}
}

@article{Eventually_Consistent,
  author = {Vogels, Werner},
  title  = {Eventually Consistent}
}

@misc{Slaying_Relational_Dragons,
  author = {Ayende},
  title  = {Slaying Relational Dragons},
  note   = {\url{http://ayende.com/Blog/archive/2010/02/22/slaying-relational-dragons.aspx}}
}

@misc{AzureQueuesConnotBeLarger,
  author = {Abdullin, Rinat},
  title  = {Azure queues cannot be larger than 8192 bytes}
}

@article{AboveTheCloud,
  author = {Armbrust, Michael and Fox, Armando and Griffith, Rean and Joseph, Anthony D. and Katz, Randy and Konwinski, Andy and Lee, Gunho and Patterson, David and Rabkin, Ariel and Stoica, Ion and Zaharia, Matei},
  title  = {Above the Clouds: A {Berkeley} View of Cloud Computing}
}

@article{HighPerformanceComputingWithClouds,
  author = {Masud, Raihan},
  title  = {High Performance Computing with Clouds}
}

@inproceedings{TOP500,
  author    = {Napper, Jeffrey and Bientinesi, Paolo},
  title     = {Can cloud computing reach the top500?},
  booktitle = {UCHPC-MAW '09: Proceedings of the combined workshops on UnConventional high performance computing workshop plus memory access workshop},
  year      = {2009},
  isbn      = {978-1-60558-557-4},
  pages     = {17--20},
  location  = {Ischia, Italy},
  doi       = {10.1145/1531666.1531671},
  publisher = {ACM},
  address   = {New York, NY, USA}
}

@inproceedings{lenk2009witcaamotcl,
  author    = {Lenk, Alexander and Klems, Markus and Nimis, Jens and Tai, Stefan and Sandholm, Thomas},
  title     = {What's inside the Cloud? An Architectural Map of the Cloud Landscape},
  booktitle = {ICSE Workshop on Software Engineering Challenges of Cloud Computing, 2009. CLOUD 09.},
  publisher = {IEEE Press},
  year      = {2009},
  month     = may
}