% Bibliography database (classic BibTeX).
%
% Conventions used throughout this file:
%   - one field per line, lowercase entry types and field names;
%   - names written as "Last, First" and joined by " and ";
%   - bare DOIs (no resolver prefix) and "--" in page ranges;
%   - non-ASCII letters in names written as LaTeX escapes;
%   - "note*" fields are private reading notes: standard styles ignore them.
%
% Entries that were exact repeats of an earlier key (DhillonModha2000,
% MapReduce2004, Graphlab2010, CloudMapReduce2010, TSI1) have been merged into
% their first occurrence, because BibTeX rejects repeated keys.

inproceedings-style clustering and parallel data-mining references follow.

@inproceedings{DhillonModha2000,
  author = {Dhillon, Inderjit S. and Modha, Dharmendra S.},
  title = {A Data-Clustering Algorithm on Distributed Memory Multiprocessors},
  booktitle = {Revised Papers from Large-Scale Parallel Data Mining, Workshop on Large-Scale Parallel KDD Systems, SIGKDD},
  year = {2000},
  isbn = {3-540-67194-3},
  pages = {245--260},
  publisher = {Springer-Verlag},
  address = {London, UK},
}

@article{DataClusteringReview,
  author = {Jain, A. K. and Murty, M. N. and Flynn, P. J.},
  title = {Data Clustering: A Review},
  journal = {ACM Computing Surveys},
  volume = {31},
  number = {3},
  month = sep,
  year = {1999},
}

@book{MPI1996,
  author = {Snir, Marc and Otto, Steve and Huss-Lederman, Steven and Walker, David and Dongarra, Jack},
  title = {{MPI}: The Complete Reference},
  publisher = {MIT Press},
  address = {Cambridge, MA},
  year = {1996},
}

@inproceedings{MapReduce2004,
  author = {Dean, Jeffrey and Ghemawat, Sanjay},
  title = {{MapReduce}: simplified data processing on large clusters},
  booktitle = {OSDI'04: Proceedings of the 6th conference on Symposium on Operating Systems Design \& Implementation},
  year = {2004},
  pages = {10--10},
  location = {San Francisco, CA},
  publisher = {USENIX Association},
  address = {Berkeley, CA, USA},
  file = {hpc/google/mapreduce-osdi04.pdf},
}

@article{MapReduce2008,
  author = {Dean, Jeffrey and Ghemawat, Sanjay},
  title = {{MapReduce}: simplified data processing on large clusters},
  journal = {Communications of the ACM},
  volume = {51},
  number = {1},
  year = {2008},
  pages = {107--113},
}

@inproceedings{Dryad2007,
  author = {Isard, Michael and Budiu, Mihai and Yu, Yuan and Birrell, Andrew and Fetterly, Dennis},
  title = {{Dryad}: distributed data-parallel programs from sequential building blocks},
  booktitle = {EuroSys '07: Proceedings of the 2nd ACM SIGOPS/EuroSys European Conference on Computer Systems 2007},
  year = {2007},
  isbn = {978-1-59593-636-3},
  pages = {59--72},
  location = {Lisbon, Portugal},
  doi = {10.1145/1272996.1273005},
  publisher = {ACM},
  address = {New York, NY, USA},
  file = {hpc/dryad/dryad-eurosys07.pdf},
}

@inproceedings{Graphlab2010,
  author = {Low, Yucheng and Gonzalez, Joseph and Kyrola, Aapo and Bickson, Danny and Guestrin, Carlos and Hellerstein, Joseph M.},
  title = {{GraphLab}: A New Parallel Framework for Machine Learning},
  booktitle = {Conference on Uncertainty in Artificial Intelligence (UAI)},
  month = jul,
  year = {2010},
  address = {Catalina Island, California},
  wwwfilebase = {uai2010-low-gonzalez-kyrola-bickson-guestrin-hellerstein},
  wwwtopic = {Parallel Learning},
}

@techreport{CloudMapReduce2010,
  author = {Liu, Huan and Orban, Dan},
  title = {Cloud {MapReduce}: a {MapReduce} Implementation on top of a Cloud Operating System},
  institution = {Accenture Technology Labs},
  year = {2009},
  type = {Technical report},
  note = {\url{http://code.google.com/p/cloudmapreduce/}},
}

@inproceedings{Dhillon,
  author = {Dhillon, Inderjit S. and Modha, Dharmendra S.},
  title = {A Data-Clustering Algorithm on Distributed Memory Multiprocessors},
  booktitle = {Large-scale Parallel KDD Systems Workshop, ACM SIGKDD},
  year = {1999},
  month = aug,
  note* = {document "fondateur" du KMeans distribué via MPI},
}

@article{ParallelMiningAssociationRules,
  author = {Agrawal, R. and Shafer, J. C.},
  title = {Parallel Mining of association rules: Design, implementation, and experience},
  journal = {IEEE Transactions on Knowledge and Data Engineering},
  volume = {8},
  number = {6},
  pages = {962--969},
  year = {1996},
}

@inproceedings{ScalableParallelClassifier,
  author = {Shafer, J. C. and Agrawal, R. and Mehta, M.},
  title = {A scalable parallel classifier for data mining},
  booktitle = {Proc. 22nd International Conference on VLDB, Mumbai, India},
  year = {1996},
}

@inproceedings{MapReduceForMachineLearning,
  author = {Chu, Cheng-Tao and Kim, Sang Kyun and Lin, Yi-An and Yu, YuanYuan and Bradski, Gary and Ng, Andrew Y. and Olukotun, Kunle},
  title = {{MapReduce} For Machine Learning on Multicore},
  booktitle = {Advances in Neural Information Processing Systems 19},
  year = {2006},
  note* = {document intéressant pour se donner une idée des perfs de scaling sur du 64 processeurs},
}

@article{KMeansOnNOWs,
  author = {Kantabutra, Sanpawat and Couch, Alva L.},
  title = {Parallel {K-Means} Clustering Algorithm on {NOWs}},
  journal = {NecTec Technical Journal},
  year = {2000},
  month = jan,
  note* = {Only article I read where parallelisation is 1 worker per cluster instead of 1 worker for N/P points},
}

@article{Pollard,
  author = {Pollard, David},
  title = {Strong consistency of {K-Means} clustering},
  journal = {The Annals of Statistics},
  volume = {9},
  number = {1},
  pages = {135--140},
  year = {1981},
  month = jan,
  note* = {Historical article on the consistency of Online K-Means algorithm},
}

@article{BiauKMeans,
  author = {Biau, G{\'e}rard and Devroye, Luc and Lugosi, G{\'a}bor},
  title = {On the performance of clustering in {Hilbert} spaces},
  journal = {IEEE Transactions on Information Theory},
  volume = {54},
  number = {2},
  pages = {781--790},
  year = {2008},
  note* = {K-Means theoretical performances},
}

@inproceedings{KMeansIsNPhard,
  author = {Mahajan, Meena and Nimbhorkar, Prajakta and Varadarajan, Kasturi},
  title = {The Planar k-means Problem is {NP}-hard},
  booktitle = {WALCOM: Algorithms and Computation},
  series = {Lecture Notes in Computer Science},
  volume = {5431},
  pages = {274--285},
  publisher = {Springer},
  year = {2009},
  note* = {2D K-Means is NP-Hard},
}

@inproceedings{Hennig99,
  author = {Hennig, C.},
  title = {Models And Methods For Clusterwise Linear Regression},
  booktitle = {Proceedings in Computational Statistics},
  year = {1999},
  pages = {3--0},
  publisher = {Springer},
}

@misc{KMeansJoshi,
  author = {Joshi, Manasi N.},
  title = {Parallel {K-means} Algorithm on Distributed Memory Multiprocessors},
  year = {2003},
  month = {spring},
  note* = {un papier sur un K-Means distribué},
}

@article{ParaKMeans,
  author = {Kraj, Piotr and Sharma, Ashok and Garge, Nikhil and Podolsky, Robert and McIndoe, Richard A.},
  title = {{ParaKMeans}: implementation of a parallelised {K-Means} algorithm suitable for laboratory use},
  journal = {BMC Bioinformatics},
  year = {2008},
  month = apr,
  note* = {ParaKMeans en C#},
}

@misc{KMeansParallelInC,
  author = {Liao, W.},
  title = {Parallel {K-Means} Data Clustering},
  year = {2005},
  note* = {Code en C},
}

@misc{HPS_1,
  author = {Cheng, Gang and Podgorny, Marek},
  title = {The High Performance Switch and Programming Interfaces on {IBM SP2}},
  year = {1995},
}

@article{HPS_2,
  author = {Georgitsis, Vasilios and Sobolewski, John},
  title = {Performance of {MPL} and {MPICH} on the {SP2} System},
}

@misc{MPI_performance,
  author = {Hoefler, Torsten and Thakur, Rajeev and Tr{\"a}ff, Jesper Larsson},
  title = {Toward Performance Models of {MPI} Implementations for Understanding Application Scaling Issues},
}

@inproceedings{MPI_performance_measurements,
  author = {Gropp, William and Lusk, Ewing},
  title = {Reproducible Measurements of {MPI} Performance Characteristics},
  booktitle = {Recent Advances in Parallel Virtual Machine and Message Passing Interface},
  year = {1999},
  pages = {11--18},
  publisher = {Springer-Verlag},
}

@misc{SortBenchmark,
  title = {Sort Benchmark Home Page},
  note = {\url{http://sortbenchmark.org/}},
}

@misc{TeraSortWinnerYahoo,
  author = {O'Malley, Owen},
  title = {{TeraByte} Sort on {Apache Hadoop}},
  note = {\url{http://www.hpl.hp.com/hosted/sortbenchmark/YahooHadoop.pdf}},
}

@misc{AzureStorageResources,
  title = {Azure Storage Resources},
  note = {\url{http://blogs.msdn.com/b/windowsazurestorage/archive/2010/03/28/windows-azure-storage-resources.aspx}},
}

@misc{KMeansSlow,
  author = {Arthur, David and Vassilvitskii, Sergei},
  title = {How Slow is the {k-Means} Method?},
}

@article{PVM,
  author = {Sunderam, V. S.},
  title = {{PVM}: A Framework for Parallel Distributed Computing},
  journal = {Concurrency: Practice and Experience},
  year = {1990},
  volume = {2},
  pages = {315--339},
}

@misc{AzureScope,
  title = {Azure Scope},
  note = {\url{http://azurescope.cloudapp.net/}},
}

@misc{WebSiteMonitoring,
  title = {WebSiteMonitoring},
  note = {\url{http://www.website-monitoring.com/}},
}

@techreport{Commodity_Grid_With_Amazon,
  author = {Garfinkel, Simson},
  title = {Commodity grid computing with {Amazon S3} and {EC2}},
  institution = {Harvard University},
}

@techreport{The_Impact_Of_Virtualization,
  author = {Wang, Guohui and Ng, T. S. Eugene},
  title = {The Impact of Virtualization on Network Performance of {Amazon EC2} Data Center},
  institution = {Dept. of Computer Science, Rice University},
  note* = {technical paper about benchmarking UDP and TCP layers performance in communication, and evaluating of Virtualization is impacting theses layers.},
}

@inproceedings{Lin09bruteforce,
  author = {Lin, Jimmy},
  title = {Brute force and indexed approaches to pairwise document similarity comparisons with {MapReduce}},
  booktitle = {Proceedings of the 32nd Annual International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR 2009)},
  year = {2009},
  pages = {155--162},
  note* = {Bio-Statistics research article, on queries run on a 240 machines cluster using Hadoop},
}

@techreport{Saini96nasparallel,
  author = {Saini, Subhash and Bailey, David H.},
  title = {{NAS} Parallel Benchmark (Version 1.0) Results 11-96},
  institution = {NASA Ames Research Center},
  year = {1996},
}

@article{BridgingtheGap,
  author = {Simmhan, Yogesh and van Ingen, Catharine and Subramanian, Girish and Li, Jie},
  title = {Bridging the Gap between the Cloud and an {eScience} Application Platform},
  note* = {Investigates different issues of porting an app into the cloud. Mostly Azure oriented.},
}

@article{Eucalyptus,
  author = {Nurmi, Daniel and Wolski, Rich and Grzegorczyk, Chris and Obertelli, Graziano and Soman, Sunil and Youseff, Lamia and Zagorodnov, Dmitrii},
  title = {The {Eucalyptus} Open-source Cloud-computing System},
  note* = {The Eucalyptus Open-Source Cloud-Computing System is an opensource software framework for cloud computing that implements Infrastructure as a Service (IaaS). Describes the basic principles of the EUCALYPTUS design, and discuss architectural trade-offs they made},
}

@article{StragglerIssuesInMapReduce,
  author = {Lin, Jimmy},
  title = {The Curse of {Zipf} and Limits to Parallelization: A Look at the Stragglers Problem in {MapReduce}},
  note* = {Discussion on the straggler issue, good description of why is there an issue, etc... Very naive solution but the interest is explaining rather than proposing a solution},
}

@techreport{BenchmarkingAmazonEC2,
  author = {Walker, Edward},
  title = {Benchmarking {Amazon EC2} for high-performance scientific computing},
  note* = {EC2 agains a HPC cluster : Abe. Performance in term of runtime, bandwidth, latency etc...},
}

@article{Early_Observations_On_Azure,
  author = {Hill, Zach and Li, Jie and Mao, Ming and Ruiz-Alvarez, Arkaitz and Humphrey, Marty},
  title = {Early Observations on the Performance of {Windows Azure}},
  note* = {benchmark on Azure performances. Good graphics about bandwidth limitations, good explanations of VM boot latencies, etc...},
}

@misc{Azure_Pricing,
  title = {Azure Pricing},
  note = {\url{http://www.microsoft.com/windowsazure/pricing/}},
}

@misc{Azure_SLA,
  title = {Azure Service Level Agreement},
  note = {\url{http://www.microsoft.com/windowsazure/sla/}},
}

@article{CAP_Theorem,
  author = {Gilbert, Seth and Lynch, Nancy},
  title = {{Brewer}'s conjecture and the feasibility of consistent, available, partition-tolerant web services},
  journal = {SIGACT News},
  volume = {33},
  number = {2},
  year = {2002},
  issn = {0163-5700},
  pages = {51--59},
  doi = {10.1145/564585.564601},
  publisher = {ACM},
  address = {New York, NY, USA},
  note* = {Demonstration of the CAP Theorem},
}

@article{BenchmarkingCloudServices,
  author = {Cooper, Brian F. and Silberstein, Adam and Tam, Erwin and Ramakrishnan, Raghu and Sears, Russell},
  title = {Benchmarking Cloud Serving Systems with {YCSB}},
  note* = {excellent description of different tradeoffs in storage design},
}

@misc{CLAP,
  author = {Abadi, Daniel},
  title = {Problems with {CAP}, and {Yahoo}'s little known {NoSQL} System},
  note = {\url{http://dbmsmusings.blogspot.com/2010/04/problems-with-cap-and-yahoos-little.html} read the 28/03/2012},
}

@article{BASE,
  author = {Pritchett, Dan},
  title = {{BASE}: an {ACID} alternative},
}

@misc{NoSQL,
  title = {{NoSQL} Databases},
  note = {\url{http://nosql-database.org/}, a census of {NoSQL} databases},
}

@article{Eventually_Consistent,
  author = {Vogels, Werner},
  title = {Eventually Consistent},
}

@misc{Slaying_Relational_Dragons,
  author = {Ayende},
  title = {Slaying Relational Dragons},
  note = {\url{http://ayende.com/Blog/archive/2010/02/22/slaying-relational-dragons.aspx}},
}

@article{AboveTheCloud,
  author = {Armbrust, Michael and Fox, Armando and Griffith, Rean and Joseph, Anthony D. and Katz, Randy and Konwinski, Andy and Lee, Gunho and Patterson, David and Rabkin, Ariel and Stoica, Ion and Zaharia, Matei},
  title = {Above the Clouds: A {Berkeley} View of Cloud Computing},
}

@article{HighPerformanceComputingWithClouds,
  author = {Masud, Raihan},
  title = {High Performance Computing with Clouds},
}

@inproceedings{TOP500,
  author = {Napper, Jeffrey and Bientinesi, Paolo},
  title = {Can cloud computing reach the {top500}?},
  booktitle = {UCHPC-MAW '09: Proceedings of the combined workshops on UnConventional high performance computing workshop plus memory access workshop},
  year = {2009},
  isbn = {978-1-60558-557-4},
  pages = {17--20},
  location = {Ischia, Italy},
  doi = {10.1145/1531666.1531671},
  publisher = {ACM},
  address = {New York, NY, USA},
}

@inproceedings{lenk2009witcaamotcl,
  author = {Lenk, Alexander and Klems, Markus and Nimis, Jens and Tai, Stefan and Sandholm, Thomas},
  title = {What's inside the Cloud? An Architectural Map of the Cloud Landscape},
  booktitle = {ICSE Workshop on Software Engineering Challenges of Cloud Computing, 2009. CLOUD 09.},
  month = may,
  year = {2009},
  publisher = {IEEE Press},
}

@inproceedings{piglatin,
  author = {Olston, Christopher and Reed, Benjamin and Srivastava, Utkarsh and Kumar, Ravi and Tomkins, Andrew},
  title = {{Pig Latin}: a not-so-foreign language for data processing},
  booktitle = {Proceedings of the 2008 ACM SIGMOD international conference on Management of data},
  series = {SIGMOD '08},
  year = {2008},
  isbn = {978-1-60558-102-6},
  location = {Vancouver, Canada},
  pages = {1099--1110},
  numpages = {12},
  url = {http://doi.acm.org/10.1145/1376616.1376726},
  doi = {10.1145/1376616.1376726},
  acmid = {1376726},
  publisher = {ACM},
  address = {New York, NY, USA},
  keywords = {dataflow language, pig latin},
}

@inproceedings{bigtable,
  author = {Chang, Fay and Dean, Jeffrey and Ghemawat, Sanjay and Hsieh, Wilson C. and Wallach, Deborah A. and Burrows, Mike and Chandra, Tushar and Fikes, Andrew and Gruber, Robert E.},
  title = {{Bigtable}: A distributed storage system for structured data},
  booktitle = {Proceedings of the 7th USENIX Symposium on Operating Systems Design and Implementation - Volume 7},
  year = {2006},
  pages = {205--218},
  publisher = {USENIX Association},
}

@misc{GoogleFileSystem,
  author = {Ghemawat, Sanjay and Gobioff, Howard and Leung, Shun-Tak},
  title = {The {Google} File System},
  year = {2003},
}

@book{Hadoop,
  author = {White, Tom},
  title = {{Hadoop}: The Definitive Guide},
  publisher = {O'Reilly},
  month = may,
  year = {2009},
}

@book{AMP,
  author = {Herlihy, Maurice and Shavit, Nir},
  title = {The Art of Multiprocessor Programming},
  publisher = {Morgan Kaufmann},
  year = {2008},
}

@article{Herlihy95scalableconcurrent,
  author = {Herlihy, Maurice and Lim, Beng-Hong and Shavit, Nir},
  title = {Scalable Concurrent Counting},
  journal = {ACM Transactions on Computer Systems},
  year = {1995},
  volume = {13},
  pages = {343--364},
}

@article{HotSpot,
  author = {Yew, P. C. and Tzeng, N. F. and Lawrie, D. H.},
  title = {Distributing hot-spot addressing in large-scale multiprocessors},
  journal = {IEEE Transactions on Computers},
  month = apr,
  year = {1987},
  pages = {388--395},
}

@inproceedings{Goodman89efficientsynchronization,
  author = {Goodman, James R. and Vernon, Mary K. and Woest, Philip J.},
  title = {Efficient Synchronization Primitives for large-scale cache-coherent multiprocessors},
  booktitle = {Proceedings of the Third International Conference on Architectural Support for Programming Languages and Operating Systems ({ASPLOS} III)},
  year = {1989},
  pages = {64--75},
}

@inproceedings{BOINC,
  author = {Anderson, David P.},
  title = {{BOINC}: A system for public-resource computing and storage},
  booktitle = {5th IEEE/ACM International Workshop on Grid Computing},
  year = {2004},
  pages = {4--10},
}

@article{Condor,
  author = {Thain, Douglas and Tannenbaum, Todd and Livny, Miron},
  title = {Distributed Computing in Practice: The {Condor} Experience},
  journal = {Concurrency and Computation: Practice and Experience},
  year = {2005},
  volume = {17},
  number = {2--4},
  pages = {323--356},
}

@misc{folding@home,
  author = {Larson, Stefan M. and Snow, Christopher D. and Shirts, Michael and Pande, Vijay S.},
  title = {{Folding@Home} and {Genome@Home}: Using distributed computing to tackle previously intractable problems in computational biology},
}

@inproceedings{freenet,
  author = {Clarke, Ian and Sandberg, Oskar and Wiley, Brandon and Hong, Theodore W.},
  title = {{Freenet}: A Distributed Anonymous Information Storage and Retrieval System},
  booktitle = {International Workshop on Designing Privacy Enhancing Technologies: Design Issues in Anonymity and Unobservability},
  year = {2001},
  pages = {46--66},
  publisher = {Springer-Verlag New York, Inc.},
}

@misc{SCOPE,
  author = {Chaiken, R. and Jenkins, B. and Larson, P. and Ramsey, B. and Shakib, D. and Weaver, S. and Zhou, J.},
  title = {{SCOPE}: Easy and Efficient Parallel Processing of Massive Data Sets},
}

@article{sawzall,
  author = {Pike, Rob and Dorward, Sean and Griesemer, Robert and Quinlan, Sean},
  title = {Interpreting the Data: Parallel Analysis with {Sawzall}},
  journal = {Scientific Programming},
  volume = {13},
  number = {4},
  pages = {277--298},
  year = {2005},
  note = {Special Issue on Grids and Worldwide Computing Programming Models and Infrastructure},
}

@misc{DryadLINQ,
  author = {Yu, Yuan and Isard, Michael and Fetterly, Dennis and Budiu, Mihai and Erlingsson, {\'U}lfar and Gunda, Pradeep Kumar and Currey, Jon},
  title = {{DryadLINQ}: A System for General-Purpose Distributed Data-Parallel Computing Using a High-Level Language},
}

@misc{LokadCloud,
  title = {Lokad-Cloud},
  note = {\url{http://code.google.com/p/lokad-cloud/}},
}

@misc{LokadCQRS,
  title = {Lokad-CQRS},
  note = {\url{http://lokad.github.com/lokad-cqrs/}},
}

@techreport{ARM1,
  author = {Armbrust, Michael and Fox, Armando and Griffith, Rean and Joseph, Anthony D. and Katz, Randy H. and Konwinski, Andrew and Lee, Gunho and Patterson, David A. and Rabkin, Ariel and Stoica, Ion and Zaharia, Matei},
  title = {Above the Clouds: A {Berkeley} View of Cloud Computing},
  institution = {EECS Department, University of California, Berkeley},
  year = {2009},
  month = feb,
  url = {http://www.eecs.berkeley.edu/Pubs/TechRpts/2009/EECS-2009-28.html},
  number = {UCB/EECS-2009-28},
}

@article{datacenterConsumption,
  author = {Koomey, Jonathan},
  title = {Worldwide electricity used in data centers},
  journal = {Environmental Research Letters},
  volume = {3},
  pages = {034008},
  month = sep,
  year = {2008},
}

@misc{CloudCost,
  author = {Greenberg, Albert and Hamilton, James and Maltz, David A. and Patel, Parveen},
  title = {The Cost of a Cloud: Research Problems in Data Center Networks},
}

@article{CloudAutomation,
  author = {Isard, M.},
  title = {Autopilot: Automatic data center management},
  journal = {Operating Systems Review},
  volume = {41},
  number = {2},
  year = {2007},
}

@techreport{KMeans_Initialization,
  author = {Peterson, A. D. and Ghosh, A. P. and Maitra, R.},
  title = {A systematic evaluation of different methods for initializing the {$k$}-means clustering algorithm},
  year = {2010},
}

@misc{Helland,
  author = {Helland, Pat},
  title = {Life beyond Distributed Transactions: an Apostate's Opinion},
}

@incollection{CloudDP,
  author = {Dai, Jinquan and Huang, Bo},
  title = {Design Patterns for Cloud Services},
  booktitle = {New Frontiers in Information and Software as Services},
  series = {Lecture Notes in Business Information Processing},
  volume = {74},
  pages = {31--56},
  publisher = {Springer},
  year = {2011},
  doi = {10.1007/978-3-642-19294-4_2},
}

@misc{Drepper,
  author = {Drepper, Ulrich},
  title = {What Every Programmer Should Know About Memory},
  year = {2007},
}

@inproceedings{TwoPhaseCommitProtocol,
  author = {Raz, Yoav},
  title = {The Dynamic Two Phase Commitment ({D2PC}) protocol},
  booktitle = {Database Theory - ICDT '95},
  series = {Lecture Notes in Computer Science},
  volume = {893},
  publisher = {Springer},
  isbn = {978-3-540-58907-5},
  year = {1995},
  pages = {162--176},
}

@misc{CloudConsistency,
  author = {Wada, Hiroshi and Fekete, Alan and Zhao, Liang and Lee, Kevin and Liu, Anna},
  title = {Data Consistency Properties and the Trade-offs in Commercial Cloud Storages: the Consumers' Perspective},
  year = {2007},
}

@article{DatacentersDesign,
  author = {Greenberg, Albert and others},
  title = {{VL2}: A Scalable and Flexible Data Center Network},
  journal = {Communications of the ACM},
  volume = {54},
  number = {3},
  year = {2011},
  pages = {95--104},
}

@misc{grid5000,
  author = {Lanteri, Stephane and Leduc, Julien and Melab, Nouredine and Mornet, Guillaume and Namyst, Raymond and Quetier, Benjamin and Richard, Olivier},
  title = {{Grid5000}: a large scale and highly reconfigurable Grid experimental testbed},
}

@inproceedings{AzureStorage,
  author = {Calder, Brad and Wang, Ju and Ogus, Aaron and Nilakantan, Niranjan and Skjolsvold, Arild and McKelvie, Sam and Xu, Yikang and Srivastav, Shashwat and Wu, Jiesheng and Simitci, Huseyin and Haridas, Jaidev and Uddaraju, Chakravarthy and Khatri, Hemal and Edwards, Andrew and Bedekar, Vaman and Mainali, Shane and Abbasi, Rafay and Agarwal, Arpit and Haq, Mian Fahim ul and Haq, Muhammad Ikram ul and Bhardwaj, Deepali and Dayanand, Sowmya and Adusumilli, Anitha and McNett, Marvin and Sankaran, Sriram and Manivannan, Kavitha and Rigas, Leonidas},
  title = {{Windows Azure Storage}: a highly available cloud storage service with strong consistency},
  booktitle = {Proceedings of the Twenty-Third ACM Symposium on Operating Systems Principles},
  series = {SOSP '11},
  year = {2011},
  isbn = {978-1-4503-0977-6},
  location = {Cascais, Portugal},
  pages = {143--157},
  numpages = {15},
  url = {http://doi.acm.org/10.1145/2043556.2043571},
  doi = {10.1145/2043556.2043571},
  acmid = {2043571},
  publisher = {ACM},
  address = {New York, NY, USA},
  keywords = {Windows Azure, cloud storage, distributed storage systems},
}

@inproceedings{BottouBengioKMeans,
  author = {Bottou, L{\'e}on and Bengio, Yoshua},
  title = {Convergence Properties of the {K-Means} Algorithms},
  booktitle = {Advances in Neural Information Processing Systems 7},
  year = {1995},
  pages = {585--592},
  publisher = {MIT Press},
}

@inproceedings{SlowLearners,
  author = {Zinkevich, Martin and Smola, Alex and Langford, John},
  title = {Slow learners are fast},
  booktitle = {Advances in Neural Information Processing Systems 22},
  year = {2009},
  pages = {2331--2339},
}

% Disabled draft of DekelShamir, superseded by the article entry further down.
% The leading at-sign is deliberately removed: BibTeX does not treat a percent
% sign as a comment marker, so an intact entry here would still be scanned and
% would clash with the real DekelShamir key.
%
% INPROCEEDINGS{DekelShamir,
%   author = {Ofer Dekel, Ran Gilad-Bachrach, Ohad Shamir, and Lin Xiao},
%   title = {Optimal distributed online prediction using mini-batches},
%   booktitle = {Advances in Neural Information Processing Systems 22},
%   year = {2009},
%   pages = {2331-2339},
% }

@techreport{PET1,
  author = {Peterson, A. D. and Ghosh, A. P. and Maitra, R.},
  title = {A systematic evaluation of different methods for initializing the {$k$}-means clustering algorithm},
  year = {2010},
}

@book{MIR1,
  author = {Mirkin, B.},
  title = {Clustering for data mining: a data recovery approach},
  year = {2005},
  publisher = {Chapman \& Hall/CRC},
}

@inproceedings{BRA1,
  author = {Bradley, P. S. and Fayyad, U. M.},
  title = {Refining initial points for {$k$}-means clustering},
  booktitle = {Proceedings of the Fifteenth International Conference on Machine Learning},
  year = {1998},
}

@article{MIL1,
  author = {Milligan, G. W. and Isaac, P. D.},
  title = {The validation of four ultrametric clustering algorithms},
  journal = {Pattern Recognition},
  volume = {12},
  pages = {41--50},
  year = {1980},
}

@article{DekelShamir,
  author = {Dekel, Ofer and Gilad-Bachrach, Ran and Shamir, Ohad and Xiao, Lin},
  title = {Optimal Distributed Online Prediction Using Mini-Batches},
  journal = {Journal of Machine Learning Research},
  issue_date = {3/1/2012},
  volume = {13},
  month = mar,
  year = {2012},
  issn = {1532-4435},
  pages = {165--202},
  numpages = {38},
  url = {http://dl.acm.org/citation.cfm?id=2188385.2188391},
  acmid = {2188391},
  publisher = {JMLR.org},
}

@article{Delalleau_Bengio_2007,
  author = {Delalleau, Olivier and Bengio, Yoshua},
  title = {Parallel Stochastic Gradient Descent},
  journal = {Proceedings of SPIE},
  volume = {6711},
  pages = {67110F--67110F-14},
  year = {2007},
  publisher = {SPIE},
  url = {http://link.aip.org/link/PSISDG/v6711/i1/p67110F/s1&Agg=doi},
}

@inproceedings{Zealous,
  author = {Louppe, G. and Geurts, P.},
  title = {A zealous parallel gradient descent algorithm},
  booktitle = {NIPS 2010 Workshop on Learning on Cores, Clusters and Clouds},
  year = {2010},
}

@misc{CloudSurvey,
  title = {Cloud Survey},
  note = {\url{http://assets1.csc.com/newsroom/downloads/CSC_Cloud_Usage_Index_Report.pdf} read the 28/03/2012},
}

@misc{CloudInvestment,
  title = {Microsoft Cloud Investment},
  note = {\url{http://www.bloomberg.com/news/2011-04-06/microsoft-s-courtois-says-to-spend-90-of-r-d-on-cloud-strategy.html} read the 28/03/2012},
}

@misc{NoSQLmeansNoAcid,
  title = {Problems with acid and how to fix them},
  note = {\url{http://dbmsmusings.blogspot.com/2010/08/problems-with-acid-and-how-to-fix-them.html} read the 28/03/2012},
}

@misc{AmazonBusiness,
  title = {Amazon Cloud To Break The 1 Billion dollars Barrier?},
  note = {\url{http://www.crn.com/news/cloud/231002515/amazon-cloud-to-break-the-1-billion-barrier.htm} read the 29/03/2012},
}

@article{MathematicsNature,
  author = {Wigner, Eugene},
  title = {The Unreasonable Effectiveness of Mathematics in the Natural Sciences},
  journal = {Communications on Pure and Applied Mathematics},
  volume = {13},
  number = {1},
  month = feb,
  year = {1960},
}

@article{DataEffectiveness,
  author = {Halevy, Alon and Norvig, Peter and Pereira, Fernando},
  title = {The Unreasonable Effectiveness of Data},
  journal = {IEEE Intelligent Systems},
  volume = {24},
  issn = {1541-1672},
  year = {2009},
  pages = {8--12},
  doi = {10.1109/MIS.2009.36},
  publisher = {IEEE Computer Society},
  address = {Los Alamitos, CA, USA},
}

@book{FourthParadigm,
  author = {Hey, Tony and Tansley, Stewart and Tolle, Kristin},
  title = {The Fourth Paradigm: Data-Intensive Scientific Discovery},
  publisher = {Microsoft Research},
  address = {Redmond, Washington},
  year = {2009},
}

@book{MapReduceForTextMining,
  author = {Lin, Jimmy and Dyer, Chris},
  title = {Data-intensive text processing with {MapReduce}},
  publisher = {Morgan \& Claypool Publishers},
  year = {2010},
}

@article{BridgingModel,
  author = {Valiant, Leslie G.},
  title = {A bridging model for parallel computation},
  journal = {Communications of the ACM},
  volume = {33},
  number = {8},
  pages = {103--111},
  year = {1990},
}

@book{HadoopGuide,
  author = {White, Tom},
  title = {{Hadoop}: The Definitive Guide},
  publisher = {O'Reilly},
  address = {Sebastopol, California},
  year = {2009},
}

@misc{AzurePricingCalculator,
  title = {Azure Pricing Calculator},
  note = {\url{http://www.windowsazure.com/en-us/pricing/calculator/advanced/} read the 25/04/2012},
}

@misc{AzurePricingDetails,
  title = {Azure Pricing Details},
  note = {\url{http://www.windowsazure.com/en-us/pricing/details/} read the 25/04/2012},
}

@misc{AsyncAPI,
  title = {Should I expose synchronous wrappers for asynchronous methods?},
  note = {\url{http://blogs.msdn.com/b/pfxteam/archive/2012/04/13/10293638.aspx} read the 26/04/2012},
}

@misc{AppDomainTrick,
  title = {AppDomain Trick},
  note = {\url{http://code.google.com/p/lokad-cloud/wiki/ExceptionHandling} read the 26/04/2012},
}

@unpublished{CloudMarinescu,
  author = {Marinescu, Dan C.},
  title = {Cloud Computing: Theory and Practice},
  note = {Lecture notes of the University of Central Florida, Orlando. p.218},
  year = {2012},
}

@misc{MpiEc2,
  title = {{MPI} cluster on {EC2}},
  note = {\url{http://datawrangling.s3.amazonaws.com/elasticwulf_pycon_talk.pdf} read the 03/05/2012},
}

@misc{ShardingCounters,
  author = {Gregorio, Joe},
  title = {Sharding counters},
  note = {\url{https://developers.google.com/appengine/articles/sharding_counters} read the 17/05/2012},
}

@book{GER1,
  author = {Gersho, A. and Gray, R. M.},
  title = {Vector quantization and signal compression},
  publisher = {Kluwer},
  year = {1992},
}

@article{KOH1,
  author = {Kohonen, T.},
  title = {Analysis of a simple self-organizing process},
  journal = {Biological Cybernetics},
  volume = {44},
  year = {1982},
  pages = {135--140},
}

@book{BEN1,
  author = {Benveniste, A. and M{\'e}tivier, M. and Priouret, P.},
  title = {Adaptive algorithms and stochastic approximations},
  year = {1990},
  publisher = {Springer-Verlag},
}

@book{BBL,
  author = {Bekkerman, Ron and Bilenko, Mikhail and Langford, John},
  title = {Scaling up Machine Learning},
  year = {2012},
  publisher = {Cambridge University Press},
}

@inproceedings{LOU1,
  author = {Louppe, G. and Geurts, P.},
  title = {A zealous parallel gradient descent algorithm},
  booktitle = {NIPS 2010 Workshop on Learning on Cores, Clusters and Clouds},
  year = {2010},
}

@inproceedings{LAN2,
  author = {Zinkevich, M. and Weimer, M. and Smola, A. and Li, L.},
  title = {Parallelized stochastic gradient descent},
  booktitle = {Advances in Neural Information Processing Systems 23},
  year = {2010},
}

@article{PAG1,
  author = {Pag{\`e}s, G.},
  title = {A space vector quantization for numerical integration},
  journal = {Journal of Computational and Applied Mathematics},
  year = {1997},
  volume = {89},
  pages = {1--38},
}

@article{BER3,
  author = {Bermejo, S. and Cabestany, J.},
  title = {The effect of finite sample size on on-line {$k$}-means},
  journal = {Neurocomputing},
  volume = {48},
  pages = {511--539},
  year = {2002},
}

@incollection{BOT5,
  author = {Bottou, L. and LeCun, Y.},
  title = {Large scale online learning},
  booktitle = {Advances in Neural Information Processing Systems 16},
  publisher = {MIT Press},
  year = {2004},
}

@article{BOT6,
  author = {Bottou, L. and LeCun, Y.},
  title = {On-line learning for very large datasets},
  journal = {Applied Stochastic Models in Business and Industry},
  year = {2005},
  volume = {21},
  pages = {137--151},
}

@book{KUS1,
  author = {Kushner, H. J. and Clark, D. S.},
  title = {Stochastic approximation for constrained and unconstrained systems},
  publisher = {Springer-Verlag},
  year = {1978},
}

@article{TSI1,
  author = {Tsitsiklis, J. and Bertsekas, D. and Athans, M.},
  title = {Distributed asynchronous deterministic and stochastic gradient optimization algorithms},
  journal = {IEEE Transactions on Automatic Control},
  volume = {31},
  pages = {803--812},
  year = {1986},
}

@inproceedings{MacQueen,
  author = {MacQueen, J. B.},
  title = {Some methods of classification and analysis of multivariate observations},
  booktitle = {Proceedings of the Fifth Berkeley Symposium on Mathematical Statistics and Probability},
  year = {1967},
}

@article{Lloyd,
  author = {Lloyd, S.},
  title = {Least squares quantization in {PCM}},
  journal = {IEEE Transactions on Information Theory},
  volume = {28},
  number = {2},
  pages = {129--137},
  year = {1982},
}

@inproceedings{ROS1,
  author = {Rossi, Fabrice and Conan-Guez, Brieuc and El Golli, A{\"\i}cha},
  title = {Clustering Functional Data with the {SOM} algorithm},
  booktitle = {Proceedings of ESANN 2004},
  year = {2004},
}

@book{DEB1,
  author = {de Boor, C.},
  title = {A practical guide to splines},
  publisher = {Springer-Verlag},
  year = {1978},
}

@book{GRE,
  author = {Greub, W. H.},
  title = {Linear algebra},
  edition = {Fourth},
  publisher = {Springer-Verlag},
  year = {1975},
}

@article{DEB,
  author = {de Boor, C.},
  title = {On calculating with {B-splines}},
  journal = {Journal of Approximation Theory},
  volume = {6},
  pages = {50--62},
  year = {1972},
}

@article{PatraDALVQ,
  author = {Patra, B.},
  title = {Convergence of distributed asynchronous learning vector quantization algorithms},
  journal = {ArXiv e-prints},
  archivePrefix = {arXiv},
  eprint = {1012.5150},
  primaryClass = {math.ST},
  keywords = {Mathematics - Statistics Theory},
  year = {2010},
  month = dec,
  adsurl = {http://adsabs.harvard.edu/abs/2010arXiv1012.5150P},
  adsnote = {Provided by the SAO/NASA Astrophysics Data System},
}

@inproceedings{AsynchronismGPU,
  author = {Contassot-Vivier, Sylvain and Jost, Thomas and Vialle, St{\'e}phane},
  title = {Impact of asynchronism on {GPU} accelerated parallel iterative computations},
  booktitle = {PARA 2010: State of the Art in Scientific and Parallel Computing},
  year = {2011},
  editor = {J{\'o}nasson, Kristj{\'a}n},
  series = {LNCS},
  publisher = {Springer, Heidelberg},
  note = {To be published},
}

@article{AsynchronousMultiThreads,
  author = {Hong, Bo and He, Zhengyu},
  title = {An Asynchronous Multithreaded Algorithm for the Maximum Network Flow Problem with Nonblocking Global Relabeling Heuristic},
  journal = {IEEE Transactions on Parallel and Distributed Systems},
  volume = {22},
  issn = {1045-9219},
  year = {2011},
  pages = {1025--1033},
  doi = {10.1109/TPDS.2010.156},
  publisher = {IEEE Computer Society},
  address = {Los Alamitos, CA, USA},
}

@article{AsynchronousHeterogeneousClusters,
  author = {Bahi, J. and Contassot-Vivier, S. and Couturier, R.},
  title = {Evaluation of the Asynchronous Iterative Algorithms in the Context of Distant Heterogeneous Clusters},
  journal = {Parallel Computing},
  volume = {31},
  number = {5},
  year = {2005},
  pages = {439--461},
}

@book{FRE1,
  author = {Freeman, A.},
  title = {Pro {.NET} 4 Parallel Programming in {C\#}},
  year = {2010},
  publisher = {Apress},
}

@inproceedings{LooseSynchronization,
  author = {Albrecht, Jeannie and Tuttle, Christopher and Snoeren, Alex C. and Vahdat, Amin},
  title = {Loose synchronization for large-scale networked systems},
  booktitle = {Proceedings of the annual conference on USENIX '06 Annual Technical Conference},
  series = {ATEC '06},
  year = {2006},
  location = {Boston, MA},
  pages = {28--28},
  numpages = {1},
  url = {http://dl.acm.org/citation.cfm?id=1267359.1267387},
  acmid = {1267387},
  publisher = {USENIX Association},
  address = {Berkeley, CA, USA},
}

@article{ABR,
  author = {Abraham, C. and Cornillon, P. A. and Matzner-L{\o}ber, E. and Molinari, N.},
  title = {Unsupervised Curve Clustering Using {B-Splines}},
  journal = {Scandinavian Journal of Statistics},
  volume = {30},
  pages = {581--595},
  year = {2003},
}

@article{BatchVsOnline,
  author = {Wilson, D. Randall and Martinez, Tony R.},
  title = {The general inefficiency of batch training for gradient descent learning},
  journal = {Neural Networks},
  volume = {16},
  number = {10},
  pages = {1429--1451},
  year = {2003},
  issn = {0893-6080},
  doi = {10.1016/S0893-6080(03)00138-2},
  url = {http://www.sciencedirect.com/science/article/pii/S0893608003001382},
}

@misc{BatchVsOnline2,
  author = {Fort, J. C. and Cottrell, M. and Letremy, P.},
  title = {Stochastic On-Line Algorithm versus Batch Algorithm for Quantization and Self Organizing Maps},
}