@techreport{AboveTheCloud,
  author = {Armbrust, M. and Fox, A. and Griffith, R. and Joseph, A. D. and Katz, R. and Konwinski, A. and Lee, G. and Patterson, D. and Rabkin, A. and Stoica, I. and Zaharia, M.},
  title = {Above the Clouds: A {Berkeley} View of Cloud Computing},
  institution = {EECS Department, University of California, Berkeley},
  number = {UCB/EECS-2009-28},
  year = {2009},
  month = feb,
  url = {http://www.eecs.berkeley.edu/Pubs/TechRpts/2009/EECS-2009-28.html},
}

@article{ABR,
  author = {Abraham, C. and Cornillon, P. A. and Matzner-L{\o}ber, E. and Molinari, N.},
  title = {Unsupervised Curve Clustering Using {B-Splines}},
  journal = {Scandinavian Journal of Statistics},
  volume = {30},
  pages = {581--595},
  year = {2003},
}

% Web resources are stored as misc entries with the URL and access date in the note field.
@misc{AmazonBusiness,
  title = {Amazon Cloud To Break The 1 Billion dollars Barrier?},
  note = {\url{http://www.crn.com/news/cloud/231002515/amazon-cloud-to-break-the-1-billion-barrier.htm} read the 29/03/2012},
}

@book{AMP,
  author = {Herlihy, M. and Shavit, N.},
  title = {The Art of Multiprocessor Programming},
  publisher = {Morgan Kaufmann Publishers Inc.},
  address = {San Francisco, CA, USA},
  year = {2008},
  isbn = {0123705916, 9780123705914},
}

@misc{AppDomainTrick,
  title = {{AppDomain} Trick},
  note = {\url{http://code.google.com/p/lokad-cloud/wiki/ExceptionHandling} read the 26/04/2012},
}

@misc{AsyncAPI,
  title = {Should {I} expose synchronous wrappers for asynchronous methods?},
  note = {\url{http://blogs.msdn.com/b/pfxteam/archive/2012/04/13/10293638.aspx} read the 26/04/2012},
}

@inproceedings{AsynchronismGPU,
  author = {Contassot-Vivier, S. and Jost, T. and Vialle, S.},
  title = {Impact of asynchronism on {GPU} accelerated parallel iterative computations},
  booktitle = {PARA 2010: State of the Art in Scientific and Parallel Computing},
  series = {LNCS},
  publisher = {Springer, Heidelberg},
  year = {2011},
}

@article{AsynchronousHeterogeneousClusters,
  author = {Bahi, J. and Contassot-Vivier, S.
and Couturier, R.},
  title = {Evaluation of the Asynchronous Iterative Algorithms in the Context of Distant Heterogeneous Clusters},
  journal = {Parallel Computing},
  volume = {31},
  number = {5},
  year = {2005},
  pages = {439--461},
}

@article{AsynchronousMultiThreads,
  author = {Hong, B. and He, Z.},
  title = {An Asynchronous Multithreaded Algorithm for the Maximum Network Flow Problem with Nonblocking Global Relabeling Heuristic},
  journal = {IEEE Transactions on Parallel and Distributed Systems},
  volume = {22},
  year = {2011},
  pages = {1025--1033},
  issn = {1045-9219},
  doi = {10.1109/TPDS.2010.156},
  publisher = {IEEE Computer Society},
  address = {Los Alamitos, CA, USA},
}

% Windows Azure web resources (URL kept in the note field).
@misc{Azure_Pricing,
  title = {Azure Pricing},
  note = {\url{http://www.microsoft.com/windowsazure/pricing/}},
}

@misc{AzurePricingCalculator,
  title = {Azure Pricing Calculator},
  note = {\url{http://www.windowsazure.com/en-us/pricing/calculator/advanced/} read the 25/04/2012},
}

@misc{AzurePricingDetails,
  title = {Azure Pricing Details},
  note = {\url{http://www.windowsazure.com/en-us/pricing/details/} read the 25/04/2012},
}

@misc{AzureScope,
  title = {{Azure Scope}},
  note = {\url{http://azurescope.cloudapp.net/}},
}

@misc{AzureSLA,
  title = {{Azure Service Level Agreement}},
  note = {\url{http://www.microsoft.com/windowsazure/sla/}},
}

@inproceedings{AzureStorage,
  author = {Calder, B. and Wang, J. and Ogus, A. and Nilakantan, N. and Skjolsvold, A. and McKelvie, S. and Xu, Y. and Srivastav, S. and Wu, J. and Simitci, H. and Haridas, J. and Uddaraju, C. and Khatri, H. and Edwards, A. and Bedekar, V. and Mainali, S. and Abbasi, R. and Agarwal, A. and Haq, M. F. and Haq, M. I. and Bhardwaj, D. and Dayanand, S. and Adusumilli, A. and McNett, M. and Sankaran, S. and Manivannan, K.
and Rigas, L.},
  title = {{Windows Azure Storage}: a highly available cloud storage service with strong consistency},
  booktitle = {Proceedings of the Twenty-Third ACM Symposium on Operating Systems Principles},
  series = {SOSP '11},
  year = {2011},
  isbn = {978-1-4503-0977-6},
  location = {Cascais, Portugal},
  pages = {143--157},
  numpages = {15},
  url = {http://doi.acm.org/10.1145/2043556.2043571},
  doi = {10.1145/2043556.2043571},
  acmid = {2043571},
  publisher = {ACM},
  address = {New York, NY, USA},
  keywords = {Windows Azure, cloud storage, distributed storage systems},
}

@misc{AzureStorageResources,
  title = {Azure Storage Resources},
  note = {\url{http://blogs.msdn.com/b/windowsazurestorage/archive/2010/03/28/windows-azure-storage-resources.aspx}},
}

@article{BASE,
  author = {Pritchett, D.},
  title = {{BASE}: An Acid Alternative},
  journal = {Queue},
  issue_date = {May/June 2008},
  volume = {6},
  number = {3},
  month = may,
  year = {2008},
  issn = {1542-7730},
  pages = {48--55},
  numpages = {8},
  url = {http://doi.acm.org/10.1145/1394127.1394128},
  doi = {10.1145/1394127.1394128},
  acmid = {1394128},
  publisher = {ACM},
  address = {New York, NY, USA},
}

@article{BatchVsOnline,
  author = {Wilson, D. Randall and Martinez, Tony R.},
  title = {The general inefficiency of batch training for gradient descent learning},
  journal = {Neural Networks},
  volume = {16},
  number = {10},
  pages = {1429--1451},
  year = {2003},
  issn = {0893-6080},
  doi = {10.1016/S0893-6080(03)00138-2},
  url = {http://www.sciencedirect.com/science/article/pii/S0893608003001382},
}

% Workshop paper: the proceedings title belongs in booktitle, not journal.
% The exported placeholders volume 00 and number C were dropped.
@inproceedings{BatchVsOnline2,
  title = {Stochastic on-line algorithm versus batch algorithm for quantization and self organizing maps},
  booktitle = {Neural Networks for Signal Processing XI: Proceedings of the 2001 {IEEE} Signal Processing Society Workshop},
  publisher = {IEEE},
  address = {Piscataway, NJ, USA},
  author = {Fort, J. C. and Cottrell, M.
and Letremy, P.},
  year = {2001},
  pages = {43--52},
}

@book{BBL,
  author = {Bekkerman, R. and Bilenko, M. and Langford, J.},
  title = {Scaling up Machine Learning},
  publisher = {Cambridge University Press},
  year = {2012},
}

@article{DEB,
  author = {de Boor, C.},
  title = {On calculating with {B-splines}},
  journal = {Journal of Approximation Theory},
  volume = {6},
  pages = {50--62},
  year = {1972},
}

@book{DEB1,
  author = {de Boor, C.},
  title = {A practical guide to splines},
  publisher = {Springer-Verlag},
  year = {1978},
}

@article{BenchmarkingAmazonEC2,
  author = {Walker, E.},
  title = {Benchmarking {Amazon EC2} for high-performance scientific computing},
  journal = {LOGIN},
  volume = {33},
  number = {5},
  pages = {18--23},
  month = oct,
  year = {2008},
  keywords = {cloud},
  citeulike-article-id = {6867304},
  posted-at = {2010-03-17 19:48:19},
  priority = {2},
}

@inproceedings{BenchmarkingCloudServices,
  author = {Cooper, B. F. and Silberstein, A. and Tam, E. and Ramakrishnan, R. and Sears, R.},
  title = {Benchmarking cloud serving systems with {YCSB}},
  booktitle = {Proceedings of the 1st ACM symposium on Cloud computing},
  series = {SoCC '10},
  year = {2010},
  isbn = {978-1-4503-0036-0},
  location = {Indianapolis, Indiana, USA},
  pages = {143--154},
  numpages = {12},
  url = {http://doi.acm.org/10.1145/1807128.1807152},
  doi = {10.1145/1807128.1807152},
  acmid = {1807152},
  publisher = {ACM},
  address = {New York, NY, USA},
  keywords = {benchmarking, cloud serving database},
}

@book{BEN1,
  author = {Benveniste, A. and M{\'e}tivier, M. and Priouret, P.},
  title = {Adaptive algorithms and stochastic approximations},
  publisher = {Springer-Verlag},
  year = {1990},
}

@article{BER3,
  title = {The effect of finite sample size on on-line $k$-means},
  journal = {Neurocomputing},
  volume = {48},
  pages = {511--539},
  year = {2002},
  author = {Bermejo, S.
and Cabestany, J.},
}

@article{BiauKMeans,
  author = {Biau, G{\'e}rard and Devroye, Luc and Lugosi, G{\'a}bor},
  title = {On the Performance of Clustering in {Hilbert} Spaces},
  journal = {IEEE Transactions on Information Theory},
  volume = {54},
  number = {2},
  year = {2008},
  pages = {781--790},
  doi = {10.1109/TIT.2007.913516},
  ee = {http://dx.doi.org/10.1109/TIT.2007.913516},
  bibsource = {DBLP, http://dblp.uni-trier.de},
}

@inproceedings{bigtable,
  author = {Chang, F. and Dean, J. and Ghemawat, S. and Hsieh, W. C. and Wallach, D. A. and Burrows, M. and Chandra, T. and Fikes, A. and Gruber, R. E.},
  title = {Bigtable: A distributed storage system for structured data},
  booktitle = {Proceedings of the 7th Conference on {USENIX} Symposium on Operating Systems Design and Implementation - Volume 7},
  year = {2006},
  pages = {205--218},
}

@inproceedings{BOINC,
  author = {Anderson, D. P.},
  title = {{BOINC}: A system for public-resource computing and storage},
  booktitle = {5th IEEE/ACM International Workshop on Grid Computing},
  year = {2004},
  pages = {4--10},
}

@inproceedings{BottouBengioKMeans,
  author = {Bottou, L. and Bengio, Y.},
  title = {Convergence Properties of the {K-Means} Algorithms},
  booktitle = {Advances in Neural Information Processing Systems 7},
  year = {1995},
  pages = {585--592},
  publisher = {MIT Press},
}

@incollection{BOT5,
  author = {Bottou, L. and LeCun, Y.},
  title = {Large scale online learning},
  booktitle = {Advances in Neural Information Processing Systems 16},
  publisher = {MIT Press},
  year = {2004},
}

@article{BOT6,
  author = {Bottou, L. and LeCun, Y.},
  title = {On-line learning for very large datasets},
  journal = {Applied Stochastic Models in Business and Industry},
  year = {2005},
  volume = {21},
  pages = {137--151},
}

% Conference paper: typed as inproceedings so that booktitle is actually printed.
% NOTE(review): volume 727 looks like an export artefact; kept as found, please verify.
@inproceedings{BRA1,
  title = {Refining Initial Points for {K-Means} Clustering},
  volume = {727},
  booktitle = {Proceedings of the 15th International Conference on Machine Learning},
  publisher = {Morgan Kaufmann},
  address = {San Francisco, CA},
  author = {Bradley, P. S. and Fayyad, U.
M.},
  year = {1998},
  pages = {91--99},
}

@article{BridgingModel,
  author = {Valiant, L. G.},
  title = {A bridging model for parallel computation},
  journal = {Communications of the ACM},
  volume = {33},
  number = {8},
  publisher = {ACM},
  year = {1990},
  pages = {103--111},
}

% NOTE(review): journal, volume and year look like auto-export placeholders
% (the URL points to a 2010 Microsoft Research PDF); kept as found, please verify.
@article{BridgingtheGap,
  author = {Simmhan, Y. and van Ingen, C. and Subramanian, G. and Li, J.},
  title = {Bridging the Gap between the Cloud and an {eScience} Application Platform},
  journal = {Science},
  volume = {64},
  number = {1},
  year = {2003},
  pages = {56--68},
  url = {http://research.microsoft.com/pubs/118329/Simmhan2010CloudSciencePlatform.pdf},
}

@article{CAP_Theorem,
  author = {Gilbert, S. and Lynch, N.},
  title = {{Brewer's} conjecture and the feasibility of consistent, available, partition-tolerant web services},
  journal = {SIGACT News},
  volume = {33},
  number = {2},
  year = {2002},
  issn = {0163-5700},
  pages = {51--59},
  doi = {10.1145/564585.564601},
  publisher = {ACM},
  address = {New York, NY, USA},
  note* = {Demonstration of the CAP Theorem},
}

@techreport{CloudMapReduce2010,
  author = {Liu, H. and Orban, D.},
  title = {{Cloud MapReduce}: a {MapReduce} Implementation on top of a Cloud Operating System},
  institution = {Accenture Technology Labs},
  year = {2009},
  type = {Technical report},
  note = {\url{http://code.google.com/p/cloudmapreduce/}},
}

% Lecture notes: typed as unpublished so the description is printed via note
% (the former book entry carried it in a booktitle field, which book styles ignore).
@unpublished{CloudMarinescu,
  author = {Marinescu, D. C.},
  title = {Cloud Computing: Theory and Practice},
  note = {Lecture notes of the University of Central Florida, Orlando. p.218},
  year = {2012},
}

@misc{CLAP,
  author = {Abadi, D.},
  title = {Problems with {CAP}, and {Yahoo's} little known {NoSQL} System},
  note = {\url{http://dbmsmusings.blogspot.com/2010/04/problems-with-cap-and-yahoos-little.html} read the 28/03/2012},
}

@article{CloudAutomation,
  author = {Isard, M.},
  title = {Autopilot: automatic data center management},
  journal = {SIGOPS Oper. Syst.
Rev.},
  issue_date = {April 2007},
  volume = {41},
  number = {2},
  month = apr,
  year = {2007},
  issn = {0163-5980},
  pages = {60--67},
  numpages = {8},
  url = {http://doi.acm.org/10.1145/1243418.1243426},
  doi = {10.1145/1243418.1243426},
  acmid = {1243426},
  publisher = {ACM},
  address = {New York, NY, USA},
  keywords = {automatic management, cluster computing},
}

% Conference paper (CIDR 2011); the exported journal value "Reading" was a placeholder.
@inproceedings{CloudConsistency,
  author = {Wada, H. and Fekete, A. and Zhao, L. and Lee, K. and Liu, A.},
  title = {Data Consistency Properties and the Trade-offs in Commercial Cloud Storages: the Consumers' Perspective},
  booktitle = {{CIDR} 2011, Fifth Biennial Conference on Innovative Data Systems Research},
  year = {2011},
  pages = {134--143},
}

@article{CloudCost,
  author = {Greenberg, A. and Hamilton, J. and Maltz, D. A. and Patel, P.},
  title = {The cost of a cloud: research problems in data center networks},
  journal = {SIGCOMM Comput. Commun. Rev.},
  issue_date = {January 2009},
  volume = {39},
  number = {1},
  month = dec,
  year = {2008},
  issn = {0146-4833},
  pages = {68--73},
  numpages = {6},
  url = {http://doi.acm.org/10.1145/1496091.1496103},
  doi = {10.1145/1496091.1496103},
  acmid = {1496103},
  publisher = {ACM},
  address = {New York, NY, USA},
  keywords = {cloud-service data centers, costs, network challenges},
}

% Chapter in an edited volume: the book title goes in booktitle (it was in journal, which is ignored here).
@incollection{CloudDP,
  title = {Design Patterns for Cloud Services},
  booktitle = {New Frontiers in Information and Software as Services},
  volume = {74},
  number = {880},
  publisher = {Springer Berlin Heidelberg},
  url = {http://www.springerlink.com/index/L311566657T812KW.pdf},
  author = {Dai, J.
and Huang, B.},
  editor = {Agrawal, Divyakant and Candan, K. Sel{\c{c}}uk and Li, Wen-Syan},
  year = {2011},
  pages = {31--56},
}

@misc{CloudInvestment,
  title = {Microsoft Cloud Investment},
  note = {\url{http://www.bloomberg.com/news/2011-04-06/microsoft-s-courtois-says-to-spend-90-of-r-d-on-cloud-strategy.html} read the 28/03/2012},
}

@misc{CloudSurvey,
  title = {Cloud Survey},
  note = {\url{http://assets1.csc.com/newsroom/downloads/CSC_Cloud_Usage_Index_Report.pdf} read the 28/03/2012},
}

@article{Commodity_Grid_With_Amazon,
  author = {Garfinkel, S.},
  title = {Commodity grid computing with {Amazon} {S3} and {EC2}},
  journal = {Usenix},
  volume = {32},
  publisher = {Naval postgraduate school monterey CA dept of computer science},
  year = {2007},
  pages = {7--13},
}

% The former pages value 2--4 was the issue range; the article spans pages 323--356.
@article{Condor,
  author = {Thain, D. and Tannenbaum, T. and Livny, M.},
  title = {Distributed Computing in Practice: The {Condor} Experience},
  journal = {Concurrency and Computation: Practice and Experience},
  year = {2005},
  volume = {17},
  number = {2--4},
  pages = {323--356},
}

@article{datacenterConsumption,
  author = {Koomey, J. G.},
  title = {Worldwide electricity used in data centers},
  journal = {Environmental Research Letters},
  volume = {3},
  number = {3},
  year = {2008},
  pages = {034008},
  url = {http://stacks.iop.org/1748-9326/3/i=3/a=034008?key=crossref.976165ab72937d3bd0a21f91e350c756},
}

% Journal article (CACM): volume and issue moved out of the former booktitle string.
@article{DatacentersDesign,
  author = {Greenberg, A. and Hamilton, J. R. and Jain, N. and Kandula, S. and Kim, C. and Lahiri, P. and Maltz, D. A. and Patel, P. and Sengupta, S.},
  title = {{VL2}: A Scalable and Flexible Data Center Network},
  journal = {Communications of the ACM},
  volume = {54},
  number = {3},
  year = {2011},
  pages = {95--104},
}

% Journal article (ACM Computing Surveys): volume, issue and month moved out of booktitle.
@article{DataClusteringReview,
  author = {Jain, A. K. and Murty, M. N. and Flynn, P. J.},
  title = {Data Clustering: A Review},
  journal = {ACM Computing Surveys},
  volume = {31},
  number = {3},
  month = sep,
  year = {1999},
}

@article{DataEffectiveness,
  author = {Halevy, A. and Norvig, P.
and Pereira, F.},
  title = {The Unreasonable Effectiveness of Data},
  journal = {IEEE Intelligent Systems},
  volume = {24},
  issn = {1541-1672},
  year = {2009},
  pages = {8--12},
  doi = {10.1109/MIS.2009.36},
  publisher = {IEEE Computer Society},
  address = {Los Alamitos, CA, USA},
}

@article{DekelShamir,
  author = {Dekel, O. and Gilad-Bachrach, R. and Shamir, O. and Xiao, L.},
  title = {Optimal Distributed Online Prediction Using Mini-Batches},
  journal = {Journal of Machine Learning Research},
  issue_date = {3/1/2012},
  volume = {13},
  month = mar,
  year = {2012},
  issn = {1532-4435},
  pages = {165--202},
  numpages = {38},
  url = {http://dl.acm.org/citation.cfm?id=2188385.2188391},
  acmid = {2188391},
  publisher = {JMLR.org},
}

% NOTE(review): venue, volume and pages look like a mismatched auto-export record; kept as found, please verify.
@article{Delalleau_Bengio_2007,
  author = {Delalleau, O. and Bengio, Y.},
  title = {Parallel Stochastic Gradient Descent},
  journal = {Proceedings of SPIE},
  volume = {6711},
  publisher = {SPIE},
  year = {2007},
  pages = {67110F--67110F-14},
  url = {http://link.aip.org/link/PSISDG/v6711/i1/p67110F/s1&Agg=doi},
}

@inproceedings{DhillonModha2000,
  author = {Dhillon, I. S. and Modha, D. S.},
  title = {A Data-Clustering Algorithm on Distributed Memory Multiprocessors},
  booktitle = {Revised Papers from Large-Scale Parallel Data Mining, Workshop on Large-Scale Parallel KDD Systems, SIGKDD},
  year = {2000},
  isbn = {3-540-67194-3},
  pages = {245--260},
  publisher = {Springer-Verlag},
  address = {London, UK},
}

@misc{Drepper,
  author = {Drepper, U.},
  title = {What Every Programmer Should Know About Memory},
  year = {2007},
}

@inproceedings{Dryad2007,
  author = {Isard, M. and Budiu, M. and Yu, Y. and Birrell, A.
and Fetterly, D.},
  title = {Dryad: distributed data-parallel programs from sequential building blocks},
  booktitle = {EuroSys '07: Proceedings of the 2nd ACM SIGOPS/EuroSys European Conference on Computer Systems 2007},
  year = {2007},
  isbn = {978-1-59593-636-3},
  pages = {59--72},
  location = {Lisbon, Portugal},
  doi = {10.1145/1272996.1273005},
  publisher = {ACM},
  address = {New York, NY, USA},
  file = {hpc/dryad/dryad-eurosys07.pdf},
}

@inproceedings{DryadLINQ,
  author = {Yu, Y. and Isard, M. and Fetterly, D. and Budiu, M. and Erlingsson, U. and Kumar, P. and Currey, G. J.},
  title = {{DryadLINQ}: a system for general-purpose distributed data-parallel computing using a high-level language},
  booktitle = {Proceedings of the 8th USENIX conference on Operating systems design and implementation},
  series = {OSDI'08},
  year = {2008},
  location = {San Diego, California},
  pages = {1--14},
  numpages = {14},
  url = {http://dl.acm.org/citation.cfm?id=1855741.1855742},
  acmid = {1855742},
  publisher = {USENIX Association},
  address = {Berkeley, CA, USA},
}

@article{Early_Observations_On_Azure,
  author = {Hill, Z. and Li, J. and Mao, M. and Ruiz-Alvarez, A. and Humphrey, M.},
  title = {Early observations on the performance of {Windows Azure}},
  journal = {Sci.
Program.},
  issue_date = {April 2011},
  volume = {19},
  number = {2--3},
  month = apr,
  year = {2011},
  issn = {1058-9244},
  pages = {121--132},
  numpages = {12},
  url = {http://dl.acm.org/citation.cfm?id=2019396.2019401},
  acmid = {2019401},
  publisher = {IOS Press},
  address = {Amsterdam, The Netherlands},
  keywords = {Cloud computing, Windows Azure, eScience, performance},
}

@article{Eventually_Consistent,
  author = {Vogels, W.},
  title = {Eventually Consistent},
  journal = {Queue},
  issue_date = {October 2008},
  volume = {6},
  number = {6},
  month = oct,
  year = {2008},
  issn = {1542-7730},
  pages = {14--19},
  numpages = {6},
  url = {http://doi.acm.org/10.1145/1466443.1466448},
  doi = {10.1145/1466443.1466448},
  acmid = {1466448},
  publisher = {ACM},
  address = {New York, NY, USA},
}

@inproceedings{Eucalyptus,
  author = {Nurmi, D. and Wolski, R. and Grzegorczyk, C. and Obertelli, G. and Soman, S. and Youseff, L. and Zagorodnov, D.},
  title = {The {Eucalyptus} Open-Source Cloud-Computing System},
  booktitle = {Proceedings of the 2009 9th IEEE/ACM International Symposium on Cluster Computing and the Grid},
  series = {CCGRID '09},
  year = {2009},
  isbn = {978-0-7695-3622-4},
  pages = {124--131},
  numpages = {8},
  url = {http://dx.doi.org/10.1109/CCGRID.2009.93},
  doi = {10.1109/CCGRID.2009.93},
  acmid = {1577895},
  publisher = {IEEE Computer Society},
  address = {Washington, DC, USA},
  keywords = {cloud computing, virtualization},
}

% Book chapter; the exported journal value "Security" was a placeholder and the
% editor name carried a trailing "Editor" artefact.
% NOTE(review): booktitle restored from the chapter's usual citation; year kept as found, please verify both.
@incollection{folding@home,
  author = {Larson, S. M. and Snow, C. D. and Shirts, M. and Pande, V. S.},
  title = {{Folding@Home} and {Genome@Home}: Using distributed computing to tackle previously intractable problems in computational biology},
  booktitle = {Computational Genomics},
  editor = {Grant, Richard P.},
  publisher = {Horizon Press},
  year = {2009},
  url = {http://arxiv.org/abs/0901.0866},
}

@book{FourthParadigm,
  author = {Hey, T. and Tansley, S.
and Tolle, K.},
  title = {The Fourth Paradigm: Data-Intensive Scientific Discovery},
  publisher = {Microsoft Research},
  address = {Redmond, Washington},
  year = {2009},
}

@book{FRE1,
  author = {Freeman, A.},
  title = {Pro {.NET} 4 Parallel Programming in {C\#}},
  publisher = {Apress},
  year = {2010},
}

@inproceedings{freenet,
  author = {Clarke, I. and Sandberg, O. and Wiley, B. and Hong, T. W.},
  title = {Freenet: A Distributed Anonymous Information Storage and Retrieval System},
  booktitle = {International Workshop on Designing Privacy Enhancing Technologies: Design Issues in Anonymity and Unobservability},
  year = {2001},
  pages = {46--66},
  publisher = {Springer-Verlag New York, Inc.},
}

@book{GER1,
  author = {Gersho, A. and Gray, R. M.},
  title = {Vector quantization and signal compression},
  publisher = {Kluwer},
  year = {1992},
}

@article{Goodman89efficientsynchronization,
  author = {Goodman, James R. and Vernon, Mary K. and Woest, Philip J.},
  title = {Efficient synchronization primitives for large-scale cache-coherent multiprocessors},
  journal = {SIGARCH Comput. Archit. News},
  issue_date = {April 1989},
  volume = {17},
  number = {2},
  month = apr,
  year = {1989},
  issn = {0163-5964},
  pages = {64--75},
  numpages = {12},
  url = {http://doi.acm.org/10.1145/68182.68188},
  doi = {10.1145/68182.68188},
  acmid = {68188},
  publisher = {ACM},
  address = {New York, NY, USA},
}

@article{GoogleFileSystem,
  author = {Ghemawat, S. and Gobioff, H. and Leung, S.},
  title = {The {Google} file system},
  journal = {SIGOPS Oper. Syst. Rev.},
  issue_date = {December 2003},
  volume = {37},
  number = {5},
  month = oct,
  year = {2003},
  issn = {0163-5980},
  pages = {29--43},
  numpages = {15},
  url = {http://doi.acm.org/10.1145/1165389.945450},
  doi = {10.1145/1165389.945450},
  acmid = {945450},
  publisher = {ACM},
  address = {New York, NY, USA},
  keywords = {clustered storage, data storage, fault tolerance, scalability},
}

@book{GRE,
  author = {Greub, W.
H.},
  title = {Linear algebra},
  edition = {Fourth},
  publisher = {Springer-Verlag},
  year = {1975},
}

@inproceedings{Graphlab2010,
  author = {Low, Y. and Gonzalez, J. and Kyrola, A. and Bickson, D. and Guestrin, C. and Hellerstein, J. M.},
  title = {{GraphLab}: A New Parallel Framework for Machine Learning},
  booktitle = {Conference on Uncertainty in Artificial Intelligence (UAI)},
  month = jul,
  year = {2010},
  address = {Catalina Island, California},
  wwwfilebase = {uai2010-low-gonzalez-kyrola-bickson-guestrin-hellerstein},
  wwwtopic = {Parallel Learning},
}

@inproceedings{grid5000,
  author = {Cappello, F. and Caron, E. and Dayde, M. and Desprez, F. and Jegou, Y. and Primet, P. and Jeannot, E. and Lanteri, S. and Leduc, J. and Melab, N. and Mornet, G. and Namyst, R. and Quetier, B. and Richard, O.},
  title = {{Grid'5000}: A Large Scale and Highly Reconfigurable Grid Experimental Testbed},
  booktitle = {Proceedings of the 6th IEEE/ACM International Workshop on Grid Computing},
  series = {GRID '05},
  year = {2005},
  isbn = {0-7803-9492-5},
  pages = {99--106},
  numpages = {8},
  url = {http://dx.doi.org/10.1109/GRID.2005.1542730},
  doi = {10.1109/GRID.2005.1542730},
  acmid = {1253484},
  publisher = {IEEE Computer Society},
  address = {Washington, DC, USA},
}

@book{Hadoop,
  author = {White, Tom},
  title = {{Hadoop}: The Definitive Guide},
  edition = {First},
  publisher = {O'Reilly Media, Inc.},
  year = {2009},
  isbn = {0596521979, 9780596521974},
}

% year added (required for inproceedings); the value is taken from the booktitle.
@inproceedings{Helland,
  author = {Helland, P.},
  title = {Life beyond Distributed Transactions: an Apostate's Opinion},
  booktitle = {{CIDR} 2007, Third Biennial Conference on Innovative Data Systems Research, Asilomar, {CA, USA}, January 7-10, 2007, Online Proceedings},
  year = {2007},
  pages = {132--141},
}

@proceedings{DBLP:conf/cidr/2007,
  title = {CIDR 2007, Third Biennial Conference on Innovative Data Systems Research, Asilomar, CA, USA, January 7-10, 2007, Online Proceedings},
  booktitle = {CIDR},
  publisher = {www.cidrdb.org},
  year = {2007},
  bibsource = {DBLP,
http://dblp.uni-trier.de}
}

% NOTE(review): the page range 3--0 is not a valid range (probably truncated); kept as found, please verify.
@inproceedings{Hennig99,
  author = {Hennig, C.},
  title = {Models And Methods For Clusterwise Linear Regression},
  booktitle = {Proceedings in Computational Statistics},
  year = {1999},
  pages = {3--0},
  publisher = {Springer},
}

@article{Herlihy95scalableconcurrent,
  author = {Herlihy, M. and Lim, B. H. and Shavit, N.},
  title = {Scalable Concurrent Counting},
  journal = {ACM Transactions on Computer Systems},
  year = {1995},
  volume = {13},
  pages = {343--364},
}

% NOTE(review): no venue or year is recorded; typed as misc so the entry does not fail
% the required-field checks of an article. Please complete it.
@misc{HighPerformanceComputingWithClouds,
  author = {Masud, R.},
  title = {High Performance Computing with Clouds},
}

@article{HotSpot,
  author = {Yew, P. C. and Tzeng, N. F. and Lawrie, D. H.},
  title = {Distributing hot-spot addressing in large-scale multiprocessors},
  journal = {IEEE Transactions on Computers},
  month = apr,
  year = {1987},
  pages = {388--395},
}

% Symposium paper: proceedings title moved to booktitle; "April" moved from number to month.
@inproceedings{HPS_1,
  author = {Abandah, G. A. and Davidson, E. S.},
  title = {Modeling the Communication and Computation Performance of the {IBM SP2}},
  booktitle = {Proceedings of the 10th International Parallel Processing Symposium},
  month = apr,
  year = {1996},
}

% The first author's family name is Xu (given name Zhiwei); the exported record had them swapped.
@article{HPS_2,
  author = {Xu, Z. and Hwang, K.},
  title = {Modeling communication overhead: {MPI} and {MPL} performance on the {IBM SP2}},
  journal = {IEEE Parallel \& Distributed Technology: Systems \& Applications},
  volume = {4},
  number = {1},
  year = {1996},
  pages = {9--23},
}

@inproceedings{KMeansIsNPhard,
  author = {Mahajan, M. and Nimbhorkar, P.
and Varadarajan, K.},
  title = {The Planar {k-Means} Problem is {NP-Hard}},
  booktitle = {Proceedings of the 3rd International Workshop on Algorithms and Computation},
  series = {WALCOM '09},
  year = {2009},
  isbn = {978-3-642-00201-4},
  location = {Kolkata, India},
  pages = {274--285},
  numpages = {12},
  url = {http://dx.doi.org/10.1007/978-3-642-00202-1_24},
  doi = {10.1007/978-3-642-00202-1_24},
  acmid = {1507122},
  publisher = {Springer-Verlag},
  address = {Berlin, Heidelberg},
}

@techreport{KMeansJoshi,
  author = {Joshi, M. N.},
  title = {Parallel {K-means} Algorithm on Distributed Memory Multiprocessors},
  institution = {Computer Science Department, University of Minnesota, Twin Cities},
  year = {2003},
}

@article{KMeansOnNOWs,
  author = {Kantabutra, S. and Couch, A. L.},
  title = {Parallel {K-Means} Clustering Algorithm on {NOWs}},
  journal = {NecTec Technical Journal},
  month = jan,
  year = {2000},
  note* = {Only article I read where parallelisation is 1 worker per cluster instead of 1 worker for N/P points},
}

% No journal is recorded for this item; typed as misc and the empty journal/month fields were removed.
@misc{KMeansParallelInC,
  author = {Liao, W.},
  title = {Parallel {K-Means} Data Clustering},
  year = {2005},
  note* = {Code en C},
}

% Conference paper (SoCG 2006); the exported journal value "Construction" was a placeholder.
@inproceedings{KMeansSlow,
  author = {Arthur, D. and Vassilvitskii, S.},
  title = {How Slow is the {k-Means} Method?},
  booktitle = {Proceedings of the Twenty-Second Annual Symposium on Computational Geometry},
  publisher = {ACM},
  pages = {144--153},
  year = {2006},
}

@article{KOH1,
  author = {Kohonen, T.},
  title = {Analysis of a simple self-organizing process},
  journal = {Biological Cybernetics},
  volume = {44},
  year = {1982},
  pages = {135--140},
}

@book{KUS1,
  author = {Kushner, H. J. and Clark, D. S.},
  title = {Stochastic approximation for constrained and unconstrained systems},
  publisher = {Springer-Verlag},
  year = {1978},
}

@inproceedings{LAN2,
  author = {Zinkevich, M. and Weimer, M. and Smola, A. and Li, L.},
  title = {Parallelized stochastic gradient descent},
  booktitle = {Advances in Neural Information Processing Systems 23},
  year = {2010},
}

@inproceedings{lenk2009witcaamotcl,
  author = {Lenk, A. and Klems, M. and Nimis, J. and Tai, S.
and Sandholm, T.},
  booktitle = {ICSE Workshop on Software Engineering Challenges of Cloud Computing, 2009. CLOUD 09.},
  month = may,
  publisher = {IEEE Press},
  title = {What's inside the Cloud? An Architectural Map of the Cloud Landscape},
  year = {2009},
}

@inproceedings{Lin09bruteforce,
  author = {Lin, J.},
  title = {Brute force and indexed approaches to pairwise document similarity comparisons with {MapReduce}},
  booktitle = {Proceedings of the 32nd Annual International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR 2009)},
  year = {2009},
  pages = {155--162},
  note* = { Bio-Statistics research article, on queries run on a 240 machines cluster using Hadoop},
}

@article{Lloyd,
  author = {Lloyd, S.},
  title = {Least squares quantization in {PCM}},
  journal = {IEEE Transactions on Information Theory},
  volume = {28},
  pages = {129--137},
  year = {1982},
}

@misc{LokadCloud,
  title = {Lokad-Cloud},
  note = {\url{http://code.google.com/p/lokad-cloud/}},
}

@misc{LokadCQRS,
  title = {{Lokad-CQRS}},
  note = {\url{http://lokad.github.com/lokad-cqrs/}},
}

@inproceedings{LooseSynchronization,
  author = {Albrecht, J. and Tuttle, C. and Snoeren, A. C. and Vahdat, A.},
  title = {Loose synchronization for large-scale networked systems},
  booktitle = {Proceedings of the annual conference on USENIX '06 Annual Technical Conference},
  series = {ATEC '06},
  year = {2006},
  location = {Boston, MA},
  pages = {28--28},
  numpages = {1},
  url = {http://dl.acm.org/citation.cfm?id=1267359.1267387},
  acmid = {1267387},
  publisher = {USENIX Association},
  address = {Berkeley, CA, USA},
}

@inproceedings{LOU1,
  author = {Louppe, G. and Geurts, P.},
  title = {A zealous parallel gradient descent algorithm},
  booktitle = {NIPS 2010 Workshop on Learning on Cores, Clusters and Clouds},
  year = {2010},
}

@inproceedings{MacQueen,
  author = {MacQueen, J.
B.},
  booktitle = {Proceedings of the Fifth Berkeley Symposium on Mathematical Statistics and Probability},
  title = {Some methods of classification and analysis of multivariate observations},
  year = {1967},
}

@inproceedings{MapReduce2004,
  author = {Dean, J. and Ghemawat, S.},
  title = {{MapReduce}: simplified data processing on large clusters},
  booktitle = {OSDI'04: Proceedings of the 6th conference on Symposium on Operating Systems Design \& Implementation},
  year = {2004},
  pages = {10--10},
  location = {San Francisco, CA},
  publisher = {USENIX Association},
  address = {Berkeley, CA, USA},
  file = {hpc/google/mapreduce-osdi04.pdf},
}

% Journal version (CACM): volume and issue moved out of the former booktitle string.
@article{MapReduce2008,
  author = {Dean, J. and Ghemawat, S.},
  title = {{MapReduce}: simplified data processing on large clusters},
  journal = {Communications of the ACM},
  volume = {51},
  number = {1},
  year = {2008},
  pages = {107--113},
}

@inproceedings{MapReduceForMachineLearning,
  author = {Chu, C. T. and Kim, S. K. and Lin, Y. A. and Yu, Y. and Bradski, G. R. and Ng, A. Y. and Olukotun, K.},
  title = {{Map-Reduce} for Machine Learning on Multicore},
  booktitle = {NIPS},
  editor = {Sch{\"o}lkopf, Bernhard and Platt, John C. and Hoffman, Thomas},
  pages = {281--288},
  publisher = {MIT Press},
  year = {2006},
  url = {http://www.cs.stanford.edu/people/ang//papers/nips06-mapreducemulticore.pdf},
  keywords = {google, map-reduce},
  citeulike-article-id = {2308503},
  citeulike-linkout-0 = {http://www.cs.stanford.edu/people/ang//papers/nips06-mapreducemulticore.pdf},
  citeulike-linkout-1 = {http://dblp.uni-trier.de/rec/bibtex/conf/nips/ChuKLYBNO06},
  posted-at = {2008-03-07 03:16:12},
  priority = {0},
}

@book{MapReduceForTextMining,
  author = {Lin, J.
and Dyer, C.},
  title = {Data-intensive text processing with {MapReduce}},
  publisher = {Morgan \& Claypool Publishers},
  year = {2010},
}

% Journal article: volume, issue and month moved out of the former booktitle string.
@article{MathematicsNature,
  author = {Wigner, E.},
  title = {The Unreasonable Effectiveness of Mathematics in the Natural Sciences},
  journal = {Communications on Pure and Applied Mathematics},
  volume = {13},
  number = {1},
  month = feb,
  year = {1960},
}

@article{MIL1,
  author = {Milligan, G. W. and Isaac, P. D.},
  title = {The validation of four ultrametric clustering algorithms},
  journal = {Pattern Recognition},
  volume = {12},
  pages = {41--50},
  year = {1980},
}

@book{MIR1,
  author = {Mirkin, B.},
  title = {Clustering for data mining: a data recovery approach},
  publisher = {Chapman \& Hall/CRC},
  year = {2005},
}

% The fourth author is David W. Walker; the exported record had split the name as "David, W.".
@book{MPI1996,
  author = {Snir, M. and Otto, S. and Huss-Lederman, S. and Walker, D. W. and Dongarra, J.},
  title = {{MPI}: The Complete Reference},
  publisher = {MIT Press},
  address = {Cambridge, MA},
  year = {1996},
}

@misc{MpiEc2,
  title = {{MPI} cluster on {EC2}},
  note = {\url{http://datawrangling.s3.amazonaws.com/elasticwulf_pycon_talk.pdf} read the 03/05/2012},
}

@inproceedings{MPI_performance,
  author = {Hoefler, T. and Gropp, W. and Thakur, R. and Tr{\"a}ff, J. L.},
  title = {Toward performance models of {MPI} implementations for understanding application scaling issues},
  booktitle = {Proceedings of the 17th European {MPI} users' group meeting conference on Recent advances in the message passing interface},
  series = {EuroMPI'10},
  year = {2010},
  isbn = {3-642-15645-2, 978-3-642-15645-8},
  location = {Stuttgart, Germany},
  pages = {21--30},
  numpages = {10},
  url = {http://dl.acm.org/citation.cfm?id=1894122.1894126},
  acmid = {1894126},
  publisher = {Springer-Verlag},
  address = {Berlin, Heidelberg},
}

@inproceedings{MPI_performance_measurements,
  author = {Gropp, W.
and Lusk, E.}, title = {Reproducible Measurements of {MPI} Performance Characteristics}, booktitle = {Proceedings of the 6th European PVM/MPI Users' Group Meeting on Recent Advances in Parallel Virtual Machine and Message Passing Interface}, year = {1999}, isbn = {3-540-66549-8}, pages = {11--18}, numpages = {8}, url = {http://dl.acm.org/citation.cfm?id=648136.748782}, acmid = {748782}, publisher = {Springer-Verlag}, address = {London, UK, UK}, } @book{Nesterov, title={Introductory lectures on convex optimization: A basic course}, volume={87}, url={http://books.google.com/books?hl=zh-CN&lr=&id=VyYLem-l3CgC&oi=fnd&pg=PA1&dq=Y+nesterov&ots=YbTwXHUy7C&sig=uDbNnpT44Hm5MHUmDNzHWIITvT4}, publisher={Kluwer Academic Publishers}, author={Nesterov, Y.}, year={2004}, pages={254} } @misc{NoSQL, title = {http://nosql-database.org/}, note = {\url{http://nosql-database.org/ censored NoSQL databases}} } @misc{NoSQLmeansNoAcid, title = {Problems with {ACID} and how to fix them}, author = {}, note = {\url{http://dbmsmusings.blogspot.com/2010/08/problems-with-acid-and-how-to-fix-them.html} read the 28/03/2012}, } @ARTICLE{PAG1, AUTHOR = {Pag\`es, G.}, TITLE = {A space vector quantization for numerical integration}, JOURNAL = {Journal of Applied and Computational Mathematics}, YEAR = {1997}, VOLUME = {89}, PAGES = {1--38}, } @Article{ParaKMeans, title = {ParaKMeans : implementation of a parallelised K-Means algorithm suitable for laboratory use}, author = {Kraj, P. and Sharma, A. and Garge, N. and Podolsky, R. and McIndoe, R. A.}, journal = {BMC BioInformatics}, year = {2008}, month = {april}, note* = {ParaKMeans en C#} } @Article{ParallelMiningAssociationRules, title = {Parallel Mining of association rules: Design, implementation, and experience}, author = {Agrawal, R. and Shafer, J. C.}, journal = {IEEE Trans. 
Knowledge and Data Eng., 8(6):962-969}, year = {1996} } @article{PatraDALVQ, title={Convergence of distributed asynchronous learning vector quantization algorithms}, volume={12}, url={http://arxiv.org/abs/1012.5150}, journal={Journal of Machine Learning Research}, author={{Patra}, B.}, year={2010}, pages={3431--3466} } @TECHREPORT{PET1, author = {Peterson, A. D. and Ghosh, A. P. and Maitra, R.}, title = {A systematic evaluation of different methods for initializing the $k$-means clustering algorithm}, year = {2010} } @inproceedings{piglatin, author = {Olston, C. and Reed, B. and Srivastava, U. and Kumar, R. and Tomkins, A.}, title = {Pig latin: a not-so-foreign language for data processing}, booktitle = {Proceedings of the 2008 ACM SIGMOD international conference on Management of data}, series = {SIGMOD '08}, year = {2008}, isbn = {978-1-60558-102-6}, location = {Vancouver, Canada}, pages = {1099--1110}, numpages = {12}, url = {http://doi.acm.org/10.1145/1376616.1376726}, doi = {http://doi.acm.org/10.1145/1376616.1376726}, acmid = {1376726}, publisher = {ACM}, address = {New York, NY, USA}, keywords = {dataflow language, pig latin}, } @Article{Pollard, title = {Strong consistency of K-Means clustering}, author = {Pollard, D.}, journal = {The Annals of Statistics}, year = {1981}, month = {January}, note* = {Historical article on the consistency of Online K-Means algorithm} } @ARTICLE{PVM, author = {Sunderam, V. S.}, title = {{PVM}: A Framework for Parallel Distributed Computing}, journal = {Concurrency: Practice and Experience}, year = {1990}, volume = {2}, pages = {315--339} } @InProceedings{ROS1, author = {Rossi, F. and Conan-Guez, B. and El Golli, A. }, title = {Clustering Functional Data with the {SOM} algorithm}, booktitle = {Proceedings of ESANN 2004}, pages = {305--312}, year = 2004, address = {Bruges, Belgium}, month = {April}, note = {\url{http://apiacoa.org/publications/2004/som-esann04.pdf}} } @TECHREPORT{Saini96nasparallel, author = {Saini, S. 
and Bailey, D. H. and Origin, S.}, title = {NAS Parallel Benchmark (Version 1.0) Results 11-96}, institution = {}, year = {1996} } @article{sawzall, author = {Pike, R. and Dorward, S. and Griesemer, R. and Quinlan, S.}, title = {Interpreting the data: Parallel analysis with Sawzall}, journal = {Sci. Program.}, issue_date = {October 2005}, volume = {13}, number = {4}, month = oct, year = {2005}, issn = {1058-9244}, pages = {277--298}, numpages = {22}, url = {http://dl.acm.org/citation.cfm?id=1239655.1239658}, acmid = {1239658}, publisher = {IOS Press}, address = {Amsterdam, The Netherlands, The Netherlands}, } @Article{ScalableParallelClassifier, title = {A scalable parallel classifier for data mining}, author = {Shafer, J. C. and Agrawal, R. and Mehta, M.}, journal = {Proc. 22nd International Conference on VLDB, Mumbai, India}, year = {1996} } @article{SCOPE, author = {Chaiken, R. and Jenkins, B. and Larson, P. and Ramsey, B. and Shakib, D. and Weaver, S. and Zhou, J.}, title = {SCOPE: easy and efficient parallel processing of massive data sets}, journal = {Proc. VLDB Endow.}, issue_date = {August 2008}, volume = {1}, number = {2}, month = aug, year = {2008}, issn = {2150-8097}, pages = {1265--1276}, numpages = {12}, url = {http://dx.doi.org/10.1145/1454159.1454166}, doi = {10.1145/1454159.1454166}, acmid = {1454166}, publisher = {VLDB Endowment}, } @misc{ShardingCounters, title = {Sharding counters}, author = {Gregorio, J.}, note = {\url{https://developers.google.com/appengine/articles/sharding_counters} read the 17/05/2012}, } @misc{Slaying_Relational_Dragons, title = {Slaying Relational Dragons}, author = {Ayende, S.}, journal = {http://ayende.com/Blog/archive/2010/02/22/slaying-relational-dragons.aspx} } @INPROCEEDINGS{SlowLearners, author = {Zinkevich, M. and Smola, A. 
and Langford, J.}, title = {Slow learners are fast}, booktitle = {Advances in Neural Information Processing Systems 22}, year = {2009}, pages = {2331-2339}, } @misc{SortBenchmark, title = {Sort Benchmark Home Page}, author = {}, note = {\url{http://sortbenchmark.org/}}, } @misc{Spolsky_LeakyAbstractions, title = {Leaky Abstractions}, author = {}, note = {\url{http://www.joelonsoftware.com/articles/LeakyAbstractions.html}}, } @inproceedings{StragglerIssuesInMapReduce, author = {Lin, J.}, booktitle = {LSDS-IR workshop}, citeulike-article-id = {6091323}, citeulike-linkout-0 = {http://scholar.google.com.sg/scholar?cluster=3230873860067816724\&\#38;hl=en}, keywords = {map, parallelization, reduce, straggler}, posted-at = {2009-11-10 04:34:25}, priority = {2}, title = {{The Curse of Zipf and Limits to Parallelization: A Look at the Stragglers Problem in MapReduce}}, url = {http://scholar.google.com.sg/scholar?cluster=3230873860067816724\&\#38;hl=en}, year = {2009} } @INPROCEEDINGS{TwoPhaseCommitProtocol, author = {Raz, Y.}, title = {The Dynamic Two Phase Commitment (D2PC) protocol}, booktitle = {Database Theory - ICDT '95, Lecture Notes in Computer Science, Volume 893 Springer, ISBN 978-3-540-58907-5}, year = {1995}, pages = {162-176}, } @inproceedings{TOP500, author = {Napper, J. and Bientinesi, P.}, title = {Can cloud computing reach the top500?}, booktitle = {UCHPC-MAW \'09: Proceedings of the combined workshops on UnConventional high performance computing workshop plus memory access workshop}, year = {2009}, isbn = {978-1-60558-557-4}, pages = {17--20}, location = {Ischia, Italy}, doi = {http://doi.acm.org/10.1145/1531666.1531671}, publisher = {ACM}, address = {New York, NY, USA}, } @misc{TeraSortWinnerYahoo, title = {TeraByte Sort on Apache Hadoop}, author = {O Malley, O.}, note = {\url{http://www.hpl.hp.com/hosted/sortbenchmark/YahooHadoop.pdf}} } @inproceedings{The_Impact_Of_Virtualization, author = {Wang, Guohui and Ng, T. S. 
Eugene}, title = {The impact of virtualization on network performance of amazon EC2 data center}, booktitle = {Proceedings of the 29th conference on Information communications}, series = {INFOCOM'10}, year = {2010}, isbn = {978-1-4244-5836-3}, location = {San Diego, California, USA}, pages = {1163--1171}, numpages = {9}, url = {http://dl.acm.org/citation.cfm?id=1833515.1833691}, acmid = {1833691}, publisher = {IEEE Press}, address = {Piscataway, NJ, USA}, keywords = {cloud service, measurement, networking performance, virtualization}, } @ARTICLE{TSI1, author = {Tsitsiklis, J. and Bertsekas, D. and Athans, M.}, journal = {IEEE Transactions on Automatic Control}, pages = {803-812}, title = {Distributed asynchronous deterministic and stochastic gradient optimization algorithms}, volume = {31}, year = {1986}, } @misc{WebSiteMonitoring, title = {WebSiteMonitoring}, author = {}, note = {\url{http://www.website-monitoring.com/}} }