WASET
	@comment{Journal article record as exported by publications.waset.org.
	  The citation key was normalised to an author-year-keyword form because the
	  exported key contained spaces and parentheses; the direct PDF link is kept
	  in the ee field. The fields ee, bibsource and index are non-standard
	  exporter fields and are retained unchanged.}
	@article{vonseckendorff2009high,
	  author    = {von Seckendorff, Caspar and Sultanow, Eldar},
	  title     = {High Performance in Parallel Data Integration: An Empirical Evaluation of the Ratio Between Processing Time and Number of Physical Nodes},
	  journal   = {International Journal of Computer and Information Engineering},
	  volume    = {3},
	  number    = {8},
	  pages     = {2073--2077},
	  year      = {2009},
	  publisher = {World Academy of Science, Engineering and Technology},
	  issn      = {1307-6892},
	  url       = {https://publications.waset.org/vol/32},
	  ee        = {https://publications.waset.org/pdf/6608},
	  bibsource = {https://publications.waset.org/},
	  index     = {Open Science Index 32, 2009},
	  abstract  = {Many studies have shown that parallelization decreases efficiency [1], [2]. There are many reasons for these decrements. This paper investigates those which appear in the context of parallel data integration. Integration processes generally cannot be allocated to packages of identical size (i. e. tasks of identical complexity). The reason for this is unknown heterogeneous input data which result in variable task lengths. Process delay is defined by the slowest processing node. It leads to a detrimental effect on the total processing time. With a real world example, this study will show that while process delay does initially increase with the introduction of more nodes it ultimately decreases again after a certain point. The example will make use of the cloud computing platform Hadoop and be run inside Amazon's EC2 compute cloud. A stochastic model will be set up which can explain this effect.},
	}