WASET
	@article{(Open Science Index):https://publications.waset.org/pdf/10013375,
	  title     = {Performance Analysis and Optimization for Diagonal Sparse Matrix-Vector Multiplication on Machine Learning Unit},
	  author    = {Qiuyu Dai and  Haochong Zhang and  Xiangrong Liu},
	  country	= {},
	  institution	= {},
	  abstract     = {Efficient matrix-vector multiplication with diagonal sparse matrices is pivotal in a multitude of computational domains, ranging from scientific simulations to machine learning workloads. When encoded in the conventional Diagonal (DIA) format, these matrices often induce computational overheads due to extensive zero-padding and non-linear memory accesses, which can hamper the computational throughput, and elevate the usage of precious compute and memory resources beyond necessity. The ’DIA-Adaptive’ approach, a methodological enhancement introduced in this paper, confronts these challenges head-on by leveraging the advanced parallel instruction sets embedded within Machine Learning Units (MLUs). This research presents a thorough analysis of the DIA-Adaptive scheme’s efficacy in optimizing Sparse Matrix-Vector Multiplication (SpMV) operations. The scope of the evaluation extends to a variety of hardware architectures, examining the repercussions of distinct thread allocation strategies and cluster configurations across multiple storage formats. A dedicated computational kernel, intrinsic to the DIA-Adaptive approach, has been meticulously developed to synchronize with the nuanced performance characteristics of MLUs. Empirical results, derived from rigorous experimentation, reveal that the DIA-Adaptive methodology not only diminishes the performance bottlenecks associated with the DIA format but also exhibits pronounced enhancements in execution speed and resource utilization. The analysis delineates a marked improvement in parallelism, showcasing the DIA-Adaptive scheme’s ability to adeptly manage the interplay between storage formats, hardware capabilities, and algorithmic design. The findings suggest that this approach could set a precedent for accelerating SpMV tasks, thereby contributing significantly to the broader domain of high-performance computing and data-intensive applications.},
	    journal   = {International Journal of Computer and Information Engineering},
	  volume    = {17},
	  number    = {11},
	  year      = {2023},
	  pages     = {665 - 676},
	  ee        = {https://publications.waset.org/pdf/10013375},
	  url   	= {https://publications.waset.org/vol/203},
	  bibsource = {https://publications.waset.org/},
	  issn  	= {eISSN: 1307-6892},
	  publisher = {World Academy of Science, Engineering and Technology},
	  index 	= {Open Science Index 203, 2023},
	}