@inproceedings{yu2025tangible,title={Tangible-MakeCode: Bridging Physical Coding Blocks with a Web-Based Programming Interface for Collaborative and Extensible Learning},author={Yu, Jin and Garg, Poojita and Synn, DoangJoo and Oh, HyunJoo},booktitle={Proceedings of the 2025 CHI Conference on Human Factors in Computing Systems},year={2025},month=apr,publisher={ACM},address={Yokohama, Japan},doi={10.1145/3706598.3713260},isbn={979-8-4007-1394-1},}
Recently, deep learning has become widely used in many research fields and industries. One of the critical challenges in improving deep learning performance is determining optimal values for the many adjustable parameters. This paper focuses on the adjustable dataloader parameters that affect the overall training time and proposes an automated dataloader parameter tuning framework, called ADaPT, to determine the optimal values of these parameters. ADaPT exploits the characteristics of the AVL tree to determine optimal dataloader parameters in a short amount of time and thereby accelerate data loading. The results show that the proposed method effectively accelerates data loading compared to the default parameter values and the values recommended by PyTorch, achieving speeds comparable to the optimal.
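As a concrete illustration of the parameters being tuned, the hedged sketch below times a few candidate PyTorch DataLoader settings (num_workers and prefetch_factor) and keeps the fastest combination. The dataset, candidate values, and the exhaustive timing loop are assumptions for illustration; they stand in for, but do not reproduce, the AVL tree-based search used by ADaPT.

```python
# Illustrative sketch only: exhaustively time a few DataLoader settings and
# keep the fastest. ADaPT's AVL tree-based search is NOT reproduced here;
# the dataset and candidate values are placeholders.
import time
import torch
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.randn(4096, 3, 32, 32), torch.randint(0, 10, (4096,)))

def loading_time(num_workers, prefetch_factor):
    """Wall-clock time for one full pass over the dataset with the given settings."""
    kwargs = {"batch_size": 128, "num_workers": num_workers}
    if num_workers > 0:
        kwargs["prefetch_factor"] = prefetch_factor  # only valid with worker processes
    loader = DataLoader(dataset, **kwargs)
    start = time.perf_counter()
    for _ in loader:
        pass
    return time.perf_counter() - start

if __name__ == "__main__":  # guard needed when DataLoader spawns worker processes
    candidates = [(0, None)] + [(w, p) for w in (2, 4, 8) for p in (2, 4)]
    best = min(candidates, key=lambda c: loading_time(*c))
    print("fastest (num_workers, prefetch_factor):", best)
```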
@article{ryu2025adapt,title={ADaPT: An Automated Dataloader Parameter Tuning Framework using AVL Tree-based Search Algorithms},author={Ryu, MyungHoon and Piao, XinYu and Park, JooYoung and Synn, DoangJoo and Kim, Jong-Kook},journal={The Transactions of the Korea Information Processing Society},volume={14},number={1},pages={1--8},year={2025},month=jan,doi={10.3745/TKIPS.2025.14.1.1},url={https://tkips.kips.or.kr/digital-library/101925},publisher={Korea Information Processing Society}}
Deep learning models have been growing in size and complexity, requiring more computational resources for training. Distributed training has emerged as a solution to this challenge, allowing models to be trained across multiple devices. However, traditional distributed training approaches often face challenges related to memory constraints, especially when dealing with large batch sizes. In this paper, we propose a novel approach called Micro Batch Streaming (MBS) for distributed training of deep learning models. MBS addresses memory limitations by dividing large batches into smaller micro-batches and processing them sequentially while accumulating gradients. We evaluate our approach on various deep learning tasks and demonstrate its effectiveness in enabling distributed training with large batch sizes on memory-constrained systems. Our results show that MBS can achieve comparable or better performance than traditional distributed training methods while using less memory per device.
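To make the micro-batch idea concrete, the sketch below divides one large batch into smaller micro-batches and accumulates scaled gradients before a single optimizer step. The model, batch sizes, and loss are illustrative assumptions, and this single-device loop is only a sketch of the accumulation step, not the paper's MBS implementation for distributed training.

```python
# Minimal single-device sketch of gradient accumulation over micro-batches;
# the model, sizes, and loss are placeholders, not the MBS implementation.
import torch
import torch.nn as nn

model = nn.Linear(512, 10)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
criterion = nn.CrossEntropyLoss()

large_x = torch.randn(1024, 512)              # a "large" batch that may not fit at once
large_y = torch.randint(0, 10, (1024,))
micro_batch_size = 128
num_micro = large_x.size(0) // micro_batch_size

optimizer.zero_grad()
for i in range(num_micro):
    s = slice(i * micro_batch_size, (i + 1) * micro_batch_size)
    loss = criterion(model(large_x[s]), large_y[s])
    (loss / num_micro).backward()             # accumulate gradients scaled by 1/num_micro
optimizer.step()                              # one update for the whole large batch
```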
@article{synn2023distributed,title={Distributed Training of Deep Learning Models Using Micro Batch Streaming},author={Synn, DoangJoo and Piao, XinYu and Park, JuYoung and Kim, Jong-Kook},journal={KIPS Transactions on Software and Data Engineering},volume={12},number={3},pages={101--108},year={2023},month=mar,doi={10.3745/KTSDE.2023.12.3.101},publisher={Korea Information Processing Society}}
@inproceedings{ahn2021nips,title={Protopia AI: Taking on Missing Link in Inference Privacy},author={Ahn, Byung Hoon and Synn, DoangJoo and Derkani, Masih and Ebrahimi, Eiman and Esmaeilzadeh, Hadi},booktitle={Neural Information Processing Systems Demonstrations},year={2021},organization={Neural Information Processing Systems},}
When training a deep learning model, it is crucial to tune various hyperparameters to gain speed and accuracy. Although the hyperparameters that mathematically affect convergence have a large impact on training speed, the system parameters that affect host-to-device dataset transfer time also account for a meaningful share of the overall training time. Therefore, it is important to properly tune and select the system parameters that affect the data loader. We propose an automated framework called Num Worker Tuner (NWT) to address this problem. NWT searches over a search space to find an appropriate number of multiprocessing subprocesses and accelerates training accordingly. Furthermore, it improves memory efficiency and speed by tuning this system-dependent spawn parameter, i.e., the number of worker processes.
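For context, the sketch below simply sweeps candidate num_workers values for a PyTorch DataLoader and reports the time for one pass over a toy dataset. The dataset and candidate range are assumptions, and this plain sweep only illustrates the spawn parameter NWT tunes, not its search strategy.

```python
# Illustrative sweep over the DataLoader spawn parameter (num_workers);
# the toy dataset and candidate range are assumptions, and this plain sweep
# does not reproduce NWT's search strategy.
import os
import time
import torch
from torch.utils.data import DataLoader, TensorDataset

data = TensorDataset(torch.randn(8192, 256), torch.randint(0, 2, (8192,)))

if __name__ == "__main__":  # guard needed when DataLoader spawns worker processes
    for workers in range(0, (os.cpu_count() or 4) + 1, 2):  # 0, 2, 4, ...
        loader = DataLoader(data, batch_size=256, num_workers=workers)
        start = time.perf_counter()
        for _ in loader:
            pass
        print(f"num_workers={workers}: {time.perf_counter() - start:.2f}s per pass")
```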
@inproceedings{synn2021nwt,title={Num Worker Tuner: An Automated Spawn Parameter Tuner for Multi-Processing DataLoaders},author={Synn, DoangJoo and Kim, Jong-Kook},booktitle={Proceedings of ACK 2021},year={2021},month=oct,organization={Korea Information Processing Society}}
@inproceedings{park2021survey,title={A Survey on the Advancement of Virtualization Technology},author={Park, JooYoung and Synn, DoangJoo and Kim, Jong-Kook},booktitle={Proceedings of Korea Information Processing Society 2021},year={2021},month=mar,organization={Korea Information Processing Society}}
Many natural language processing (NLP) models utilize pre-trained word embeddings to leverage latent information. One of the most successful word embedding models is Skip-gram (SG). In this paper, we propose the Skip-gram drop (SG-Drop) model, a variation of the SG model designed to reduce training time efficiently. SG-Drop allows the training time to be controlled through its hyperparameter, and it trains word embeddings faster than simply reducing the number of training epochs while better preserving embedding quality.
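As a rough illustration of the idea of dropping context words, the sketch below generates skip-gram (center, context) training pairs while skipping each context word with probability p, the kind of controllable hyperparameter the abstract refers to. The function and parameters are assumptions for illustration, not the SG-Drop paper's exact training procedure.

```python
# Toy illustration of dropping context words while generating skip-gram
# training pairs; the drop probability p is the controllable knob. This is
# not the SG-Drop paper's exact procedure.
import random

def skipgram_pairs_with_drop(tokens, window=2, p=0.5, rng=random.Random(0)):
    """Yield (center, context) pairs, skipping each context word with probability p."""
    for i, center in enumerate(tokens):
        lo, hi = max(0, i - window), min(len(tokens), i + window + 1)
        for j in range(lo, hi):
            if j == i:
                continue
            if rng.random() < p:      # drop this context word
                continue
            yield center, tokens[j]

sentence = "the quick brown fox jumps over the lazy dog".split()
print(list(skipgram_pairs_with_drop(sentence, window=2, p=0.5)))
```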
@inproceedings{kim2020sg,title={SG-Drop: Faster Skip-Gram by Dropping Context Words},author={Kim, DongJae and Synn, DoangJoo and Kim, Jong-Kook},booktitle={Proceedings of Korea Information Processing Society 2020},year={2020},month=nov,organization={Korea Information Processing Society}}