% ACL Anthology record for the MccSTN paper (LREC-COLING 2024, pages 11090--11100).
% The model name in the title is braced as a whole word so that sentence-casing
% bibliography styles keep its capitalisation without splitting the word.
@inproceedings{zhao-etal-2024-mccstn,
  title     = {{MccSTN}: Multi-Scale Contrast and Fine-Grained Feature Fusion Networks for Subject-driven Style Transfer},
  author    = {Zhao, Honggang and
               Xiao, Chunling and
               Yang, Jiayi and
               Jin, Guozhu and
               Li, Mingyong},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.lrec-main.967},
  pages     = {11090--11100},
  abstract  = {Stylistic transformation of artistic images is an important part of the current image processing field. In order to access the aesthetic artistic expression of style images, recent research has applied attention mechanisms to the field of style transfer. This approach transforms style images into tokens by calculating attention and then migrating the artistic style of the image through a decoder. Due to the very low semantic similarity between the original image and the style image, this results in many fine-grained style features being discarded. This can lead to discordant artifacts or obvious artifacts. To address this problem, we propose MccSTN, a novel style representation and transfer framework that can be adapted to existing arbitrary image style transfers. Specifically, we first introduce a feature fusion module (Mccformer) to fuse aesthetic features in style images with fine-grained features in content images. Feature maps are obtained through Mccformer. The feature map is then fed into the decoder to get the image we want. In order to lighten the model and train it quickly, we consider the relationship between specific styles and the overall style distribution. We introduce a multi-scale augmented contrast module that learns style representations from a large number of image pairs.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, ID zhao-etal-2024-mccstn. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="zhao-etal-2024-mccstn">

    <!-- Title of the paper. -->
    <titleInfo>
      <title>MccSTN: Multi-Scale Contrast and Fine-Grained Feature Fusion Networks for Subject-driven Style Transfer</title>
    </titleInfo>

    <!-- Authors, in the order listed. -->
    <name type="personal">
      <namePart type="given">Honggang</namePart>
      <namePart type="family">Zhao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chunling</namePart>
      <namePart type="family">Xiao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jiayi</namePart>
      <namePart type="family">Yang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Guozhu</namePart>
      <namePart type="family">Jin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mingyong</namePart>
      <namePart type="family">Li</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <!-- Issue date of the paper (year-month). -->
    <originInfo>
      <dateIssued>2024-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Min-Yen</namePart>
        <namePart type="family">Kan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Veronique</namePart>
        <namePart type="family">Hoste</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alessandro</namePart>
        <namePart type="family">Lenci</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sakriani</namePart>
        <namePart type="family">Sakti</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nianwen</namePart>
        <namePart type="family">Xue</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>ELRA and ICCL</publisher>
        <place>
          <placeTerm type="text">Torino, Italia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <!-- Abstract text. -->
    <abstract>Stylistic transformation of artistic images is an important part of the current image processing field. In order to access the aesthetic artistic expression of style images, recent research has applied attention mechanisms to the field of style transfer. This approach transforms style images into tokens by calculating attention and then migrating the artistic style of the image through a decoder. Due to the very low semantic similarity between the original image and the style image, this results in many fine-grained style features being discarded. This can lead to discordant artifacts or obvious artifacts. To address this problem, we propose MccSTN, a novel style representation and transfer framework that can be adapted to existing arbitrary image style transfers. Specifically, we first introduce a feature fusion module (Mccformer) to fuse aesthetic features in style images with fine-grained features in content images. Feature maps are obtained through Mccformer. The feature map is then fed into the decoder to get the image we want. In order to lighten the model and train it quickly, we consider the relationship between specific styles and the overall style distribution. We introduce a multi-scale augmented contrast module that learns style representations from a large number of image pairs.</abstract>

    <!-- Citation key and URL of the record. -->
    <identifier type="citekey">zhao-etal-2024-mccstn</identifier>
    <location>
      <url>https://aclanthology.org/2024.lrec-main.967</url>
    </location>

    <!-- Date and page extent within the host volume. -->
    <part>
      <date>2024-05</date>
      <extent unit="page">
        <start>11090</start>
        <end>11100</end>
      </extent>
    </part>

  </mods>
</modsCollection>
%0 Conference Proceedings
%T MccSTN: Multi-Scale Contrast and Fine-Grained Feature Fusion Networks for Subject-driven Style Transfer
%A Zhao, Honggang
%A Xiao, Chunling
%A Yang, Jiayi
%A Jin, Guozhu
%A Li, Mingyong
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F zhao-etal-2024-mccstn
%X Stylistic transformation of artistic images is an important part of the current image processing field. In order to access the aesthetic artistic expression of style images, recent research has applied attention mechanisms to the field of style transfer. This approach transforms style images into tokens by calculating attention and then migrating the artistic style of the image through a decoder. Due to the very low semantic similarity between the original image and the style image, this results in many fine-grained style features being discarded. This can lead to discordant artifacts or obvious artifacts. To address this problem, we propose MccSTN, a novel style representation and transfer framework that can be adapted to existing arbitrary image style transfers. Specifically, we first introduce a feature fusion module (Mccformer) to fuse aesthetic features in style images with fine-grained features in content images. Feature maps are obtained through Mccformer. The feature map is then fed into the decoder to get the image we want. In order to lighten the model and train it quickly, we consider the relationship between specific styles and the overall style distribution. We introduce a multi-scale augmented contrast module that learns style representations from a large number of image pairs.
%U https://aclanthology.org/2024.lrec-main.967
%P 11090-11100
Markdown (Informal)
[MccSTN: Multi-Scale Contrast and Fine-Grained Feature Fusion Networks for Subject-driven Style Transfer](https://aclanthology.org/2024.lrec-main.967) (Zhao et al., LREC-COLING 2024)
ACL