% Volume-level record for the workshop proceedings that the paper entries in
% this file name in their booktitle.
% Typed as proceedings so that classic styles treat address as the meeting
% place rather than the publisher's city. Editors use "Last, First" form so
% multi-word surnames are not split.
% NOTE(review): the url ends in a dangling "_" (the paper entries append a
% three-digit number at that position) -- confirm the volume-level DOI.
@Proceedings{HiT-IT:2017,
  editor    = {Temnikova, Irina and Orasan, Constantin and Corpas Pastor, Gloria and Vogel, Stephan},
  title     = {Proceedings of the Workshop Human-Informed Translation and Interpreting Technology},
  month     = sep,
  year      = {2017},
  address   = {Varna, Bulgaria},
  publisher = {Association for Computational Linguistics, Shoumen, Bulgaria},
  url       = {https://doi.org/10.26615/978-954-452-042-7_}
}

% Workshop paper, pp. 1--10. month is the unquoted BibTeX macro so that each
% bibliography style can expand or abbreviate it.
@InProceedings{scansani-EtAl:2017:HiT-IT,
  author    = {Scansani, Randy and Bernardini, Silvia and Ferraresi, Adriano and Gaspari, Federico and Soffritti, Marcello},
  title     = {Enhancing Machine Translation of Academic Course Catalogues with Terminological Resources},
  booktitle = {Proceedings of the Workshop Human-Informed Translation and Interpreting Technology},
  month     = sep,
  year      = {2017},
  address   = {Varna, Bulgaria},
  publisher = {Association for Computational Linguistics, Shoumen, Bulgaria},
  pages     = {1--10},
  abstract  = {This paper describes an approach to translating course unit descriptions from
	Italian and German into English, using a phrase-based machine translation (MT)
	system. The genre is very prominent among those requiring translation by
	universities in European countries in which English is a non-native language.
	For each language combination, an in-domain bilingual corpus including course
	unit and degree program descriptions is used to train an MT engine, whose
	output is then compared to a baseline engine trained on the Europarl corpus. In
	a subsequent experiment, a bilingual terminology database is added to the
	training sets in both engines and its impact on the output quality is evaluated
	based on BLEU and post-editing score. Results suggest that the use of
	domain-specific corpora boosts the engines quality for both language
	combinations, especially for German-English, whereas adding terminological
	resources does not seem to bring notable benefits.},
  url       = {https://doi.org/10.26615/978-954-452-042-7_001}
}

% Workshop paper, pp. 11--20. The accented letter in the first author's surname
% is written as a brace group starting with the accent command, the form BibTeX
% treats as a single letter for sorting and labels.
@InProceedings{toledobaez-schaeffer-carl:2017:HiT-IT,
  author    = {Toledo B{\'a}ez, Cristina and Schaeffer, Moritz and Carl, Michael},
  title     = {Experiments in Non-Coherent Post-editing},
  booktitle = {Proceedings of the Workshop Human-Informed Translation and Interpreting Technology},
  month     = sep,
  year      = {2017},
  address   = {Varna, Bulgaria},
  publisher = {Association for Computational Linguistics, Shoumen, Bulgaria},
  pages     = {11--20},
  abstract  = {Market pressure on translation productivity joined with technological
	innovation is likely to fragment and decontextualise translation jobs even more
	than is currently the case. Many different translators increasingly work on
	one document at different places, collaboratively working in the cloud. This
	paper investigates the effect of decontextualised source texts on behaviour by
	comparing post-editing of sequentially ordered sentences with shuffled
	sentences from two different texts. The findings suggest that there is little
	or no effect of the decontextualised source texts on behaviour.},
  url       = {https://doi.org/10.26615/978-954-452-042-7_002}
}

% Workshop paper, pp. 21--28. month is the unquoted BibTeX macro so that each
% bibliography style can expand or abbreviate it.
@InProceedings{ahrenberg:2017:HiT-IT,
  author    = {Ahrenberg, Lars},
  title     = {Comparing Machine Translation and Human Translation: A Case Study},
  booktitle = {Proceedings of the Workshop Human-Informed Translation and Interpreting Technology},
  month     = sep,
  year      = {2017},
  address   = {Varna, Bulgaria},
  publisher = {Association for Computational Linguistics, Shoumen, Bulgaria},
  pages     = {21--28},
  abstract  = {As machine translation technology improves comparisons to human performance are
	often made in quite general and exaggerated terms. Thus, it is important to be
	able to account for differences accurately. This paper reports a simple,
	descriptive scheme for comparing translations and applies it to two
	translations of a British opinion article published in March, 2017. One is a
	human translation (HT) into Swedish, and the other a machine translation (MT).
	While the comparison is limited to one text, the results are indicative of
	current limitations in MT.},
  url       = {https://doi.org/10.26615/978-954-452-042-7_003}
}

% Workshop paper, pp. 29--35. The camel-case name and the acronym in the title
% are braced so that styles which sentence-case titles keep their capitals.
@InProceedings{ustaszewski-stauder:2017:HiT-IT,
  author    = {Ustaszewski, Michael and Stauder, Andy},
  title     = {{TransBank}: Metadata as the Missing Link between {NLP} and Traditional Translation Studies},
  booktitle = {Proceedings of the Workshop Human-Informed Translation and Interpreting Technology},
  month     = sep,
  year      = {2017},
  address   = {Varna, Bulgaria},
  publisher = {Association for Computational Linguistics, Shoumen, Bulgaria},
  pages     = {29--35},
  abstract  = {Despite the growing importance of data in translation, there is no data
	repository that equally meets the requirements of translation industry and
	academia alike. Therefore, we plan to develop a freely available, multilingual
	and expandable bank of translations and their source texts aligned at the
	sentence level. Special emphasis will be placed on the labelling of metadata
	that precisely describe the relations between translated texts and their
	originals. This metadata-centric approach gives users the opportunity to
	compile and download custom corpora on demand. Such a general-purpose data
	repository may help to bridge the gap between translation theory and the
	language industry, including translation technology providers and NLP.},
  url       = {https://doi.org/10.26615/978-954-452-042-7_004}
}

% Workshop paper, pp. 36--43. The corpus acronym in the title is braced so that
% styles which sentence-case titles keep it in capitals.
@InProceedings{temnikova-EtAl:2017:HiT-IT,
  author    = {Temnikova, Irina and Abdelali, Ahmed and Hedaya, Samy and Vogel, Stephan and Al Daher, Aishah},
  title     = {Interpreting Strategies Annotation in the {WAW} Corpus},
  booktitle = {Proceedings of the Workshop Human-Informed Translation and Interpreting Technology},
  month     = sep,
  year      = {2017},
  address   = {Varna, Bulgaria},
  publisher = {Association for Computational Linguistics, Shoumen, Bulgaria},
  pages     = {36--43},
  abstract  = {With the aim to teach our automatic speech-to-text translation system human
	interpreting strategies, our first step is to identify which interpreting
	strategies are most often used in the language pair of our interest
	(English-Arabic). In this article we run an automatic analysis of a corpus of
	parallel speeches and their human interpretations, and provide the results of
	manually annotating the human interpreting strategies in a sample of the
	corpus.
	We give a glimpse of the corpus, whose value surpasses the fact that it
	contains a high number of scientific speeches with their interpretations from
	English into Arabic, as it also provides rich information about the
	interpreters.
	We also discuss the difficulties, which we encountered on our way, as well as
	our solutions to them: our methodology for manual re-segmentation and alignment
	of parallel segments, the choice of annotation tool, and the annotation
	procedure.
	Our annotation findings explain the previously extracted specific statistical
	features of the interpreted corpus (compared with a translation one) as well as
	the quality of interpretation provided by different interpreters.},
  url       = {https://doi.org/10.26615/978-954-452-042-7_005}
}

% Workshop paper, pp. 44--51. month is the unquoted BibTeX macro so that each
% bibliography style can expand or abbreviate it.
@InProceedings{silvestrebaquero-mitkov:2017:HiT-IT,
  author    = {Silvestre Baquero, Andrea and Mitkov, Ruslan},
  title     = {Translation Memory Systems Have a Long Way to Go},
  booktitle = {Proceedings of the Workshop Human-Informed Translation and Interpreting Technology},
  month     = sep,
  year      = {2017},
  address   = {Varna, Bulgaria},
  publisher = {Association for Computational Linguistics, Shoumen, Bulgaria},
  pages     = {44--51},
  abstract  = {The TM memory systems changed the work of translators and now the translators
	not benefiting from these tools are a tiny minority. These tools operate on
	fuzzy (surface) matching mostly and cannot benefit from already translated
	texts which are synonymous to (or paraphrased versions of) the text to be
	translated. The match score is mostly based on character-string similarity,
	calculated through Levenshtein distance. The TM tools have difficulties with
	detecting similarities even in sentences which represent a minor revision of
	sentences already available in the translation memory. This shortcoming of the
	current TM systems was the subject of the present study and was empirically
	proven in the experiments we conducted. To this end, we compiled a small
	translation memory (English-Spanish) and applied several lexical and syntactic
	transformation rules to the source sentences with both English and Spanish
	being the source language.
	The results of this study show that current TM systems have a long way to go
	and highlight the need for TM systems equipped with NLP capabilities which will
	offer the translator the advantage of he/she not having to translate a sentence
	again if an almost identical sentence has already been already translated.},
  url       = {https://doi.org/10.26615/978-954-452-042-7_006}
}

% Workshop paper, pp. 52--57. The language name in the title is braced so that
% styles which sentence-case titles keep its capital letter.
@InProceedings{elgabou-kazakov:2017:HiT-IT,
  author    = {Elgabou, Hani and Kazakov, Dimitar},
  title     = {Building Dialectal {Arabic} Corpora},
  booktitle = {Proceedings of the Workshop Human-Informed Translation and Interpreting Technology},
  month     = sep,
  year      = {2017},
  address   = {Varna, Bulgaria},
  publisher = {Association for Computational Linguistics, Shoumen, Bulgaria},
  pages     = {52--57},
  abstract  = {The aim of this research is to identify local Arabic dialects in texts from
	social media (Twitter) and link them to specific geographic areas. Dialect
	identification is studied as a subset of the task of language identification.
	The proposed method is based on unsupervised learning using simultaneously
	lexical and geographic distance. While this study focusses on Libyan dialects,
	the approach is general, and could produce resources to support human
	translators and interpreters when dealing with vernaculars rather than standard
	Arabic.},
  url       = {https://doi.org/10.26615/978-954-452-042-7_007}
}

% Workshop paper, pp. 58--64. Proper names in the title are braced against
% style recasing. Percent signs in the abstract are escaped and the apostrophe
% is plain ASCII so the field is safe to typeset with 8-bit BibTeX.
@InProceedings{mrini-benjamin:2017:HiT-IT,
  author    = {Mrini, Khalil and Benjamin, Martin},
  title     = {Towards Producing Human-Validated Translation Resources for the {Fula} language through {WordNet} Linking},
  booktitle = {Proceedings of the Workshop Human-Informed Translation and Interpreting Technology},
  month     = sep,
  year      = {2017},
  address   = {Varna, Bulgaria},
  publisher = {Association for Computational Linguistics, Shoumen, Bulgaria},
  pages     = {58--64},
  abstract  = {We propose methods to link automatically parsed linguistic data to the WordNet.
	We apply these methods on a trilingual dictionary in Fula, English and French.
	Dictionary entry parsing is used to collect the linguistic data. Then we
	connect it to the Open Multilingual WordNet (OMW) through two attempts, and use
	confidence scores to quantify accuracy. We obtained 11,000 entries in parsing
	and linked about 58\% to the OMW on the first attempt, and an additional 14\% in
	the second one. These links are due to be validated by Fula speakers before
	being added to the Kamusi Project's database.},
  url       = {https://doi.org/10.26615/978-954-452-042-7_008}
}

