Content uploaded by Alexey Karpov
Author content
All content in this area was uploaded by Alexey Karpov on Jan 22, 2018
Content may be subject to copyright.
Laurent Besacier a, Etienne Barnard b, Alexey Karpov c, Tanja Schultz d
a Laboratory of Informatics of Grenoble, France
b North-West University, South Africa
c SPIIRAS Institute, Saint-Petersburg, Russia
d Karlsruhe Institute of Technology, Germany
Abstract
$
%&
'( % !
%!$')(
*%"$
$%&*$*
'(
%+*$
'(%
Keywords: '( *
,
%
1. Introduction
-$ %!
. '
( ' ( *
%!$/011$
,2!'2!(%!2!
$ $ * $
**%
%3$
45*
for under-resourced languages.
!$'('(
$$$%&.
2!. $%! #
$Introduction
Section 2*$$45$
%Section 3$*
%" Section 4,$
Section 5$$%6Section 6
$.%
1.1 Languages of the world
7*$$.%6
$.*$
*$*$%
*$*Ethnologue1$*%!
“one that has at least one speaker for whom it is their first language”.
.%
/010.$%!89:
%%$"only a few elderly speakers are still living".+
;**..%!
.$$*<.
*$%
$$%
7$$*"%!
$* = =111 $ * * **
'(*%Omniglot3,
$>111$
>)1$%
&..*4$5*
* $
Google Translate'/:8=1>=(? search(
=1>=),Siri ASR application (8 languages in 2012), Wiktionary5 (~80 languages in 2012), Google
Voice Search (29 languages in 2012).
1.2 Language Extinction
+;*#$%7@7=111A
*$*%3
$$.%*Summer Institute of Linguistics
'+( 6* >000 * B> $ .B11
B11.:111>1%111.%!
*$#.+%+$0/C
$;.*8C%
[100 - 999] Mio
[100 - 999]
[100,000 - 1Mio]
0
200
400
600
800
1000
1200
1400
1600
1800
2000
875
264
892
1779
1967
1071
344
204
0
308
6>?+*
> DD $$$ % % D
= DD $$$ % % D %
: DD $$$ % %
8 DD $$$ % % %# D ** D=):0/D
BDD$$$%$.%D
2$$>11%111.
@7=111A%!
.% E ',. E-$ ? .
('*01C;$=11,(
%!
'*.(
$$'%%(%
2$ *$ $ $ % 6
*$
$%!%+
*
.**%!
$%7
* F )1%111 % 7 :111
$ F 911 G% 3# . 6
'6(-73"***
."%
1.3 Good reasons to address less prevalent languages
* 2 ! '2!(%
2$**
'.($%6
. %
"
through % 6 *
$%$*
%+$
$%7*.*
$%
2!#'(
$$%!
..%G
'.$(*
*$
%$2!**
*%+;$
$* %
.**$.
%%$.',.2
27/(%3
$.$.$
%% %6
**%2*
**%**
*$ * $ .$
$*%!2!
* % *
**$
'GH
$&*=1
.$(%
/DD%%DDD%IJ>:/918'K=1>1(
2. Under-Resourced (UR) languages
2.1 Definition
!45* @ $=11:< =118A
$'($. ,$*
$*..
*
**%!
low-density languages resource-poor languages low-data languages, less-resourced
languages%+$
.*%
.*%3
* $ ' 7
*Google SearchGoogle Translate(%7,
'$(%
2.2 Measure the Status of a Language
+ *" '
9($"*$-!'-$.
( ' ( @ $ =11:A%
6"**
* $ % $ @ =118A
*'.
$* (% @ =118A
.7*'/%=D=1(%!
H >1D=1%
$*$>1D=1%GG!-!'-$./1
:8($ ) “Languages in the
European Information Society”$$
!.%!.$
0 ' $
7+G(%
2.3 Challenges
E 2! '%% ( ,
,**%+$
$ ' $ * ##
$%(%!.,
'$@?=1>>A($
*$ '%% @# =11><
=110A(%+
**$$
'$$(
.'$(%
6**$'.(
'(%+ * .$
.%G
$*%!
* *$ .$
$ , ' *$ (
'*$
%(%G
9 DD $$$ %*. % D
)DD$$$%%D$D$
0DD$$$%%D$D.
$*I+
* I + ' (
$.$.
$ * % .
%
2.4 Short History on Under-Resourced Language Research
+*
*+G@7>009AF@>00/A-
@ >009A 7* @L >009A E @F >00BA G+! @? >00BA +G+
@ >00BA% ! . ?
K 6 G 7 #
%+
*%.@7>009A
@?.>009<#>00)<
#=119A@&>008A<@ M>00)A
,$@#>00)< M>00)A%3
$
@#=11>A%
+9;$$
.$*$%6
*$$$.
* # % 6 $. .
! '!( . =11) '2 H( =1>1
'EG(=1>='7!$(%+
! $ + =1>> >1%
&'!=1>= >>>98
$(773+-?$.$
(Workshop on Indian Language Data: Resources and Evaluation;
Workshop on Language Resources & Technologies for Turkic Languages; Workshop on Parsing in
Indian Languages; Workshop on South and Southeast Asian Natural Language Processing, etc.).
2.5 Language resources
* *$ * , *
.$*
*%
&$*
**$
*>11'*B1B(%!
F7'F7(*
% !
'( *
$ % 3 . 2 * )1
>='K=1>:(3*
:B>: 'K=1>:(%-*
$**
** %+ $
.%
&$$$#
%
$,'(.
>1!+=1>>$##*%
>>DD$$$%%%D=1>=D
>=DD%*%D
>:DD$$$%%DE7 D
' ( %
*,$N$$N
* $ % 3 $
?*E # * @# =11=A% !
*.
$$%?*E
* $ '>11
.(,'('.
.%($%,
?*E * '>(
'=( ':(
.'8(.'B(
$ '/( %!
?*E=>*'G(7G7
77#6?2K EE'#(
'( $!!!.. H% +
811.*=111.
@#=1>:A$*>8
*.%
! @ =111A $# @ =110A
*-72!'**(@FH=1>:A%
!*
.%
3. Automatic Speech Recognition for Under-Resourced Languages (U-ASR)
3.1 Components of ASR systems
'( %%
,$*$$%
'$
*$('$*$$(
'$..*$$('%%
('%%(%
***D#@&.=11>A'
$(<'>1 $(<'>11 $(<D'O>11111
$,<
)11. *(< * ' $
(%G#*,2
G.G'2GG(@L=11)AF!&'F!&(FE
@K =1>1A F -$. 'F-( @ =11=A H G
'HG(@=119A*@!=11><?"=111A%
--$. '--( -- -- 'F
- -$. F-- F -$. F-( *.
@G=1>=< =1>>A @ =1>=< G.
=1>1A%
? 2GG*
6%=<@L=11)A'(
'D*(%
$.$%E
'.
, .( '
('#*.$
(% $
>8DD%%.%D?*E
* ' *
(.*$
. % H '%%
, 'G677( 'E7(
'EE(*.'G(%(
** %
*.'$*$(
$.'(%
%*
******
%
6=%
$
,$%2GG*
* . * H* @L=11)A%
*$D-*
* % *
.$**$**
*2! >BK>/>9!>)>0 F+=1
L!=>%
3.2 Collecting data for UR languages
:%'(
*%2$
* %2
%+$
>BDD.% %%%.
>/DD"%%"DP%
>9DD%%
>)DD%%.%D
>0DD$$$/%.%$%D$D
=1DD.%%D
=>DD%%D$.D*D$DE+7!+3-D&*2DL!%#
*
<$*$
%
+ *
*
' *$(% G * *
< $
#$'$($*
* *% 7$
* $ @E =1>1A< $ *
$$.*
* @? =1>>A%
.%
&4.5,B1.@
=110A***#$.%
&.*
*%!**$*
.%+$
'$**#$(%6
.
% .
$*,
@F =1>>A * $ $
.
%6'
.$ + H ( * @G >00=A%
+*.<
. * %
**'
(@#=11=A<*$.
*
'**(,%!
$*
@2 =1>1<FH=1>><FH=1>:A * * $
* $. *% + $.
*.
%
3 *
@?>00=A%2$
*
%!*
*%
3.3 Feature processing
+ $ - -$.$ % 6
'GE($$
'! ( @2. =111A '-. (
@?#=119AG677%+
GE*$*
*.*$%+
$ *
$
*%
$GE$$$
*@.=11/<!=11)A@E=1>>A%@!
=1>=A@!=1>=*A@H=1>=A$
%!
$.$*$
,%+@H=1>=AGE#
GE * +E % ! $
* * *
@H=1>=*A%
3.4 Acoustic modeling
* *
%2
%7$#
!@7=11)A<*E@=110A
H@H=1>>A%2$
*
***
% 6 @H=1>>A$
*%!
4G*5@H=1>1A%$$$
*$%+
$.GE$
,%+*$
*
'*"$*($*
*%+
**,$
%
$2
G.G
% *
%6
.$*
@&=11)A%
.**,$
*%6*
***
,%&$45***
%H
*@H2=1>1A
*@#=11><=110A
$@7=1>=A*'GE(
$%7*$
**,
%
*2
G. G % 6
* ' @?. =1>>A(
'$($%$
$***%
2G.G*
@!*=1>=<!*=1>:A<N
* %
@=1>:A*.$
*45.%*
' * @G# =11:A(
**+E@Q.
=11:A$,%!$.*@=1>:A
$$.@L=1>=A%
3.5 Lexical modeling
Grapheme-based approaches
*$
! @7$ =11/ < Q. =11)A @?#$=11)A
H@=110A@ =11:< .=11:A%+
*$<
* * %
$%
Bootstrapping G2P using MT approaches
3
@=110< =1>1A%24$5
4$5%45*
$ $
%$@7=1>>A%
Use of the Web
@? =110A @ =1>1A @ =1>:A *
$ & & &*%
&. ' $.* ( $
+ E * '+E( @ =1>1A $
+E&.%!
6?,,.%!,.
$ $
* *
&.$% 2$
,,&.%"
$&.%+@=1>=A?=E+
$$$/&.
>1 ?*E % &. ?=E
*
%@=1>=*A
*$$&&&*
$,%
3.6 Language modeling
**$,%3
* $ '*
(**$,%!**
*.%!.
** * * % !
,%
Word decomposition and use of syntactic information
6$*
'#(.
* G% ,$ *
**'33H($%2$
.*
*$$
$$#$'B>1(
,%G*$
'(6@7#=119A!.
@.=1>1<=11/7.=111A@ =11/A2@!"=1>1<#
=11:A7#@3=11)A@.=119A@#=119A?
@F.=11:A%E* G$ #
*@H=118<.=119A@E=110<
!*=1>=A @ #>000<=110A@*=1>1A'*
*G(% +$**
$ '.$*( '(
* @ =11/*A% !
$* $
%!
.$$*<
$$ * * % !
$$ $ G@7#=11BA
$6==%
'(
%!*$$
@* =11/A @E =11/ < !* =1>:A
2 @G". =119A%! $
* #% + @E =11)A
<,,
*%
'.
7#. %(# * $
$ . ?%
$ %
*
'($
**%!$..
*$$
$@7*=111A
@ " =11:< $ =1>=< =110< . =1>=<
=1>:A%***
@.R=11B<7.=11:<2
=1>1A*%
Web or translation-based text data collection
!'(
*&*@=11:<7=11)A
@-."=11=<K=11)<=110<
7=1>=A% 2$ * $
%6.!..
%$
*'(.
%6$
*,$*$%!.
***%!
'@7=1>:A(%3
#'***%($
==DD$$$%%%D"DD
$*%
$**$%
Word segmentation issues
! $ . 7 H ! . $
%!$
% ! $ 4$
5.$*'
$ (% 6 $ $ $ *
*$$$$
% .
**$
%***'%% "
K2" (.@F=11/A%
3.7 Evaluating ASR performance
&'&(,$
$,<$*$
**%G'%%!H(*
*$ $% #,
$ * *$% 6 .
6 $*'($*
#*
*&%6
,*D7'7(
@ =11/AE'E(*'(@2=111AG
@* =1>1A%!+&
'+&(@=11)< =1>>A.*&@-+!=110A
&&'&&(@-"=11BA%
4. Applications and Tools for U-ASR
4.1 Voice search in three South African languages
$$%
!"$**
* @ =1>1A *
*
%*'($
&**.,
S.@=1>1A%,
* * $ $
*?<$*$
*,*%
.<S$
*&**%
4.2 Interactive Voice Forum for Farmers in Rural India
!"'Avaaj Otalo($=11)""*$-
?3#++G+*%$
+'$ (
.$%H$$*
$**$%!
"$.,*$;,
'.$$#.$$
$*%(%
$$+G;&*H
'&H(%&H*#
*?""'.*TB1G
+(%6?"$
%&08C,$
*'@E=110A(%2$*$$
@E=1>1A*=:%
4.3 The PI project
!E+"'*6-NAgence Nationale de la Recherche($
H
7*%6$"
'"*N$N*$
"$*=8(%E+"$*
45'
B%###*E+"(%
4.4 The Rapid Language Adaptation Toolkit (RLAT)
!"E+7'-6=118=11)(!+7
G " 7 *'7(
**$%6!=B
$***
*$
$%!.
*%+
.$'>(*$$**
$'=(
#$*':(
$'8(*'B(
'/( *
'9(')(*
.*.@#=119A%!E+7*$
$***
%!$
' +!7G(%**
'>B($$.$.
*%
5. The future of U-ASR
5.1 Endangered languages
%&# =/%
$*T:11$.
'$.0BC(
$.T/B11
$
*.$'45(%&
$ *
%+
' %(
#%!
=:DD$$$%$*%D=1>=D1>D>/D$**D
=8DD%%D$.D*D$DE+7!+3-D
=B!.'!(DD%.%.%D%
=/45*76DD$$$%%D
*
%6@?=1>1A$
%
5.2 Non written languages
> $ $ $ $
$$%+
*$$G!
*%3$*
$'$
(%+
$$.$'$
(% * * * .
' :%(% + @ =11/A @Q. =110A
* $ ' $ ( $
.$$%!$*
' $( $ ' (% +
$'$(
$$$*%
$45*
@Q.=110A%@*=1>=AG:E
+G G: % + @* =1>:A
$ G:E $
%#>80*.$
$,**
# *$ $
,%
5.3 Tasks Beyond U-ASR
G $. * 2!.
*%6*
+@H-..=1>:< .=1>:A%
*%?
@F=1>1AH6@?*#*=1>=A
G!%!*$
%+
*$$
'* D
7 (% ! $ $
**$
' .
(%7=1>=$.=9 $
disconnected languages and styles.
5.4 Organizing the research community on U-ASR
!$.
*$$'(#
#*
•&.!'.!(=11)=)
•&.!=1>1=0
•2!=1>1F"*:1
=9DD$$$%%D=1>=D
=)DD$$$%%%DD
=0DD$$$%%%D=1>1D
:1DD$$$%%"D$D=1>1D>8D)%
•!!U!+7E=11)
•!!U!+=1>1
•+=1>>:>
•&.!=1>=:=
•&.EKE!-=1>='6(::
•3#:=1>=:8
3 $. * $. ! '.
!($8=1>8:B%
#2!
$!'!(#:/$
$'*&($6
% ! + 7 '+7(
!G+:9'!G(*
:%$*
D.*%
#%!*
Processing Under-Resourced Languages %
!'+?(+7%*
* " * # -
-73$
%
6. Conclusion
3 +
$
%!$$
*<$*
%
*#:
# $ * , % +
$$'
B($.<$
$*#.
*$%+$
$$
N.%
References
@* =11/A -% * E% - K6% 4 5
+7E;1/%=)0=0=E*E=11/%
@*=1>1A G"* ?-* G G . G ! $ .
2%G*G+E%+>1+7
E'+7E("7=1>1%B)>B)8%
@F.=11:AG%F.*?
H7E%=11:?$#=11:%=B9=/1%
:>DD$$$% =1>>%DD9%
:=DD$$$%%%D=1>=D
::DD$$$%"=1>=%DD!6=1>=D%
:8DD$$$%%%%D,D:P=1>=D%IJ-UEJE
:BDD$$$%%%D=1>8D
:/DD%
:9DD=%%%DD
@=1>=A%!%-%% *%*F--$.G%
+E%-72!=1>=&.G7=1>=%=1N=)%
@=11/A%F2%%*
!.E)/'>1(=)88=)/==11/%
@=110A%G%F7%245
E%+=110%=)89N=)B1%
@=1>1A%G%F?%%24!+
7 5 E + +
F'+F(E7%)>:G=1>1%
@>00/AK%7%??%?.%+L%$%G%E.%<
4GF5%Proc. ICSLPE>00/%=>0>=>08%
[Berment 2004] V. Berment, “Méthodes pour informatiser des langues et des groupes de langues peu dotées”,
PhD Thesis, J. Fourier University – Grenoble I, May 2004.
@=11/AV!$$W,$SL,
?%+D7!=11/% *F*=11/%
@=11)A %G%%&+2+
F%+E%:+K7%-E+K7-E;1)+
=11)%)1B)>1%
@>009AK%G %GFK%S.?%GF%% %-%K%
G!>00/*7%Proc. Eurospeech>009%:/:
://%
@7 =11)A K% 74!* $ $**5
SLTU’082H=11)%
@7.=111A 7.E?!"#!.H7!$*
IEEE ICASSP 2000%
@7=11)A3%74
!5SLTU’082H=11)%
@7=1>=A72L%XF
%X +Proceedings of the 2nd ACM Symposium on Computing for Development
=1>=%>=%
@7.=11:A%7. % %L*
E%+YG!-$3=11:%818/%
@7$=11/AE% 7$ %2$!% #! ?*
2!7'2!(=11/%
@7*=111A 7%7* 6% K. 7 % H% >1
=111%=):N::=%
@7>009A7E%F%? K% G.$.G%-7%. % &!%
!$ # G % Proc. Automatic Speech Recognition and
Understanding (ASRU)%*7>009%B0>B0)%
@7>009A7%7?%37FF
+E% Proc. Automatic Speech Recognition and Understanding (ASRU)%*7>009%
/1//>:%
@7#=11BAG%7# %%
G >%1% + 7 + )> 2.
!6=11B%
@7#=119AG%7#!%2.G% %EK%E..H% G%H". %
G% %. G* * $
7G!EB'>(=119%
[Crystal 2000] D. Crystal, Language death – Cambridge, CUP, 2000.
@7=1>>A2%7%7%%#5+G!!
FG5E%=1>>2$
=1>>%
@7=1>=A277#%4FG
$5%+E73Z=1>==1>=%
@7 =1>:A 2 7 # 7 % 4G! F
G %5 < Speech Communication
Journal, Special Issue on Processing Under-Resourced Languages=1>:%
@F =1>>AG%2%F7%2-% %4[+
5E%+=1>>%:>B::>B/%
@F=11/A%FL%!
%-EG!.K=11/%9:>9:8%
@FH=1>>A-%K%FHK%G%2%F%%F&4&#
$5E%+=1>>%:>99:>)1%
@FH=1>:A-%K%FHG%2%FK%&F6&%%F&
*Speech Communication Journal,
Special Issue on Processing Under-Resourced Languages=1>:%
@F =1>1A !-F F 7 X-EH+F G! 63 3&
37F -?? E+X% + &. . !
'!(%EG=1>1%
@F>00BAF7%*Y% %!EH*
6?%Proc. EurospeechG>00B%>09=11%
@.=1>:A G.3& KL
+** Speech Communication Journal, Special Issue on Processing Under-Resourced Languages
=1>:%
@?"=111A?"%2.K%EK%2*HGD2GG
E!&.=111%B18B19%
@? =11/A L,?? $ S .G2 $ &
# S L F 7 E & S V +G G
G ! W 6 + &. G
!"$-7D2!=11/-$L.%K=11/%
@?*#* =1>=A G% ?*#* % % G !%
! &. . ! 7!$
=1>=%
@?=1>1A 2% ?% % 6% E%
H$E'81(%+>=-$G'(K
=1>1%
@?=1>>A2%?%!*%6%E%\$
%Interspeech 20116+%=):>=1>>%
@?. =1>>A* $ %K6%
?.2H%IEEE ASRU 2011%2$%
@?=1>=A?6Y&$%GFF
7+E"%E%7=1>=%+*%
@?=110A*?GK" GG.X&*
XIEEE ICASSP, 2009%
@?#$=11)A%?#$XGXSLTU 2008%
2H%
@?>00BA?K%6?%?F%EG%EK%.%%SH%
G.G+!H%Speech Communication>9>>)%
@?.>009A?.%?.K%GEG!$*
%Proc. Automatic Speech Recognition and Understanding (ASRU),%*
7>009%B00/1:%
@?>00=AKK%?7%2KGF%X&+!723F!
%X + IEEE International Conference on Acoustics, Speech, and Signal Processing,
1992. %>%B>9B=1%
@?#=119A 6%?#%% E***.H7%+E%+7E
=119%
@2. =111A 2% 2.F% & % % !
2GG%+E%+7E!.=111%
@2=111A7 27KS 6%*
*G%+E%+-!E72=111"
7=111%)>))=>%
@2 =1>1A %2 ?% ? E% * G -*
67=8'8(=1>1%//:N/)8%
@2 =1>1A < !% 2 %-."%2E%GG%4*
,.5E%+G.K=1>1%>0>8>0>9%
@+E>000A+%E%2*.+E?
+E*%7*E>000%
@K*=1>=AK*6*%E*
%+=1>=E3'(=1>=%
@K=11)A%K4F+
5SLTU’082H=11)%
@K=1>1AKS%GS%*F!&E%+%7%
7G77G7=1>1H%B=1>1%:=1:=:%
@ "=11:AF%E% "% %E
E%!+6&..EG*+=11:%0:>11%
@ .=11:A% .2%-4GG?5%Interspeech 2003.
@ =1>1A E% % 47 G! G ?
EH5+!=1>1%>/9.".+=1>1%
@ =1>>A% +% .%#HH*.$
GE%+Z=1>>6+=1>>%:>/>N:>/8%
@ =1>:A G.+ .FH##%
*%Speech Communication
Journal, Special Issue on Processing Under-Resourced Languages=1>:%
@ # >000A F # !" # &* FF F
F H7Proceedings of the International Conference on Speech Processing>000%
%:=::=9%
@ =11:A G" * Q. !" # 4? 5%
Interspeech 2003.
@ .=1>=A+ . HH.GS#%
&FEH*7 %+
E%67+=1>=&E=1>=%9>09=B%
@ M>00)A MK% G EG6H*+
!.%Proc. ICASSP>00)%8>98=1%
@ $ =11:A % $ 4! ' ( 6 G
5% + Proceedings of the 2003 International Workshop Speech and Computer
SPECOM-2003G$=11:%)>B%
@ =110A 2% % K% % G %+%SL%%*
E%+&.;=110G+=110%:=9::=%
@ =11/AG% %E%H%!%2.K%E..!%G%
4 * 5 E% HLT-NAACL-$
L.=11/%
@ =11/*AG% %$G7%
E%+;1/E*E>1=>>1=8=11/%
@ >00BA % G% F. K%% ?% + H* G
% Proc. EurospeechG>00B%>)B>)0%
@ =110A % E% F] % G 4? G!
5+=110%91)9>>% %
@=11:AH+?+7+7!+&*
)77!
'Z1:(?$#%:>>9:>=1*=11:%
@ =110A V H
WH % + ! E%
H>9+)-%=110E'(>89>N>8)=%
@=110AF?27E**G G%+!
UE>9'B(08B0BB'=110(
@=110A47G!F
E 5 K 7 ? 2 -%+ =110%
%=110%
@.R =11BA G% .R G% ER. H% *^ G 6&3
F*E%!F;=11B-+:/B) H7#*
=11B%>81N>89%
@G#=11:A*.!"#6G#&*%G%
E%'+7EZ1:(% =11:++7E
=11:%
@G". =119A E% G". !% 6_ S% !Q. E%+ 4 G
N .25 Interspeech’07$
=119%
@G.=1>1A!%G.G% %K%7.% %$.*
%+E%+-!E72=1>1G.K=1>1%>18BN>18)%
@G=1>=A%G?%%F?%2%GF-$.%+
!E=1'>(=1>=%>8==%
@G>00=AL G74
%5+Second International Conference on Spoken Language Processing>00=
@-."=11=A2%-."2%L!%&*4G$!
?*G!573+-?=11=%=%9>/9==!!$%
@-"=11BA2%-"!% $%-$GG.F
3%+E%++7
E+7E=11BE=11B%>1B:>1B/%
@-+!=110A!-+!=110'!10(!GE=110
@3=11)A+% 33% ?*.%K% `.aG
+E%+&..!!;1)?
+=11)%
@E=1>1A?*EG.#%!$*$!
;*%+E+&..
!:>=N:>9.7F*=1>1%
@E=1>1A -% E F%7% K E%F !%% E.4"
+572+%7G=1>1%9::N98=%
@E=110A-%E%$-%"%-E%F!%%E.%
+% + 72+ ;10 E =9
2B>NB8-$L.-L=110%7G%
@E =11/A !% E % 4+ F
5ICSLP’06E*=11/%
@E=11)A!%E%4
I5SLTU’082H=11)%
@E=110A!%E%&FG
%+!UE>9'B()/:)9:
'=110(%
@E=1>>A7%E%2%-%7E*7--$.
66?H7%+E%=1>>%
@$ =1>=A % $ G% F# % 6
G * E% B1 G
77;=1>=K" =1>=%>9BN>):%
@#=119A%# % % H+% E + %
>9'=(=119%:=>::/%
@.=119A!%.G%%GS% *
%7%80'/(=119%%8:9N8B=%
@ =111A K7 7KE#F*
% + Proceedings of the Second International Conference on Language
Resources and Evaluation, %09B0)1=111%
@.=1>1A2.G!?QMG**$
!.%+7E=1>1B81=B81B%
@.=119A%.G%L%?%K'KGG(
*%+E%+7E;19=119H%8%>)>>)8%
@=1>1A ! * 3 !" # 4&.
E5+=1>1G.K=/:1*=1>1%
@=1>=A!*3!"#4?EG?
+5+7E=1>= K=B:1G=1>=%
@ =1>=*A ! * 3 - ! H !" # 4
EF5+=1>=E30>:*=1>=%
@ =1>:A ! * 3 !" #% &**
% Speech Communication Journal, Special Issue on Processing Under-
Resourced Languages=1>:%
@# >00)A # !% &*% + H7% E%
+7E>00)%>)>0>)==%
@#=11>A!%#%&*4
57%:B%:>NB>=11>%
@#=11=A!%#4?*EG!F*F
5+7E=11=%:8B:8)%
[Schultz 2006] “Multilingual Speech Processing”, Tanja Schultz, Katrin Kirchhoff (Eds.),
Academic Press, ISBN 13: 9780120885015, 2006.
@#=119A4E+7&**! E5!%
#%&%.%.G%2.K% .+=119%
@#=1>:A4?*EG!UF*=15!"#-
!H!+7E=1>:%
@$#=11/A$#E%G".E%7.K%2$.
E%++7E+7E=11/%
@=1>>A6%?%Y%7F%L%67FF--$.
%+E%=1>>+&.2$=1>>%
=8=0%
@ =1>:A * G K !*"b 72
*#..%7U
=9'>(=10==9'=1>:(
@=11=A!%%K%GFG%2%F-$.
$EH*!+=8=11=
>1%
@=119A%Gc+F%?c%E#G7%Fc#
Gc6%*HG780'8(=119%=B:=/9%
@*=1>=A6%*!%%H!%#%&7
&E%+E!6+&..!
'!=1>=(G6=BF*=1>=%
@*=1>:A6%*!%%H!%#%EE
, 7 &E % + E ! > +
7E'E=1>:(!=0:>K=1>:%
@. =11/A % . 6% ?# GL2$ Y% -% G F% H% 7
**%+E%+7E=11/%
@Q.=11:A%Q.!%#6%G#%&*G6+7E=11:%
@Q.=11)A%Q.4+!*G$.
5SLTU’082H=11)%
@Q. =110A * Q. &*% 42 ! ?
F5+=110 =110%
@ =110A% % K% * 4# .
2$.5+=110%>89B>89)% %
@# =11:A # G% 6 % 46 * $
2H75E%+7E2 7:/):9>=11:%
@!*=1>=AG!*%!%*%4*2*
G5SLTU - Workshop on Spoken Language Technologies for
Under-Resourced Languages7!$=1>=%
@!*=1>:AG!*!*%4
N5%< Speech Communication
Journal, Special Issue on Processing Under-Resourced Language=1>:%
@!" =1>1A !"% G".E% 3 G H7 + E% = +%
&..!!=1>1G%>1N
>/=1>1%
@!=1>=A %! % ? 2% 2.% G GE6 6 $
H7%+E%+7EK=1>=%
@!=1>=*A%! %?%K2%2.%FE 6
$%+E%+=1>=%
@!=11)A%!K%6.?%?#% %7*GE*
2%+E%+=11)%
@! =11>A ! % ? G% 2* --D2GG G
-:9'>(%0>>=/%
@H2=1>1A7%2-% %G%FXE
XE&..!
'!=1>1(EGG=1>1%>9=:%
@H -.. =1>:A F -.. E L*
Speech Communication Journal, Special Issue on Processing Under-Resourced
Languages=1>:%
@H=118A HF% % F % .% G G
*+E%+7E;18=118%==8B==8)%
@H=1>=A %H G% 6%?#G% K%%! *.
%+E%!=1>=%
@H =1>1A -%!%H 6% !% #% G * $
+E%!=1>1%
@H=1>>A-%!%H6% !%#%*
*G!%+E%++=1>>%
@H =1>=A -%!% H 6% G# !% #% G *. % +
E%!=1>=%
@H=1>=*A-%!%H&%6%G#!%#%++#G
E!GFE%+E%+
=1>=%
@&>008A&% %&%GL%7
62GGF$%Proc. ICASSP>008%=:9=81%
@&.=111A%&%F%&.
%EF7*%=111>81%
@&=11)AF&XH$%X
H%=/-%=%=BB=/B=11)
@L>009AL%K%F.G%*Y%F7%?K%% $F%K%%
$ F% % E F% * % K% . 2% K% G% & E% 7% G
H*!\E"%Computer, Speech, and Language>>9:)0%
@L =11)A L % 2GG ! 2*.
EH2*=11)%B:0BB9%
@L=1>=AFL*GF72*
$$.*%E%+7E=1>=%8>/0
8>9=%