Acoustic and auditory properties of speech sounds#

[6]

LING 497 Phonetic Analysis: Articulation, Acoustics, Audition

The Pennsylvania State University

Prof. Deborah Morton


Revised

31 May 2023


Programming Environment#

Hide code cell source
# Show the R version string and the library search paths.
R.version.string # R.Version()
.libPaths()

# Packages attached by this notebook.
packages <- c(
  "gridExtra", "magrittr", "phonR", "repr", "scales", "tidyverse", "vowels"
)

# Install whichever of the listed packages are not in the local library yet.
installed_packages <- packages %in% rownames(installed.packages())
if (!all(installed_packages)) {
  install.packages(packages[!installed_packages])
}

# Attach every package, in the order listed above.
for (pkg in packages) {
  library(pkg, character.only = TRUE)
}
'R version 4.3.0 (2023-04-21)'
'/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library'
── Attaching core tidyverse packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
 dplyr     1.1.2      readr     2.1.4
 forcats   1.0.0      stringr   1.5.0
 ggplot2   3.4.3      tibble    3.2.1
 lubridate 1.9.2      tidyr     1.3.0
 purrr     1.0.2     
── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
 readr::col_factor() masks scales::col_factor()
 dplyr::combine()    masks gridExtra::combine()
 purrr::discard()    masks scales::discard()
 tidyr::extract()    masks magrittr::extract()
 dplyr::filter()     masks stats::filter()
 dplyr::lag()        masks stats::lag()
 purrr::set_names()  masks magrittr::set_names()
 Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Zürich German Vowels#

F1, F2, F3 observations#

Hide code cell source
# Formant observations for the Zürich German vowels, one row per observation.
# A vowel label may occur on several rows; the averaging cell groups by label.
# NOTE(review): F1-F3 are presumably in Hz (the plot axes are labelled Hz).
zurich_obs <- tribble(
  ~vowel,                 ~F1,                ~F2,                ~F3,
     "i",  338.16187273454796, 2311.1416044007174,  3176.602722306821,
     "ə",   480.7556674103058, 1652.5340593428432, 2390.0982442637264,
     "e",  424.66126677493634, 2151.9276688278355, 2750.2485307364627,
     "ə",   538.1591157503162, 1728.5344176651327, 2455.5321017313545,
     "ɛ",  463.38752300516285,  1729.503265799448,  2380.506763486331,
     "æ",    598.999109334512, 1485.5932775168874,  2374.372678732871,
     "y",   302.9120756513271,  1627.843171042086,   2094.16654646834,
     "i",  329.88108480049857, 2399.5911272550698, 3370.1561689997457,
     "ø",  353.53535533589655, 1654.6924476415286,  2219.494510301964,
     "ə",  446.42169736756864, 1762.7512749856016,  2305.844777075152,
     "œ",   556.2410818865662, 1437.2258145596832,  2492.393145497456,
     "u",   321.5929606643981,   684.098976469662, 2599.4447548048465,
     "o",   449.0585597667322,   642.100611328256,  2626.705175493852,
     "e",    548.722113940993,  1450.566949891477, 2537.6593241203104,
     "ɒ",   713.4208029323668,  1564.501521174171, 3090.8585123150892,
  # Disabled observation, kept commented out as in the original data:
  #  "ə", 187.75184574884526, 1712.5591032845734,   2703.87531734647,
     "ɛ",   554.6995123825042, 1966.4625997243281, 2496.1083788172814,
     "ə",    596.080905107178, 1591.8669959965687, 2502.8814824649626
)
zurich_obs
A tibble: 17 × 4
vowel  F1        F2         F3
<chr>  <dbl>     <dbl>      <dbl>
i      338.1619  2311.1416  3176.603
ə      480.7557  1652.5341  2390.098
e      424.6613  2151.9277  2750.249
ə      538.1591  1728.5344  2455.532
ɛ      463.3875  1729.5033  2380.507
æ      598.9991  1485.5933  2374.373
y      302.9121  1627.8432  2094.167
i      329.8811  2399.5911  3370.156
ø      353.5354  1654.6924  2219.495
ə      446.4217  1762.7513  2305.845
œ      556.2411  1437.2258  2492.393
u      321.5930   684.0990  2599.445
o      449.0586   642.1006  2626.705
e      548.7221  1450.5669  2537.659
ɒ      713.4208  1564.5015  3090.859
ɛ      554.6995  1966.4626  2496.108
ə      596.0809  1591.8670  2502.881

F1, F2, F3 averages#

# Mean F1, F2 and F3 per vowel label: one output row per distinct vowel.
# group_by() is kept (rather than a per-verb grouping) so the resulting row
# order stays the same as before.
zurich_avg <- zurich_obs %>%
  group_by(vowel) %>%
  summarise(across(c(F1, F2, F3), mean))
zurich_avg
A tibble: 11 × 4
vowel  F1        F2         F3
<chr>  <dbl>     <dbl>      <dbl>
e      486.6917  1801.2473  2643.954
i      334.0215  2355.3664  3273.379
o      449.0586   642.1006  2626.705
u      321.5930   684.0990  2599.445
y      302.9121  1627.8432  2094.167
æ      598.9991  1485.5933  2374.373
ø      353.5354  1654.6924  2219.495
œ      556.2411  1437.2258  2492.393
ɒ      713.4208  1564.5015  3090.859
ə      515.3543  1683.9217  2413.589
ɛ      509.0435  1847.9829  2438.308

Visualization of the F1-F2 acoustic vowel space#

# Set the repr plot width and height options used when the figure is rendered.
options(repr.plot.width = 10, repr.plot.height = 10)

# F1-F2 vowel chart of the per-vowel means: each vowel is drawn as its own
# text label, coloured by vowel, on reversed F2 (x) and F1 (y) scales.
plt <- ggplot(
  data = zurich_avg,
  mapping = aes(x = F2, y = F1, label = vowel, color = vowel)
) +
  geom_text(size = 10) +
  labs(
    title = "Zurich German [F1 vs F2]",
    x = "F2 [Hz]\n",
    y = "F1 [Hz]\n"
  ) +
  scale_x_reverse(
    breaks = seq(0, 3000, 200),
    position = "bottom"
  ) +
  scale_y_reverse(
    breaks = seq(0, 1000, 100),
    position = "left"
  ) +
  theme(
    text = element_text(size = 20),
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  )

# Draw the figure; warnings raised while printing are suppressed.
suppressWarnings(print(plt))
../../../../_images/56f5b59a0facddaab26f373128a857b0f2bd82bd5d355f14a99b5acdf8cc2e18.png

Analysis#

[1] Which pairs of vowels look like they are close together and are potentially confusable? (Identify at least three pairs.)

[2] Examine F3 for each pair of vowels. Do you think F3 might help speakers distinguish the vowel pairs from one another? Why or why not?

Hide code cell source
# Mean F3 of the vowels under comparison (e, y, æ, ø, œ, ɛ).
# The rows are picked by vowel label instead of by row position, so the
# selection does not silently change if the rows of zurich_avg are reordered
# or a vowel is added. filter() keeps the rows in zurich_avg's own order.
# NOTE(review): presumably these are the candidate confusable pairs from
# question [1] — confirm the intended vowel set.
zurich_avg %>%
  filter(vowel %in% c("e", "y", "æ", "ø", "œ", "ɛ")) %>%
  select(vowel, F3)
A tibble: 6 × 2
vowel  F3
<chr>  <dbl>
e      2643.954
y      2094.167
æ      2374.373
ø      2219.495
œ      2492.393
ɛ      2438.308

[3] Do you think any other acoustic cues underlie the observed vowel differences? If so, which ones?

[4] Do you think an auditory plot would show differences not seen in the acoustic plot? Why or why not?


Quechua Stops#

Salasaca Quechua has a three-way contrast in stop phonemes between voiceless, voiceless aspirated, and voiced stops.

Measure the VOT of each of the following stops:

[p]

[pʰ]

[b]

[t]

[tʰ]

[d]

[k]

[kʰ]

[g]

VOT observations#

Hide code cell source
# VOT observations for the Quechua stops: one row per measured stop, giving
# the word it occurs in (token), the stop label, and the measured VOT.
# A word contributes one row for each of its stops that was measured.
# NOTE(review): vot values look like seconds, with negative values for
# voicing lead — confirm the unit and sign convention.
quechua_obs <- tribble(
  ~token,   ~stop, ~vot,
  "pungu",  "p",    0.015893,
  "pungu",  "g",    0.033120,
  "patsuk", "p",    0.009725,
  "sipu",   "p",    0.021879,
  "phaki",  "ph",   0.039716,
  "phaki",  "k",    0.035454,
  "bunga",  "b",   -0.132969,
  "bunga",  "g",    0.021412,
  "wasibi", "b",    0.0,
  "taki",   "t",    0.019589,
  "taki",   "k",    0.036247,
  "tuta",   "t",    0.020096,
  "tuta",   "t",    0.012146,
  "thuktu", "th",   0.032266,
  "thuktu", "t",    0.015750,
  "dali",   "d",   -0.105084,
  "tshida", "d",    0.0,
  "kushni", "k",    0.035806,
  "wajku",  "k",    0.025784,
  "khata",  "kh",   0.053653,
  "khata",  "t",    0.014999,
  "gan",    "g",   -0.165850,
  "tawga",  "t",    0.015944,
  "tawga",  "g",    0.0
)
quechua_obs
A tibble: 24 × 3
token   stop  vot
<chr>   <chr> <dbl>
pungu   p      0.015893
pungu   g      0.033120
patsuk  p      0.009725
sipu    p      0.021879
phaki   ph     0.039716
phaki   k      0.035454
bunga   b     -0.132969
bunga   g      0.021412
wasibi  b      0.000000
taki    t      0.019589
taki    k      0.036247
tuta    t      0.020096
tuta    t      0.012146
thuktu  th     0.032266
thuktu  t      0.015750
dali    d     -0.105084
tshida  d      0.000000
kushni  k      0.035806
wajku   k      0.025784
khata   kh     0.053653
khata   t      0.014999
gan     g     -0.165850
tawga   t      0.015944
tawga   g      0.000000

VOT averages#

Hide code cell source
# Mean VOT per stop label: one output row per distinct stop.
quechua_avg <- quechua_obs %>%
  group_by(stop) %>%
  summarise(across(vot, mean))
quechua_avg
A tibble: 9 × 2
stop  vot
<chr> <dbl>
b     -0.06648450
d     -0.05254200
g     -0.02782950
k      0.03332275
kh     0.05365300
p      0.01583233
ph     0.03971600
t      0.01642067
th     0.03226600

Spectrograms#

Analysis#

[1] If you only saw the VOT patterns without any transcription, would you think there was a three-way contrast? Why or why not?

[2] Do you think that any acoustic cues other than VOT play a role in these contrasts?


Korean Fricatives#

Examine the phonetic correlates of a cross-linguistically unusual voiceless alveolar fricative contrast in Korean:

[sʰ] lenis aspirated fricative

[ss] fortis fricative

Which acoustic cues appear to be relevant to speaker-listeners’ perception in distinguishing the fricatives? Which ones don’t?

Acoustic cue measurements#

Hide code cell source
# Acoustic cue measurements for the two Korean fricatives, one row per token.
# NOTE(review): the duration_ms values (about 0.21) look like seconds rather
# than milliseconds — confirm the unit before relying on the column name.
korean <- tibble(
  token             = c("[sʰ]", "[ss]"),
  duration_ms       = c(0.208705, 0.213125),
  F0_transition_Hz  = c(125, 128),
  spectral_peak_Hz  = c(4347.11, 4449.29),
  spectral_peak_dB  = c(28.5, 36.6),
  H1_Hz             = c(235.19, 226.67),
  H1_dB             = c(43.6, 46.2),
  H2_Hz             = c(841.14, 834.28),
  H2_dB             = c(39.6, 46.1)
)

# Display the measurements with the H1 - H2 amplitude difference appended.
# The result is only shown; `korean` itself is not modified.
korean %>%
  mutate(H1_minus_H2_dB = H1_dB - H2_dB)
A tibble: 2 × 10
token  duration_ms  F0_transition_Hz  spectral_peak_Hz  spectral_peak_dB  H1_Hz   H1_dB  H2_Hz   H2_dB  H1_minus_H2_dB
<chr>  <dbl>        <dbl>             <dbl>             <dbl>             <dbl>   <dbl>  <dbl>   <dbl>  <dbl>
[sʰ]   0.208705     125               4347.11           28.5              235.19  43.6   841.14  39.6   4.0
[ss]   0.213125     128               4449.29           36.6              226.67  46.2   834.28  46.1   0.1

Spectrograms and power spectra#


Central Arrernte Nasals#

Central Arrernte is an Aboriginal Australian language that has nasal phonemes at many places of articulation.

Examine the formant and antiformant properties of the nasals. (Note: A final epenthetic vowel [a] may be heard at the end of consonant-final words pronounced in isolation, but is not included in the transcription.)

[m] bilabial

[ŋ] velar

[n̪] dental

[n] apicoalveolar

[ɲ] palatal

[ɳ] retroflex

F1, F2, F3 observations#

Hide code cell source
# Formant observations for the Central Arrernte nasals: one row per measured
# nasal, giving the transcribed word (token), the nasal label, and F1-F3.
# The token [aməŋ] contributes two rows, one for each of its nasals.
arrernte_obs <- tribble(
  ~token,   ~nasal, ~F1_Hz,            ~F2_Hz,             ~F3_Hz,
  "[aməŋ]", "m",    285.2174050473007, 1542.6186253059136, 2431.579956472647,
  "[aməŋ]", "ŋ",    405.2672201535346, 2535.1062570576146, 2605.047099678809,
  "[an̪ək]", "n̪",    280.2019747834746, 1689.4191253832691, 2615.092986757470,
  "[aɲək]", "ɲ",    329.7587005075927, 2379.2648129406225, 3247.250092150123,
  "[anək]", "n",    322.6723066332579, 1699.6822986841912, 2534.886655673502,
  "[aɳək]", "ɳ",    349.1755943432434, 2380.2786511502973, 4208.140362545317,
  "[aŋək]", "ŋ",    344.2634488457356, 1173.2307520047038, 2555.466245180057
)
arrernte_obs
A tibble: 7 × 5
token    nasal  F1_Hz     F2_Hz     F3_Hz
<chr>    <chr>  <dbl>     <dbl>     <dbl>
[aməŋ]   m      285.2174  1542.619  2431.580
[aməŋ]   ŋ      405.2672  2535.106  2605.047
[an̪ək]   n̪      280.2020  1689.419  2615.093
[aɲək]   ɲ      329.7587  2379.265  3247.250
[anək]   n      322.6723  1699.682  2534.887
[aɳək]   ɳ      349.1756  2380.279  4208.140
[aŋək]   ŋ      344.2634  1173.231  2555.466

F1, F2, F3 averages#

Hide code cell source
# Mean F1, F2 and F3 per nasal label: one output row per distinct nasal.
# Labels that occur in more than one token (ŋ) are averaged across tokens.
arrernte_avg <- arrernte_obs %>%
  group_by(nasal) %>%
  summarise(across(c(F1_Hz, F2_Hz, F3_Hz), mean))
arrernte_avg
A tibble: 6 × 4
nasal  F1_Hz     F2_Hz     F3_Hz
<chr>  <dbl>     <dbl>     <dbl>
m      285.2174  1542.619  2431.580
n      322.6723  1699.682  2534.887
n̪      280.2020  1689.419  2615.093
ŋ      374.7653  1854.169  2580.257
ɲ      329.7587  2379.265  3247.250
ɳ      349.1756  2380.279  4208.140

Spectrograms#


Terms#

  • [W] Arrernte

  • [W] Fortis

  • [W] Korean phonology

  • [W] Lenis

  • [W] Morphophonology

  • [W] Open Front Rounded Vowel

  • [W] Quechua

  • [W] Salasaca

  • [W] Sandhi

  • [W] Tone Sandhi

  • [W] Zürich German


Bibliography#

  • Breen & Dobson. (2005). [central arrernte nasals].

  • Fleischer, Jürg & Stephan Schmid. (2006). “Zurich German”. Journal of the International Phonetic Association.

  • Lee. (1999). [korean fricatives].

  • Masaquiza & Marlett. (2008). [quechua stops].