Computational Typology
  • Home
  • About
library(repr)
options(repr.plot.width=15, repr.plot.height=9)
library(tidyverse)
library(leaflet)
library(tmap)
library(sf)
library(svglite)
library(ggthemes)
library(viridis)
library(rnaturalearth)
library(RColorBrewer)
library(coda)
library(ape)
library(phytools)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Linking to GEOS 3.13.0, GDAL 3.10.0, PROJ 9.5.1; sf_use_s2() is TRUE

Loading required package: viridisLite


Attaching package: ‘ape’


The following object is masked from ‘package:dplyr’:

    where


Loading required package: maps


Attaching package: ‘maps’


The following object is masked from ‘package:viridis’:

    unemp


The following object is masked from ‘package:purrr’:

    map

d = read_csv("../data/affix_adposition.csv")
d %>% head()
Rows: 488 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): Glottocode, Name, Macroarea, Family
dbl (4): Longitude, Latitude, Affix, Adposition

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
A tibble: 6 × 8
Glottocode Name Macroarea Longitude Latitude Family Affix Adposition
<chr> <chr> <chr> <dbl> <dbl> <chr> <dbl> <dbl>
aari1239 Aari Africa 36.583333 6.000000 Afro-Asiatic 2 1
abkh1244 Abkhaz Eurasia 41.000000 43.083333 Northwest Caucasian 4 1
acha1250 Achagua South America -72.250000 4.416667 Arawakan 4 1
acol1236 Acholi Africa 32.666667 3.000000 Eastern Sudanic 4 2
west2632 Acoma North America -107.583333 34.916667 Keresan 4 1
adio1239 Adioukrou Africa -4.583333 5.416667 Niger-Congo 5 1
cross_classification <- d %>%
    count(Affix, Adposition, name = "Count") %>%
    pivot_wider(names_from = Adposition, values_from = Count, values_fill = 0) %>%
    rename(postposition = `1`, preposition = `2`) 

cross_classification
A tibble: 5 × 3
Affix postposition preposition
<dbl> <int> <int>
2 175 53
3 34 30
4 42 52
5 21 38
6 7 36
ld_geo <- d %>% st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326)
# Change to Equal Earth projection centered at 160°E
target_crs <- st_crs("+proj=eqearth +lon_0=160 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs")
# Define cutting polygon
offset <- 180 - 160
polygon <- st_polygon(x = list(rbind(
  c(-0.0001 - offset, 90),
  c(0 - offset, 90),
  c(0 - offset, -90),
  c(-0.0001 - offset, -90),
  c(-0.0001 - offset, 90)
))) %>%
  st_sfc() %>%
  st_set_crs(4326)
# Cut and transform
worldMap <- ne_countries(scale = "medium", returnclass = "sf") %>% 
    st_make_valid() %>% 
    st_difference(polygon) %>% 
    st_transform(crs = target_crs)
Warning message:
“attribute variables are assumed to be spatially constant throughout all geometries”
ocean <- st_polygon(
    list(cbind(c(
        seq(-20, 339, len = 100),
        rep(339, 100),
        seq(338, -19, len = 100),
        rep(-20, 100)),
        c(rep(-90, 100),
        seq(-89, 89, len = 100),
        rep(90, 100),
        seq(89, -90, len = 100)
    )))
  ) |>
  st_sfc(crs = "WGS84") |>
  st_as_sf()
ocean <- sf::st_transform(ocean, crs = target_crs);
affix_colors <- c(
  "Strongly suffixing" = "#2166ac",
  "Weakly suffixing" = "#67a9cf",
  "Equal prefixing and suffixing" = "#f7f7f7",  # neutral hellgrau
  "Weakly prefixing" = "#f4a582",
  "Strongly prefixing" = "#b2182b"
)
affix_levels <- c(
  "2" = "Strongly suffixing",
  "3" = "Weakly suffixing",
  "4" = "Equal prefixing and suffixing",
  "5" = "Weakly prefixing",
  "6" = "Strongly prefixing"
)
ld_geo$Affix <- factor(
  as.character(ld_geo$Affix),
  levels = names(affix_levels),
  labels = affix_levels
)
adposition_levels <- c(
    "1" = "Postposition",
    "2" = "Preposition"
)
ld_geo$Adposition <- factor(
  as.character(ld_geo$Adposition),
  levels = names(adposition_levels),
  labels = adposition_levels 
)
u27_map <- ggplot(data = worldMap) +
  geom_sf(data = ocean, fill = "#d0e6f7") +     # Ozean
  geom_sf(color = NA, fill = "gray70") +        # Länder

  # Datenpunkte mit benutzerdefinierter Farbe + Shape
  geom_sf(data = ld_geo, aes(shape = Adposition, fill = Affix),
          color = "black", stroke = 0.2, alpha = 0.5, size = 3) +

  # Farbskala manuell
  scale_fill_manual(
  name = "Affix type",
  values = affix_colors
    ) +

  # Adpositionstyp über Shape (Dreieck = Präposition, Kreis = Postposition)
  scale_shape_manual(
    name = "Adposition type",
    values = c("Postposition" = 24, "Preposition" = 21),  # 24 = Dreieck, 21 = Kreis
  ) +
  # Design
  theme_minimal() +
theme(
  panel.background = element_rect(fill = "white", color = NA),
  legend.position = "bottom",
  legend.box = "vertical",              # ⬅️ untereinander
  legend.direction = "horizontal",      # ⬅️ innerhalb jeder Legende horizontal
  legend.title = element_text(size = 12),
  legend.text = element_text(size = 10),
  legend.spacing.y = unit(0.3, "cm"),   # ⬅️ Abstand zwischen Shape- & Fill-Legende
  plot.title = element_text(size = 24, face = "bold", hjust = 0.5)
) +
  guides(
  shape = guide_legend(order = 1, override.aes = list(fill = "gray90", size = 3)),
  fill = guide_legend(order = 2, override.aes = list(shape = 21, color = "black", size = 3))
) + ggtitle("Affixing type by adposition")

# Plot anzeigen
u27_map

ggsave(u27_map, filename = "../img/affix_adposition_map.pdf", width = 10, height = 6, device = cairo_pdf, dpi = 300)
sound_inventory_population <- read_csv("../data/soundpop.csv")

head(sound_inventory_population)
Rows: 1508 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): Glottocode, Macroarea, Family
dbl (4): nSegments, population, Longitude, Latitude

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
A tibble: 6 × 7
Glottocode nSegments population Macroarea Longitude Latitude Family
<chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
bafi1243 51 60000 Africa 11.17 5.00 Atlantic-Congo
nugu1242 35 35000 Africa 11.25 4.58 Atlantic-Congo
mmaa1238 34 8000 Africa 11.08 4.50 Atlantic-Congo
tuki1240 39 26000 Africa 11.50 4.58 Atlantic-Congo
ewon1239 34 578000 Africa 12.00 4.00 Atlantic-Congo
abar1238 36 1850 Africa 10.25 6.58 Atlantic-Congo
population_bins <- c(0, 100, 1000, 10000, 100000, 1000000, 10000000, max(sound_inventory_population$population))
sound_inventory_population <- sound_inventory_population %>%
    mutate(population_range = cut(sound_inventory_population$population, breaks = population_bins, include.lowest = TRUE, labels=F))
nsegment_bins <- c(0, 25, 30, 37, 45, max(sound_inventory_population$nSegments))
sound_inventory_population <- sound_inventory_population %>%
    mutate(nSegments_range = cut(sound_inventory_population$nSegments, breaks = nsegment_bins, include.lowest = TRUE, labels=F))
sound_inventory_population %>%
st_drop_geometry() %>%
    group_by(nSegments_range) %>%
    summarise(n = n(), min_nSegments = min(nSegments), max_nSegments = max(nSegments))
A tibble: 5 × 4
nSegments_range n min_nSegments max_nSegments
<int> <int> <dbl> <dbl>
1 346 11 25
2 226 26 30
3 324 31 37
4 307 38 45
5 305 46 161
sound_inventory_population %>%
    st_drop_geometry() %>%
    group_by(population_range) %>%
    summarise(n=n(), min_pop = min(population), max_pop = max(population))
A tibble: 7 × 4
population_range n min_pop max_pop
<int> <int> <dbl> <dbl>
1 117 1 100
2 148 110 1000
3 305 1005 10000
4 389 10100 100000
5 311 101000 1000000
6 173 1016650 9905700
7 65 10406422 921233120
sp_geo <- sound_inventory_population %>%
    st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326)
sp_plot <- ggplot(data = worldMap) +
  geom_sf(data = ocean, fill = "#d0e6f7") +
  geom_sf(color = NA, fill = "gray70") +
  geom_sf(data = sp_geo,
          aes(fill = as.factor(nSegments_range), size = as.factor(population_range)),
           shape = 21, color = "black", stroke = 0.2, alpha = 0.7) +

  scale_fill_brewer(
    palette = "RdYlBu", direction = -1,
    name = "Segment number range",
    labels = c("11-25", "26-30", "31-37", "38-45", "46-161")
  ) +
  scale_size_manual(
    name = "Population range",
    values = 0.5*(1:7),
    labels = c("<100", "100-1k", "1k-10k", "10k-100k", "100k-1M", "1M-10M", ">10M"),
  ) +

  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.box = "vertical",
    legend.spacing.y = unit(0.6, "cm")
  ) +
  theme(
  plot.title = element_text(
    size = 20,       # z. B. 20 pt – je nach Verhältnis zur Kartenbreite
    face = "bold",   # fett
    hjust = 0.5      # zentriert
  )
) +
guides(
  fill = guide_legend(
    override.aes = list(shape = 21, size = 5, stroke = 0.3, color = "black")
  ),
  size = guide_legend(
    override.aes = list(shape = 21, fill = "gray80", stroke = 0.3)
  )
) +  ggtitle("Phoneme inventory (color) and population size (size)")
sp_plot

ggsave(sp_plot, filename = "../img/segments_population_map.pdf", width = 10, height = 6, device = cairo_pdf, dpi = 300)
library(ggplot2)
library(dplyr)
library(scales)

# Colorblind-freundliche Okabe-Ito-Palette
okabe_ito <- c(
  "Africa"        = "#E69F00",  # Orange
  "Eurasia"       = "#56B4E9",  # Sky blue
  "Papunesia"     = "#009E73",  # Bluish green
  "Australia"     = "#F0E442",  # Yellow
  "North America" = "#0072B2",  # Blue
  "South America" = "#D55E00"   # Vermillion
)

# Custom Theme mit farbigem Hintergrund
theme_softbox <- theme_minimal(base_size = 14) +
  theme(
    plot.background  = element_rect(fill = "#f5f5f5", color = NA),
    panel.background = element_rect(fill = "#eef1f5", color = NA),
    panel.grid.major = element_line(color = "gray80", size = 0.3),
    panel.grid.minor = element_blank(),
    plot.title       = element_text(hjust = 0.5, face = "bold", size = 16),
    plot.subtitle    = element_text(hjust = 0.5, size = 12),
    legend.position  = "bottom",
    legend.background = element_rect(fill = "#f5f5f5", color = NA),
    legend.key        = element_rect(fill = "#f5f5f5", color = NA),
    legend.title      = element_text(face = "bold")
  )

# Der Plot
sp_scatterplot <- sound_inventory_population %>%
  ggplot(aes(x = population, y = nSegments, color = Macroarea)) +

  geom_point(alpha = 0.6, size = 0.8) +

  geom_smooth(
    aes(color = Macroarea),
    method = "lm",
    se = FALSE,
    linetype = "solid",
    linewidth = 1
  ) +

  geom_smooth(
    method = "lm",
    se = TRUE,
    color = "black",
    linetype = "solid",
    linewidth = 2
  ) +

  scale_x_log10(labels = comma_format()) +
  scale_y_log10(labels = number_format(accuracy = 1)) +

  scale_color_manual(values = okabe_ito) +


  labs(
    x = "Population Size (log scale)",
    y = "Phoneme Inventory Size (log scale)",
    color = "Macroarea",
    title = "Scaling of Phoneme Inventory Size with Population",
    subtitle = expression("log"[10]*"(Inventory size) ~ vs ~ log"[10]*"(Population size)")
  ) + theme_softbox +
  theme(
    legend.title = element_text(face = "bold"),
    legend.text  = element_text(size = 14)
  )


# Plot anzeigen
sp_scatterplot

Attaching package: ‘scales’


The following object is masked from ‘package:phytools’:

    rescale


The following object is masked from ‘package:viridis’:

    viridis_pal


The following object is masked from ‘package:purrr’:

    discard


The following object is masked from ‘package:readr’:

    col_factor


Warning message:
“The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
ℹ Please use the `linewidth` argument instead.”
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'

ggsave(sp_scatterplot, filename = "../img/segments_population_scatterplot.pdf", width = 10, height = 6)
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'