Computational Typology

Prepare data

Set the working directory to the location of this file, then activate and instantiate the local package environment.

cd(@__DIR__)
using Pkg
Pkg.activate(".")
Pkg.instantiate()
  Activating project at `/localscratch/nwsja01/projects/papers/computational_typology_routledge/code`

Load packages.

using CSV
using DataFrames
using Pipe
using ProgressMeter
using JSON
using HTTP
using Downloads
using Statistics
using RCall

using PyCall

ete3 = pyimport("ete3")
PyObject <module 'ete3' from '/localscratch/nwsja01/miniconda3/envs/worldtree_msa/lib/python3.11/site-packages/ete3/__init__.py'>

Download WALS dataset and unzip it.

# Define the URL and the destination file path
url = "https://zenodo.org/records/13950591/files/cldf-datasets/wals-v2020.4.zip?download=1"
destfile = "wals-v2020.4.zip"

# Download the file
Downloads.download(url, destfile)

# Unzip the file
run(`sh -c "unzip -o $destfile -d ../data/wals-v2020.4 > /dev/null 2>&1"`)

# Clean up
rm(destfile)
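
The shell-based unzip above assumes a Unix environment with `unzip` installed. As a sketch of a pure-Julia alternative for the extraction step (assuming the ZipFile.jl package has been added to the environment), one could instead write:

using ZipFile  # assumption: ZipFile.jl is in the project environment

# Extract every archive entry under ../data/wals-v2020.4
archive = ZipFile.Reader(destfile)
for f in archive.files
    endswith(f.name, "/") && continue  # skip directory entries
    out = joinpath("../data/wals-v2020.4", f.name)
    mkpath(dirname(out))
    write(out, read(f))
end
close(archive)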

Download Phoible dataset.

url_ = "https://github.com/phoible/dev/blob/master/data/phoible.csv?raw=true"
destfile = "phoible.csv"

# Download the file
Downloads.download(url_, destfile)

# Define column types
col_types = Dict(
    "InventoryID" => Int,
    "Marginal" => Bool
)

# Read the CSV file with specified column types
phoible = CSV.File(destfile; types=col_types, missingstring="NA") |> DataFrame
first(phoible, 10)
10×49 DataFrame
(first ten Phoible rows, all from the Korean inventory, InventoryID 1, Glottocode kore1280; columns: InventoryID, Glottocode, ISO6393, LanguageName, SpecificDialect, GlyphID, Phoneme, Allophones, Marginal, SegmentClass, Source, plus 38 distinctive-feature columns such as tone, stress, nasal, lateral; output abridged)

Extract number of segments from Phoible dataset.

number_of_segments = @pipe phoible |>
    groupby(_, :InventoryID) |>
    combine(_, nrow => :nSegments) |>
    leftjoin(_, phoible, on = :InventoryID => :InventoryID, makeunique=true) |>
    groupby(_, :Glottocode) |>
    combine(_, :nSegments => median => :nSegments) |>
    sort(_, :nSegments, rev=true) |>
    dropmissing(_, :Glottocode) ;
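
Since the result is sorted by inventory size in descending order, a quick peek at the head (inspection only, not part of the pipeline) shows the languages with the largest median inventories:

first(number_of_segments, 5)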

Download ASJP dataset and unzip it.

url = "https://zenodo.org/records/7079637/files/lexibank/asjp-v20.zip?download=1"
destfile = "../data/asjp-v20.zip"
# Download the file
Downloads.download(url, destfile)
# Unzip the file
run(`sh -c "unzip -o $destfile -d ../data/asjp-v20 > /dev/null 2>&1"`)

# Clean up
rm(destfile)

Extract population sizes from ASJP dataset.

languages_asjp = CSV.File("../data/asjp-v20/lexibank-asjp-f0f1d0d/cldf/languages.csv") |> DataFrame

asjp_raw = readlines("../data/asjp-v20/lexibank-asjp-f0f1d0d/raw/lists.txt")


# Find lines containing "@"
at_indices = findall(occursin.("@", asjp_raw))

# Combine each "@" line with the next line, tab-separated
combined_lines = [asjp_raw[i] * "\t" * asjp_raw[i + 1] for i in at_indices]

# Create a DataFrame with a single column :raw
df = DataFrame(raw = combined_lines)

# Extract parts using regexes
df.before_brace = replace.(df.raw, r"\{.*" => "")  # everything before the first {
df.inside_brace = getindex.(
    match.(r"\{([^}]*)\}", df.raw),
    Ref(1)
)  # content of the braces
df.after_brace = getindex.(
    match.(r"\}\t(.+)", df.raw),
    Ref(1)
)  # everything after the closing brace and tab
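
To illustrate the three regexes, here is a purely hypothetical line of the same shape (not actual ASJP content):

s = "EXAMPLE{Fam.GROUP}\t3 37.00 126.00 77000000 KR kor"
replace(s, r"\{.*" => "")    # "EXAMPLE" (everything before the brace)
match(r"\{([^}]*)\}", s)[1]  # "Fam.GROUP" (the brace content)
match(r"\}\t(.+)", s)[1]     # the fields after the closing brace and tab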

# Split the `after_brace` string into multiple columns

# Step 1: split on whitespace and pad/truncate to six fields per row
split_cols = split.(df.after_brace, r"\s+")
split_cols_fixed = [
    length(x) >= 6 ? x[1:6] : vcat(x, fill(missing, 6 - length(x)))
    for x in split_cols
]

# Step 2: assemble into a DataFrame with one row per entry and six columns
cols_matrix = DataFrame(split_cols_fixed, :auto) |> permutedims


for i in 1:6
    df[!, Symbol("column$i")] = cols_matrix[:, i]
end

df = @pipe df |>
    select(_, :before_brace, :column5) |>
    rename(_, :before_brace => :ASJP_name, :column5 => :population) |>
    dropmissing(_, :population) |>
    transform(_, :population => ByRow(x -> parse(Int, x)) => :population)  |>
    filter(row -> row.population > 0, _) 

glottolog_population = @pipe languages_asjp |>
    innerjoin(_, df, on = :Name => :ASJP_name, makeunique=true) |>
    select(_, :Glottocode, :population) |>
    dropmissing(_, :Glottocode) |>
    sort(_, :population, rev=true) |>
    groupby(_, :Glottocode) |>
    combine(_, :population => maximum => :population) 
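
A quick peek at the result (inspection only):

first(glottolog_population, 5)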


d2 = @pipe number_of_segments |>
    innerjoin(_, glottolog_population, on = :Glottocode)

asjp_meta = @pipe languages_asjp |>
    select(_, :Glottocode, :Macroarea, :Longitude, :Latitude, :Family) |>
    dropmissing(_, :Glottocode) |>
    unique(_, :Glottocode) 


d2 = @pipe d2 |>
    leftjoin(_, asjp_meta, on = :Glottocode, makeunique=true) ;

Load WALS data.

wals_values = CSV.File("../data/wals-v2020.4/cldf-datasets-wals-0f5cd82/cldf/values.csv") |> DataFrame

wals_languages = CSV.File("../data/wals-v2020.4/cldf-datasets-wals-0f5cd82/cldf/languages.csv") |> DataFrame

wals_parameters = CSV.File("../data/wals-v2020.4/cldf-datasets-wals-0f5cd82/cldf/parameters.csv") |> DataFrame

wals_codes = @pipe CSV.File("../data/wals-v2020.4/cldf-datasets-wals-0f5cd82/cldf/codes.csv") |> DataFrame |> select(_, :Parameter_ID, :Name, :Number);

Extract the values for the WALS features “Affix” (26A) and “Adposition” (85A).

d1 = @pipe wals_values |>
    filter(row -> row.Parameter_ID ∈ ["26A", "85A"], _) |>
    select(_, :Language_ID, :Parameter_ID, :Code_ID) |>
    leftjoin(_, wals_values, on = [:Language_ID, :Code_ID, :Parameter_ID], makeunique=true) |>
    leftjoin(_, wals_codes, on = [:Parameter_ID => :Parameter_ID, :Code_ID => :Number], makeunique=true) |>
    select(_, Not(:ID, :Comment, :Code_ID, :Source, :Example_ID, :Name)) |>
    unstack(_, :Parameter_ID, :Value) |>
    dropmissing(_) |>
    rename(_, [:Language_ID, :Affix, :Adposition]) |>
    leftjoin(_, wals_languages, on = :Language_ID => :ID) |>
    select(_, :Glottocode, :Name, :Macroarea, :Longitude, :Latitude, :Family, :Affix, :Adposition) |>
    dropmissing(_, :Glottocode) |>
    unique(_, :Glottocode) ;

Filter the WALS values: keep only languages with dominant postpositions or prepositions (codes 1 and 2) and drop languages with little affixation (code 1).

filter!(row -> row.Adposition ∈ [1,2], d1)
filter!(row -> row.Affix != 1, d1);
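
To see what remains after filtering (inspection only), the retained feature combinations can be tabulated:

# Inspection only: counts per Affix/Adposition combination
combine(groupby(d1, [:Affix, :Adposition]), nrow => :n)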

Get the world tree from OSF.

file_id = "sbh4q"
url = "https://api.osf.io/v2/files/$(file_id)/"
response = HTTP.get(url)
data = JSON.parse(String(response.body))

download_url = data["data"]["links"]["download"]

ml_tree = ete3.Tree(read(Downloads.download(download_url, timeout=1000), String))
PyObject Tree node '' (0x7f7d7c2d519)

Use mad.py to root the tree.

ml_tree.write(format=1, outfile="mltree.tre")

run(`python mad.py mltree.tre`)
run(`bash -c "head -n 1 mltree.tre.rooted > mltree.tre.rooted.head"`)
ml_tree = ete3.Tree("mltree.tre.rooted.head")

MAD phylogenetic rooting

Analyzing file 'mltree.tre'...
>> Warning: Trees with repeating branch lengths are suspicious (290 repeating values).
>> Warning: Root is polytomous.
>> [MAD=0.138_AI=1.000_CCV=18.5%_N=1/3]
>> Warning: Root is polytomous.
>> [MAD=0.138_AI=1.000_CCV=18.5%_N=2/3]
>> [MAD=0.138_AI=1.000_CCV=18.5%_N=3/3]

Minimal ancestor deviation, MAD = 0.138
           Ambiguity index,  AI = 1.000
                  Clock CV, CCV = 18.5%, 18.5%, 18.5%
Tied root positions,
3 rooted trees written to mltree.tre.rooted


    - Please cite DOI:10.1038/s41559-017-0193
PyObject Tree node '' (0x7f7d6ab6fe1)

Get the corresponding metadata for the ASJP languages from OSF.

file_id = "w4jnf"
url = "https://api.osf.io/v2/files/$(file_id)/"
response = HTTP.get(url)
data = JSON.parse(String(response.body))

download_url = data["data"]["links"]["download"]


asjp_languages = @pipe CSV.read(
                           Base.download(download_url),
                           DataFrame;
                           missingstring="") |>
                       dropmissing(_, :classification_wals) |>
                       dropmissing(_, :Glottocode) |>
                       filter(row -> row.recently_extinct == 0, _) |>
                       filter(row -> row.long_extinct == 0, _) |>
                       select(_, [:Name, :Glottocode, :classification_wals]) |>
                       DataFrames.transform(_, [:classification_wals, :Name] => ByRow((x, y) -> string(x, ".", y)) => :longname) |>
                       select(_, Not(:classification_wals)) |>
                       DataFrames.transform(_, :longname => ByRow(x -> replace(x, "-" => "_")) => :longname) |>
                       dropmissing

CSV.write("../data/asjp_languages.csv", asjp_languages)
"../data/asjp_languages.csv"

Map ASJP longnames to Glottocodes.

longname2glottocode = Dict{String, String}(
    zip(asjp_languages.longname, asjp_languages.Glottocode)
)

glottocode2longname = Dict{String, String}(
    zip(asjp_languages.Glottocode, asjp_languages.longname)
)
Dict{String, String} with 5077 entries:
  "vili1238" => "NC.BANTOID.VILI_3"
  "avar1256" => "NDa.AVAR_ANDIC_TSEZIC.AVAR_ZAKATALY"
  "krun1240" => "AuA.BAHNARIC.BRAO_KRUNG"
  "gwii1239" => "KK.KHOE_KWADI.GWI"
  "sout2797" => "NC.GUR.SOUTHERN_TOUSSIAN"
  "kadu1253" => "ST.BURMESE_LOLO.KADUO"
  "kili1268" => "Hok.YUMAN.KILIWA"
  "hiww1237" => "An.OCEANIC.HIW"
  "mart1256" => "PN.WESTERN_PAMA_NYUNGAN.YULPARIJA"
  "cham1312" => "An.CHAMORRO.CHAMORRO"
  "akwa1248" => "NC.BANTOID.AKWA"
  "kups1238" => "ESu.NILOTIC.KUPSAPIINY"
  "towe1240" => "Pau.WESTERN_PAUWASI.TOWEI"
  "trum1247" => "Tru.TRUMAI.TRUMAI"
  "yimc1240" => "ST.KUKI_CHIN.NAGA_YIMCHUNGRU"
  "east2346" => "ST.BODIC.EASTERN_TAMANG"
  "lagw1237" => "AA.BIU_MANDARA.LAGWAN"
  "duru1249" => "NC.BANTOID.DURUMA"
  "leco1242" => "Lek.LEKO.LEKO"
  ⋮          => ⋮
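
A lookup in either direction is then a plain dictionary access, for instance (using an entry visible in the output above):

glottocode2longname["cham1312"]              # "An.CHAMORRO.CHAMORRO"
longname2glottocode["An.CHAMORRO.CHAMORRO"]  # "cham1312"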

Prune the world tree to the relevant languages.

tree_taxa = intersect(ml_tree.get_leaf_names(), asjp_languages.longname)

ml_tree.prune([ml_tree & x for x in tree_taxa])

Get the character matrix from OSF.

This is necessary because the ASJP world tree uses ASJP doculect identifiers, and there are often several ASJP doculects per Glottocode. For each Glottocode, we pick the doculect with the fewest missing data.
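
As a toy illustration of the selection rule (hypothetical data, not from ASJP): count the "-" entries per row and keep, for each Glottocode, the row with the fewest.

# Hypothetical two-doculect example illustrating the selection rule
toy = DataFrame(
    Glottocode = ["kore1280", "kore1280"],
    longname   = ["X.KOREAN.A", "X.KOREAN.B"],
    x1 = ["1", "-"], x2 = ["0", "-"]
)
toy.nGaps = map(r -> sum(Array(r[[:x1, :x2]]) .== "-"), eachrow(toy))
unique(sort(toy, :nGaps), :Glottocode).longname  # ["X.KOREAN.A"]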

file_id = "3em9h"
url = "https://api.osf.io/v2/files/$(file_id)/"
response = HTTP.get(url)
data = JSON.parse(String(response.body))

download_url = data["data"]["links"]["download"]


# Read the character matrix: drop the header line, then split each remaining
# line into its two whitespace-separated fields (longname, characters)
world_sc_ = DataFrame(
    hcat(
        split.(
            split(read(Base.download(download_url), String), "\n")[2:end]
        )...
    ) |> permutedims, :auto)


rename!(world_sc_, :x1 => :longname, :x2 => :characters)


world_sc = @pipe world_sc_.characters |>
                 mapslices(x -> split.(x, ""), _, dims=1) |>
                 hcat(_...) |>
                 permutedims |>
                 DataFrame(_, :auto) |>
                 insertcols!(_, 1, :longname => world_sc_.longname)
7432×1641 DataFrame
(one row per ASJP doculect: a longname such as AA.DIZOID.NAO followed by 1,640 binary character columns x1…x1640, with “-” marking missing data; output abridged)

Make sure tree and metadata match.

filter!(row -> row.longname ∈ asjp_languages.longname, world_sc)
7034×1641 DataFrame
(same layout as above, now restricted to doculects whose longname occurs in the ASJP metadata; output abridged)

Select the doculects with the fewest missing data.

insertcols!(world_sc, 1, :Glottocode => [longname2glottocode[row.longname] for row in eachrow(world_sc)])


best_languages = @pipe world_sc |> 
    DataFrame(
        longname = _.longname,
        Glottocode = _.Glottocode,
        nGaps = map(x -> sum(Array(x) .== "-"), eachrow(_))
    ) |> 
    sort(_, :nGaps) |>
    unique(_, :Glottocode).longname 
4261-element Vector{SubString{String}}:
 "AuA.KHASIAN.KHASI"
 "Hok.YUMAN.YAVAPAI"
 "Iwa.IWAIDJAN.IWAIDJA"
 "ST.BODIC.BUNAN"
 "ST.BODIC.EASTERN_BALTI"
 "ST.BODIC.GHACHOK"
 "ST.BODIC.HELAMBU_SHERPA"
 "ST.BODIC.KAGATE"
 "ST.BODIC.LHASA_TIBETAN"
 "ST.BODIC.LOWA"
 ⋮
 "TNG.BINANDEREAN.GAINA"
 "NDe.ATHAPASKAN.HAN"
 "TNG.BINANDEREAN.OROKAIVA_SOSE"
 "ESu.NILOTIC.SOGOO"
 "NC.BANTOID.KOSHIN"
 "CSu.BONGO_BAGIRMI.GULA_SARA"
 "An.GREATER_CENTRAL_PHILIPPINE.MANDAYAN_ISLAM_PISO"
 "An.OCEANIC.PENRHYN"
 "AA.BIU_MANDARA.VEMGO_MABAS_2"

Prune the tree to the selected doculects and relabel the leaves with Glottocodes.

ml_tree.prune([ml_tree & x for x ∈ best_languages])



for l in ml_tree.get_leaves()
    l.name = longname2glottocode[l.name]
end

Select the typological data rows that match a leaf in the tree.

tree_taxa = ml_tree.get_leaf_names()

filter!(row -> row.Glottocode ∈ tree_taxa, d1)
filter!(row -> row.Glottocode ∈ tree_taxa, d2)


CSV.write("../data/affix_adposition.csv", d1)
CSV.write("../data/soundpop.csv", d2)
"../data/soundpop.csv"

Create two trees – one for each dataset.

tree1 = ml_tree.copy()
tree1.prune([tree1 & x for x ∈ d1.Glottocode])

tree2 = ml_tree.copy()
tree2.prune([tree2 & x for x ∈ d2.Glottocode])

tree1.write(format=1, outfile="tree1.tre")
tree2.write(format=1, outfile="tree2.tre")
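
A quick consistency check (optional): after pruning, each tree should have exactly one leaf per data row.

# Optional: each pruned tree has one leaf per row of its dataset
@assert length(tree1.get_leaf_names()) == nrow(d1)
@assert length(tree2.get_leaf_names()) == nrow(d2)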

Make the trees ultrametric with penalized-likelihood rate smoothing (ape's chronos).

R"""
library(ape)

# Load the rooted trees
tree1 <- read.tree("tree1.tre")
tree2 <- read.tree("tree2.tre")

# Collapse zero-length internal branches into polytomies
tree1 <- di2multi(tree1)
tree2 <- di2multi(tree2)

# Replace very small or zero branch lengths
tree1$edge.length[tree1$edge.length <= 1e-3] <- 1e-3
tree2$edge.length[tree2$edge.length <= 1e-3] <- 1e-3

# Scale branch lengths to avoid massive numerical ranges
max_depth1 <- max(node.depth.edgelength(tree1))
scale_factor1 <- 1 / max_depth1
tree1$edge.length <- tree1$edge.length * scale_factor1

max_depth2 <- max(node.depth.edgelength(tree2))
scale_factor2 <- 1 / max_depth2
tree2$edge.length <- tree2$edge.length * scale_factor2
# Make the trees ultrametric with penalized-likelihood rate smoothing

ultra_tree1 <- chronos(
  tree1,
  lambda = 10,                # strong smoothing
  model = "correlated",       # more stable than "relaxed"
  control = chronos.control(epsilon = 1e-6, iter.max = 1000, eval.max = 5000)
)

write.tree(ultra_tree1, "../data/affix_adposition.tre")

ultra_tree2 <- chronos(
  tree2,
  lambda = 10,                # strong smoothing
  model = "correlated",       # more stable than "relaxed"
  control = chronos.control(epsilon = 1e-6, iter.max = 1000, eval.max = 5000)
)

write.tree(ultra_tree2, "../data/soundpop.tre")

"""

Setting initial dates...
Fitting in progress... get a first set of estimates
         (Penalised) log-lik = -85345.23 
Optimising rates... dates... -85345.23 
Optimising rates... dates... -296.365 
Optimising rates... dates... -249.5508 
Optimising rates... dates... -247.7283 
Optimising rates... dates... -247.5299 
Optimising rates... dates... -247.5265 
Optimising rates... dates... -247.5251 
Optimising rates... dates... -247.5244 
Optimising rates... dates... -247.5238 
Optimising rates... dates... -247.5236 
Optimising rates... dates... -247.5235 
Optimising rates... dates... -247.5235 
Optimising rates... dates... -247.5234 
Optimising rates... dates... -247.5234 
Optimising rates... dates... -247.5234 
Optimising rates... dates... -247.5234 
Optimising rates... dates... -247.5234 
Optimising rates... dates... -247.5234 
Optimising rates... dates... -247.5234 

log-Lik = -242.3904 
PHIIC = 3409.12 
┌ Warning: RCall.jl: Warning: function evaluation limit reached without convergence (9)
└ @ RCall /localscratch/nwsja01/.julia/packages/RCall/0ggIQ/src/io.jl:172

Setting initial dates...
Fitting in progress... get a first set of estimates
         (Penalised) log-lik = -9404449 
Optimising rates... dates... -9404449 
Optimising rates... dates... -2993.863 
Optimising rates... dates... -1094.443 
Optimising rates... dates... -740.5919 
Optimising rates... dates... -684.9216 
Optimising rates... dates... -665.2583 
Optimising rates... dates... -658.893 
Optimising rates... dates... -656.8436 
Optimising rates... dates... -656.0729 
Optimising rates... dates... -656.0729 

log-Lik = -655.0175 
PHIIC = 10350.64 
RObject{NilSxp}
NULL
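
As an optional check (assuming ape is still loaded in the embedded R session), one can verify that the smoothed trees are ultrametric:

R"""
# Optional check: chronos output should be (numerically) ultrametric
c(is.ultrametric(read.tree("../data/affix_adposition.tre")),
  is.ultrametric(read.tree("../data/soundpop.tre")))
"""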

Make copies of the trees with longnames (for human readability).

tree1 = ete3.Tree("../data/affix_adposition.tre")
tree2 = ete3.Tree("../data/soundpop.tre")

for l in tree1.get_leaves()
    l.name = glottocode2longname[l.name]
end

for l in tree2.get_leaves()
    l.name = glottocode2longname[l.name]
end

tree1.write(format=1, outfile="../data/affix_adposition_longnames.tre")
tree2.write(format=1, outfile="../data/soundpop_longnames.tre")

Clean up.

rm("../data/asjp-v20", recursive=true, force=true)
rm("../data/wals-v2020.4", recursive=true, force=true)
rm("phoible.csv", force=true)
rm("mltree.tre", force=true)

rm("mltree.tre.rooted", force=true)
rm("mltree.tre.rooted.head", force=true)
rm("tree1.tre", force=true)
rm("tree2.tre", force=true)