# Run everything relative to the `code/` directory that sits beside this file's
# own directory, regardless of where Julia was started.
cd(@__DIR__)
cd("../code/")

# Switch to the environment defined in that directory and make sure the packages
# it records are available.
using Pkg
Pkg.activate(".")
Pkg.instantiate()

# Packages loaded for the remainder of the notebook.
using CSV, DataFrames, Pipe, ProgressMeter
Activating project at `~/projects/research/asjp20world_tree/code`
ASJP contains information about the Glottolog classification for each doculect. In this notebook, I use this to construct a tree over all doculects and save it as a Newick file.
Let’s go!
Set the working directory, activate the local Julia environment, and load the packages.
# Run everything relative to the `code/` directory that sits beside this file's
# own directory, regardless of where Julia was started.
cd(@__DIR__)
cd("../code/")

# Switch to the environment defined in that directory and make sure the packages
# it records are available.
using Pkg
Pkg.activate(".")
Pkg.instantiate()

# Packages loaded for the remainder of the notebook.
using CSV, DataFrames, Pipe, ProgressMeter
Activating project at `~/projects/research/asjp20world_tree/code`
Load the `languages` DataFrame and restrict it to the doculects present in `asjp20wide.csv`.
# Load the language metadata table.
d = CSV.read("../data/languages.csv", DataFrame)

# Build the identifier `<classification_wals>.<ID>` for every row, with hyphens
# replaced by underscores; this is the form compared against `asjp.longname` below.
# Broadcasting `string` over the two columns avoids copying a sub-DataFrame and
# joining row objects just to concatenate two values.
d[!, :longname] = replace.(string.(d.classification_wals, ".", d.ID), "-" => "_")

# Load `asjp20wide.csv`; only its `longname` column is used in this cell.
asjp = CSV.read("../data/asjp20wide.csv", DataFrame)

# Keep only the rows whose `longname` also occurs in `asjp`. The names are put into
# a `Set` once, so each membership test is a hash lookup instead of a linear scan
# over the whole `asjp.longname` column for every row of `d`.
d = filter(:longname => in(Set(asjp.longname)), d)
Row | ID | Name | Glottocode | Glottolog_Name | ISO639P3code | Macroarea | Latitude | Longitude | Family | classification_wals | classification_ethnologue | classification_glottolog | recently_extinct | long_extinct | year_of_extinction | code_wals | code_iso | transcribers | longname |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
String | String | String15? | String? | String3? | String15? | Float64? | Float64? | String31? | String | String? | String? | Bool | Bool | Int64? | String3? | String3? | String? | String | |
1 | A51_BAFIA_MAJA | A51_BAFIA_MAJA | lefa1242 | Lefa | lfa | Africa | 5.1 | 11.2 | Atlantic-Congo | NC.BANTU | Niger-Congo,Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,Southern,NarrowBantu,Northwest,A,Bafia(A.51) | Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,SouthernBantoid,NarrowBantu,BantuA-B10-B20-B30,Bafia(A.50),NuclearBafia(A.50),Lefa-Bafia | false | false | missing | missing | lfa | Ann-Katrin Wett | NC.BANTU.A51_BAFIA_MAJA |
2 | A51_BAFIA_TUMI_TINGON | A51_BAFIA_TUMI_TINGON | lefa1242 | Lefa | lfa | Africa | 5.1 | 11.2 | Atlantic-Congo | NC.BANTU | Niger-Congo,Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,Southern,NarrowBantu,Northwest,A,Bafia(A.51) | Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,SouthernBantoid,NarrowBantu,BantuA-B10-B20-B30,Bafia(A.50),NuclearBafia(A.50),Lefa-Bafia | false | false | missing | missing | lfa | Ann-Katrin Wett | NC.BANTU.A51_BAFIA_TUMI_TINGON |
3 | A53_BAFIA_RIKPA | A53_BAFIA_RIKPA | bafi1243 | Bafia | ksf | Africa | 5.0 | 11.17 | Atlantic-Congo | NC.BANTU | Niger-Congo,Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,Southern,NarrowBantu,Northwest,A,Bafia(A.53) | Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,SouthernBantoid,NarrowBantu,BantuA-B10-B20-B30,Bafia(A.50),NuclearBafia(A.50),Lefa-Bafia | false | false | missing | bfi | ksf | Ann-Katrin Wett | NC.BANTU.A53_BAFIA_RIKPA |
4 | A54_BAFIA_NJANTI | A54_BAFIA_NJANTI | tibe1274 | Tibea | ngy | Africa | 5.3 | 11.3 | Atlantic-Congo | NC.BANTU | Niger-Congo,Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,Southern,NarrowBantu,Northwest,A,Bafia(A.54) | Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,SouthernBantoid,NarrowBantu,BantuA-B10-B20-B30,Bafia(A.50) | false | false | missing | missing | ngy | Ann-Katrin Wett | NC.BANTU.A54_BAFIA_NJANTI |
5 | A60_GUNU | A60_GUNU | nugu1242 | Nugunu (Cameroon) | yas | Africa | 4.58 | 11.25 | Atlantic-Congo | NC.BANTU | Niger-Congo,Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,Southern,NarrowBantu,Northwest,A,Sanaga(A.622) | Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,SouthernBantoid,NarrowBantu,Mbam-Bubi-Jarawan,Mbam,NuclearMbam,Bati-Mbure-Yambassa,Mbure-Yambassa,Yambassa(A.60),Mmala-Elip-Gunu,Elip-Gunu | false | false | missing | gun | yas | Ann-Katrin Wett | NC.BANTU.A60_GUNU |
6 | A60_MMAALA | A60_MMAALA | mmaa1238 | Mmaala | mmu | Africa | 4.5 | 11.08 | Atlantic-Congo | NC.BANTU | Niger-Congo,Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,Southern,NarrowBantu,Northwest,A,Sanaga(A.62) | Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,SouthernBantoid,NarrowBantu,Mbam-Bubi-Jarawan,Mbam,NuclearMbam,Bati-Mbure-Yambassa,Mbure-Yambassa,Yambassa(A.60),Mmala-Elip-Gunu | false | false | missing | missing | mmu | Ann-Katrin Wett | NC.BANTU.A60_MMAALA |
7 | A61_NGORO_ASOM | A61_NGORO_ASOM | tuki1240 | Tuki | bag | Africa | 4.58 | 11.5 | Atlantic-Congo | NC.BANTU | Niger-Congo,Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,Southern,NarrowBantu,Northwest,A,Sanaga(A.601) | Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,SouthernBantoid,NarrowBantu,Mbam-Bubi-Jarawan,Mbam,NuclearMbam,Sanaga-WestMbam(A.40),Sanaga(A.60) | false | false | missing | tki | bag | Ann-Katrin Wett | NC.BANTU.A61_NGORO_ASOM |
8 | A62_KALONGE | A62_KALONGE | yang1293 | Yangben | yav | Africa | 4.43 | 11.08 | Atlantic-Congo | NC.BANTU | Niger-Congo,Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,Southern,NarrowBantu,Northwest,A,Sanaga(A.62) | Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,SouthernBantoid,NarrowBantu,Mbam-Bubi-Jarawan,Mbam,NuclearMbam,Bati-Mbure-Yambassa,Mbure-Yambassa,Yambassa(A.60) | false | false | missing | missing | yav | Ann-Katrin Wett | NC.BANTU.A62_KALONGE |
9 | A72a_EWONDO | A72a_EWONDO | ewon1239 | Ewondo | ewo | Africa | 4.0 | 12.0 | Atlantic-Congo | NC.BANTU | Niger-Congo,Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,Southern,NarrowBantu,Northwest,A,Ewondo-Fang(A.72) | Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,SouthernBantoid,NarrowBantu,BantuA-B10-B20-B30,Basaa-Yaunde(A40-70),Yaunde-Fang(A.70),Ewondo-Bebele | false | false | missing | ewo | ewo | Ann-Katrin Wett | NC.BANTU.A72a_EWONDO |
10 | AASAX | AASAX | aasa1238 | Aasax | aas | Africa | -4.04 | 37.16 | Afro-Asiatic | AA.SOUTHERN_CUSHITIC | Afro-Asiatic,Cushitic,South | Afro-Asiatic,Cushitic,SouthCushitic | true | false | 2010 | missing | aas | Darja Appelganz | AA.SOUTHERN_CUSHITIC.AASAX |
11 | ABAGA | ABAGA | abag1245 | Abaga | abg | Papunesia | -6.17 | 145.67 | Nuclear Trans New Guinea | TNG.SIANE-YAGARIA | Trans-NewGuinea,Madang,Kalam-Kobon | Nuclear_Trans_New_Guinea,Kainantu-Goroka,Goroka,NuclearGoroka,Siane-Yagaria,Kamano-Yagaria,UnclassifiedKamano-Yagaria | false | false | missing | missing | abg | Matthew S. Dryer and Søren Wichmann | TNG.SIANE_YAGARIA.ABAGA |
12 | ABANYOM | ABANYOM | aban1242 | Abanyom | abm | Africa | 6.29 | 8.63 | Atlantic-Congo | NC.EKOID-MBE | Niger-Congo,Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,Southern,Ekoid,Bakor | Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,SouthernBantoid,Ekoid-Mbe,Ekoid,Bakor-Ejagham,Bakor,NorthernBakor,Abanyom-Nkem-Nkum | false | false | missing | missing | abm | Guillaume Segerer and Søren Wichmann | NC.EKOID_MBE.ABANYOM |
13 | ABAR | ABAR | abar1238 | Mungbam | mij | Africa | 6.58 | 10.25 | Atlantic-Congo | NC.YEMNE-KIMBI | Niger-Congo,Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,Southern,Yemne-Kimbi | Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,SouthernBantoid,Yemne-Kimbi | false | false | missing | missing | mij | Viveka Velupillai | NC.YEMNE_KIMBI.ABAR |
⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
7722 | ZOOMBO_3 | ZOOMBO_3 | koon1244 | South-Central Koongo | kng | Africa | -5.0 | 15.0 | Atlantic-Congo | NC.BANTU | Niger-Congo,Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,Southern,NarrowBantu,Central,H,Kikongo(H.16) | Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,SouthernBantoid,NarrowBantu,Central-WesternBantu,West-CoastalBantu,Nzadic,Lweric,Dingic,Loange-Atlantic,KLCExtended,KikongoLanguageCluster,NuclearKLC,Kikongoic,KambakunyicKikongo,KilaadicKikongo,Central-SouthernKikongo,SoutheasternKikongo,SouthernKikongo,Koongo-Kituba | false | false | missing | fio | kng | Ann-Katrin Wett | NC.BANTU.ZOOMBO_3 |
7723 | ZOOMBO_4 | ZOOMBO_4 | koon1244 | South-Central Koongo | kng | Africa | -5.0 | 15.0 | Atlantic-Congo | NC.BANTU | Niger-Congo,Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,Southern,NarrowBantu,Central,H,Kikongo(H.16) | Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,SouthernBantoid,NarrowBantu,Central-WesternBantu,West-CoastalBantu,Nzadic,Lweric,Dingic,Loange-Atlantic,KLCExtended,KikongoLanguageCluster,NuclearKLC,Kikongoic,KambakunyicKikongo,KilaadicKikongo,Central-SouthernKikongo,SoutheasternKikongo,SouthernKikongo,Koongo-Kituba | false | false | missing | fio | kng | Ann-Katrin Wett | NC.BANTU.ZOOMBO_4 |
7724 | ZOQUE_FRANCISCO_LEON | ZOQUE_FRANCISCO_LEON | fran1266 | Francisco León Zoque | zos | North America | 17.33 | -93.25 | Mixe-Zoque | MZ.MIXE-ZOQUE | Mixe-Zoquean,Zoquean,ChiapasZoquean | Mixe-Zoque,Zoque,ChiapasZoque | false | false | missing | zfl | zos | Søren Wichmann and Viveka Velupillai | MZ.MIXE_ZOQUE.ZOQUE_FRANCISCO_LEON |
7725 | ZOQUE_RAYON | ZOQUE_RAYON | rayo1235 | Rayón Zoque | zor | North America | 17.08 | -93.0 | Mixe-Zoque | MZ.MIXE-ZOQUE | Mixe-Zoquean,Zoquean,ChiapasZoquean,NortheastZoque | Mixe-Zoque,Zoque,ChiapasZoque | false | false | missing | zqr | zor | Søren Wichmann and Viveka Velupillai | MZ.MIXE_ZOQUE.ZOQUE_RAYON |
7726 | ZOROP | ZOROP | yafi1240 | Yafi | wfg | Papunesia | -3.42 | 140.92 | Pauwasi | Pau.EASTERN_PAUWASI | Pauwasi,Eastern | Pauwasi,EasternPauwasi | false | false | missing | missing | wfg | Matthew S. Dryer | Pau.EASTERN_PAUWASI.ZOROP |
7727 | ZUGUNUK_KALASHA | ZUGUNUK_KALASHA | kala1372 | Chitral Kalasha | kls | Eurasia | 35.49 | 71.7 | Indo-European | IE.INDIC | Indo-European,Indo-Iranian,Indo-Aryan,OuterLanguages,Northwestern,Dardic,Chitral | Indo-European,ClassicalIndo-European,Indo-Iranian,Indo-Aryan,Indo-AryanNorthwesternzone,Chitral | false | false | missing | klh | kls | missing | IE.INDIC.ZUGUNUK_KALASHA |
7728 | ZULGO | ZULGO | zulg1242 | Zulgo-Gemzek | gnd | Africa | 10.83 | 14.08 | Afro-Asiatic | AA.BIU-MANDARA | Afro-Asiatic,Chadic,Biu-Mandara,A,A.5 | Afro-Asiatic,Chadic,Biu-Mandara,NorthBiu-Mandara,Margi-Mandara-Mofu,Mofuic,Meri | false | false | missing | missing | gnd | Ann-Katrin Wett | AA.BIU_MANDARA.ZULGO |
7729 | ZULU | ZULU | zulu1248 | Zulu | zul | Africa | -30.0 | 30.0 | Atlantic-Congo | NC.BANTU | Niger-Congo,Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,Southern,NarrowBantu,Central,S,Nguni(S.42) | Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,SouthernBantoid,NarrowBantu,EastBantu,SouthernBantu-Makua,Nguni-Tsonga,Nguni(S.40),SouthernNdebele-Lowland,Zulu-Xhosa | false | false | missing | zno | zul | Cecil H. Brown | NC.BANTU.ZULU |
7730 | ZULU_2 | ZULU_2 | zulu1248 | Zulu | zul | Africa | -30.0 | 30.0 | Atlantic-Congo | NC.BANTU | Niger-Congo,Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,Southern,NarrowBantu,Central,S,Nguni(S.42) | Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,SouthernBantoid,NarrowBantu,EastBantu,SouthernBantu-Makua,Nguni-Tsonga,Nguni(S.40),SouthernNdebele-Lowland,Zulu-Xhosa | false | false | missing | zno | zul | Ann-Katrin Wett | NC.BANTU.ZULU_2 |
7731 | ZULU_NKANDLA | ZULU_NKANDLA | zulu1248 | Zulu | zul | Africa | -30.0 | 30.0 | Atlantic-Congo | NC.BANTU | Niger-Congo,Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,Southern,NarrowBantu,Central,S,Nguni(S.42) | Atlantic-Congo,Volta-Congo,Benue-Congo,Bantoid,SouthernBantoid,NarrowBantu,EastBantu,SouthernBantu-Makua,Nguni-Tsonga,Nguni(S.40),SouthernNdebele-Lowland,Zulu-Xhosa | false | false | missing | zno | zul | Ann-Katrin Wett | NC.BANTU.ZULU_NKANDLA |
7732 | ZUNI | ZUNI | zuni1245 | Zuni | zun | North America | 35.08 | -108.83 | Zuni | Zun.ZUNI | Languageisolate_zun | Zuni | false | false | missing | zun | zun | Cecil H. Brown | Zun.ZUNI.ZUNI |
7733 | ZWAY | ZWAY | zayy1238 | Zay | zwa | Africa | 7.93 | 38.83 | Afro-Asiatic | AA.SEMITIC | Afro-Asiatic,Semitic,South,Ethiopian,South,Transversal,Harari-EastGurage | Afro-Asiatic,Semitic,WestSemitic,Ethiosemitic,SouthEthiopic,TransversalSouthEthiopic,Harari-EastGurage | false | false | missing | missing | zwa | Cecil H. Brown and Dmitry Egorov | AA.SEMITIC.ZWAY |
Convert the Glottolog classification paths into a tree represented as nested dictionaries.
Helper function converting a nested dictionary tree into a Newick string.
"""
    pair2string(p)

Convert the tree rooted at `p` into a parenthesised, comma-separated string.

`p` is a `label => children` pair (anything indexable with `p[1]` and `p[2]`),
where `children` is a collection of pairs of the same shape, e.g. a nested `Dict`.

- A node without children is a leaf and is returned as its label `p[1]`.
- A node with exactly one child is skipped: the result is that of its only child,
  so chains of single-child nodes do not add nested parentheses.
- Any other node yields `"(" * child_1 * "," * … * "," * child_n * ")"`, with the
  children in the iteration order of the `children` collection.

Labels of internal nodes are not written, and no terminating `;` is appended; the
caller has to add it when a complete Newick string is required.
"""
function pair2string(p)
    children = p[2]

    # `isempty` recognises a leaf for any children container and does not allocate
    # a throw-away `Dict()` on every recursive call just to compare against it.
    isempty(children) && return p[1]

    # A single child carries no branching information, so this node is dropped.
    length(children) == 1 && return pair2string(first(children))

    return "(" * join((pair2string(child) for child in children), ",") * ")"
end
pair2string (generic function with 1 method)
Construct Glottolog tree and save it to disk.