At least three data types:
library(lingtypology)
library(tidyverse)
## ── Attaching packages ───────────────────────────────────── tidyverse 1.2.0 ──
## ✔ ggplot2 2.2.1.9000 ✔ purrr 0.2.4
## ✔ tibble 1.3.4 ✔ dplyr 0.7.4
## ✔ tidyr 0.7.2 ✔ stringr 1.2.0
## ✔ readr 1.1.1 ✔ forcats 0.2.0
## ── Conflicts ──────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(leaflet)
library(leaflet.minicharts)
## Warning: package 'leaflet.minicharts' was built under R version 3.4.3
library(sf)
## Warning: package 'sf' was built under R version 3.4.3
## Linking to GEOS 3.6.1, GDAL 2.1.3, proj.4 4.9.3
library(glue)
##
## Attaching package: 'glue'
## The following object is masked from 'package:dplyr':
##
## collapse
# Languages affiliated with Uralic, as listed by lingtypology.
uralic <- lingtypology::lang.aff("Uralic")

# WALS feature 85A, narrowed to three languages.
wals_85A <- wals.feature("85A")
wals_85A_scandinavia <- filter(
  wals_85A,
  language %in% c("Finnish", "Russian", "Swedish")
)

# One arrow shape per value of feature 85A.
map.feature(
  languages = wals_85A_scandinavia[["language"]],
  features = wals_85A_scandinavia[["85A"]],
  label = wals_85A_scandinavia[["language"]],
  shape = c("➡", "⬅")
)
# Map the circassian data by dialect, labelling each point with its village
# and placing it at the coordinates stored in the data.
map.feature(
  languages = circassian[["language"]],
  features = circassian[["dialect"]],
  label = circassian[["village"]],
  latitude = circassian[["latitude"]],
  longitude = circassian[["longitude"]]
)
# Read the kpv settlement table straight from the langdoc GitHub repository.
kpv <- read_csv(
  file = "https://raw.githubusercontent.com/langdoc/kpv-geography/master/kpv.csv"
)
## Parsed with column specification:
## cols(
## village = col_character(),
## population_2010 = col_integer(),
## latitude = col_double(),
## longitude = col_double(),
## type = col_character(),
## adm_center = col_character(),
## district = col_character(),
## region = col_character(),
## language = col_character(),
## dialect = col_character()
## )
# Map the kpv settlements by dialect, labelled by village, at the
# coordinates given in the CSV.
map.feature(
  languages = kpv[["language"]],
  features = kpv[["dialect"]],
  label = kpv[["village"]],
  latitude = kpv[["latitude"]],
  longitude = kpv[["longitude"]]
)
Map source: http://kettunen.fnhost.org/html/kett117.html
# Append the point coordinates of an sf object as ordinary columns.
#
# x:     an sf object whose geometry is of class sfc_POINT.
# names: names for the new coordinate columns; there must be exactly one per
#        column returned by sf::st_coordinates(). Columns of `x` that already
#        carry one of these names are dropped first.
#
# Returns `x` with the coordinate columns bound on the right.
sfc_as_cols <- function(x, names = c("longitude", "latitude")) {
  stopifnot(inherits(x, "sf") && inherits(sf::st_geometry(x), "sfc_POINT"))

  ret <- tibble::as_tibble(sf::st_coordinates(x))
  stopifnot(length(names) == ncol(ret))

  # Existing columns that would clash with the new coordinate columns.
  is_clash <- names(x) %in% names
  dplyr::bind_cols(x[, !is_clash], setNames(ret, names))
}
# Read the Kettunen shapefile, reproject it to WGS84 longitude/latitude and
# expose the point coordinates as plain columns.
kettunen <- st_read("data/kettunen.shp") %>%
  st_transform("+proj=longlat +datum=WGS84") %>%
  sfc_as_cols()
## Reading layer `kettunen' from data source `/Users/niko/github/paris20180122/data/kettunen.shp' using driver `ESRI Shapefile'
## Simple feature collection with 108500 features and 12 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: -272962.2 ymin: 6509287 xmax: 757501.3 ymax: 7795978
## epsg (SRID): NA
## proj4string: +proj=utm +zone=35 +ellps=GRS80 +units=m +no_defs
# Draw one map from the feature table as a leaflet map.
#
# data: a table with the columns `map_id`, `feature_value` and
#       `feature_description`, in a form that leaflet() can place on a map.
# map:  a single map identifier; only rows whose `map_id` equals it are drawn.
#
# Returns a leaflet widget with one circle marker per row, coloured by
# `feature_value`, and a legend titled with the first `feature_description`
# of the selection. Stops with an error if `map` is not of length one or if
# no row matches it.
map_finnic <- function(data, map = "Kartta 151") {
  # A longer vector would be recycled silently by `==` below.
  stopifnot(length(map) == 1L)

  # Nine fixed colours first, then the remaining named R colours in random
  # order (excluding very light, grey, black, pink and "1"-suffixed shades).
  named_colors <- grDevices::colors()
  my_colors <- c(
    "#1f77b4",
    "#ff7f0e",
    "#2ca02c",
    "#d62728",
    "#9467bd",
    "#8c564b",
    "#e377c2",
    "#7f7f7f",
    "#17becf",
    sample(named_colors[!grepl("ivory|azure|white|gray|grey|black|pink|1",
                               named_colors)])
  )

  # `!!map` forces the function argument to be used even if `data` happens
  # to contain a column called `map`.
  current_selection <- data %>% filter(map_id == !!map)
  if (nrow(current_selection) == 0L) {
    stop("No rows in `data` have map_id == \"", map, "\".", call. = FALSE)
  }

  # One colour per distinct feature value.
  n_values <- length(unique(current_selection$feature_value))
  pal <- colorFactor(my_colors[seq_len(n_values)],
                     domain = current_selection$feature_value)
  title_text <- as.character(current_selection$feature_description[1])

  leaflet(data = current_selection) %>%
    addTiles() %>%
    addCircleMarkers(color = ~pal(feature_value),
                     radius = 4,
                     stroke = FALSE, fillOpacity = 0.5,
                     popup = ~feature_value) %>%
    addLegend("bottomleft", pal = pal, values = ~feature_value,
              title = title_text,
              opacity = 1)
}
# Keep the shapefile's original column names before they are renamed.
kettunen_names <- names(kettunen)

# Give the feature columns English names, store the feature value as text,
# and take the map id from the part of `alaryhma_n` before the first colon.
kettunen <- kettunen %>%
  rename(
    feature_id = ilmio_id,
    feature_value = ilmio,
    feature_description = kuvaus,
    location = paikka_nim
  ) %>%
  mutate(
    feature_value = as.character(feature_value),
    map_id = str_extract(alaryhma_n, "^[^:]+(?=:)")
  )

map_finnic(kettunen, "Kartta 117")
Features used in my variants of Finnic dialect maps:
# `kettunen_names` is itself the (unnamed) character vector of column names,
# so it is printed directly; names() of it would only return NULL.
kettunen_names
## [1] "ilmio_id" "aineisto_i" "aineisto_n" "alaryhma_i" "alaryhma_n"
## [6] "ylaryhma_i" "ilmio" "tyyppi" "kuvaus" "paikka_nim"
## [11] "paikantyyp" "mml_paikka" "geometry" "longitude" "latitude"
# Read the token-level data and attach the coordinates of each recording
# place. The join key is spelled out so that a column later shared by both
# tables cannot silently become part of the key.
skn <- read_rds("data/skn_df.rds") %>%
  left_join(read_csv("data/skn_paikat.csv"), by = "paikka")
## Parsed with column specification:
## cols(
## paikka = col_character(),
## maa = col_character(),
## lat = col_double(),
## lon = col_double()
## )
# Keep the column names for later display.
skn_names <- names(skn)

# One circle marker per distinct recording place.
skn %>%
  distinct(paikka, lat, lon) %>%
  leaflet() %>%
  addTiles() %>%
  addCircleMarkers()
## Assuming 'lon' and 'lat' are longitude and latitude, respectively
Structure here:
Note: some of the annotations were created automatically. Their quality is good, but this is crucial to remember.
# Column names of the token table.
colnames(skn)
## [1] "sane" "alkup" "pos" "norm"
## [5] "msd" "lemma" "dephead" "deprel"
## [9] "ref" "paikka" "nauhoitusaika" "puhuja"
## [13] "sukupuoli" "murre" "murrealue" "rooli"
## [17] "url" "wav" "start" "length"
## [21] "id" "position" "maa" "lat"
## [25] "lon"
# First ten rows in `position` order, rendered as a table.
skn %>%
  arrange(position) %>%
  slice(seq_len(10)) %>%
  knitr::kable()
sane | alkup | pos | norm | msd | lemma | dephead | deprel | ref | paikka | nauhoitusaika | puhuja | sukupuoli | murre | murrealue | rooli | url | wav | start | length | id | position | maa | lat | lon |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
se | se | Pron | se | SUBCAT_Dem|NUM_Sg|CASE_Nom | se | 3 | nsubj | 2 | Suomussalmi | 1978 | AR | NA | Kainuu | Savolaismurteet | muu | https://lat.csc.fi/ds/annex/runLoader?nodeid=MPI7571%23&time=200&duration=1593&tiername=AR-original | https://lat.csc.fi/ds/imdi_browser/viewcontroller?nodeid=MPI7512%23&action=Download | 00:00:0.20 | 1.59 s | 1 | 1 | Suomussalmi, Finland | 64.88411 | 28.91166 |
ponttuu | ponttuu | V | ponttuu | PRS_Sg3|VOICE_Act|TENSE_Prs|MOOD_Ind|OTHER_UNK | ponttuu | 4 | dobj | 3 | Suomussalmi | 1978 | AR | NA | Kainuu | Savolaismurteet | muu | https://lat.csc.fi/ds/annex/runLoader?nodeid=MPI7571%23&time=200&duration=1593&tiername=AR-original | https://lat.csc.fi/ds/imdi_browser/viewcontroller?nodeid=MPI7512%23&action=Download | 00:00:0.20 | 1.59 s | 1 | 2 | Suomussalmi, Finland | 64.88411 | 28.91166 |
tehtiin | tehtiin | V | tehtiin | PRS_Pe4|VOICE_Pass|TENSE_Prt|MOOD_Ind | tehdä | 0 | ROOT | 4 | Suomussalmi | 1978 | AR | NA | Kainuu | Savolaismurteet | muu | https://lat.csc.fi/ds/annex/runLoader?nodeid=MPI7571%23&time=200&duration=1593&tiername=AR-original | https://lat.csc.fi/ds/imdi_browser/viewcontroller?nodeid=MPI7512%23&action=Download | 00:00:0.20 | 1.59 s | 1 | 3 | Suomussalmi, Finland | 64.88411 | 28.91166 |
? | ? | Punct | ? | _ | ? | 4 | punct | 5 | Suomussalmi | 1978 | AR | NA | Kainuu | Savolaismurteet | muu | https://lat.csc.fi/ds/annex/runLoader?nodeid=MPI7571%23&time=200&duration=1593&tiername=AR-original | https://lat.csc.fi/ds/imdi_browser/viewcontroller?nodeid=MPI7512%23&action=Download | 00:00:0.20 | 1.59 s | 1 | 4 | Suomussalmi, Finland | 64.88411 | 28.91166 |
no | ’noo | Adv | noo | _ | no | 2 | advmod | 1 | Suomussalmi | 1978 | RJ | M | Kainuu | Savolaismurteet | haastateltava | https://lat.csc.fi/ds/annex/runLoader?nodeid=MPI7571%23&time=1957&duration=4426&tiername=RJ-original | https://lat.csc.fi/ds/imdi_browser/viewcontroller?nodeid=MPI7512%23&action=Download | 00:00:1.96 | 4.43 s | 2 | 5 | Suomussalmi, Finland | 64.88411 | 28.91166 |
pannaan | “pannaan | V | pannaan | PRS_Pe4|VOICE_Pass|TENSE_Prs|MOOD_Ind | panna | 0 | ROOT | 2 | Suomussalmi | 1978 | RJ | M | Kainuu | Savolaismurteet | haastateltava | https://lat.csc.fi/ds/annex/runLoader?nodeid=MPI7571%23&time=1957&duration=4426&tiername=RJ-original | https://lat.csc.fi/ds/imdi_browser/viewcontroller?nodeid=MPI7512%23&action=Download | 00:00:1.96 | 4.43 s | 2 | 6 | Suomussalmi, Finland | 64.88411 | 28.91166 |
, | , | Punct | , | _ | , | 7 | punct | 3 | Suomussalmi | 1978 | RJ | M | Kainuu | Savolaismurteet | haastateltava | https://lat.csc.fi/ds/annex/runLoader?nodeid=MPI7571%23&time=1957&duration=4426&tiername=RJ-original | https://lat.csc.fi/ds/imdi_browser/viewcontroller?nodeid=MPI7512%23&action=Download | 00:00:1.96 | 4.43 s | 2 | 7 | Suomussalmi, Finland | 64.88411 | 28.91166 |
ne | ne | Pron | ne | SUBCAT_Dem|NUM_Pl|CASE_Nom | se | 7 | nsubj-cop | 4 | Suomussalmi | 1978 | RJ | M | Kainuu | Savolaismurteet | haastateltava | https://lat.csc.fi/ds/annex/runLoader?nodeid=MPI7571%23&time=1957&duration=4426&tiername=RJ-original | https://lat.csc.fi/ds/imdi_browser/viewcontroller?nodeid=MPI7512%23&action=Download | 00:00:1.96 | 4.43 s | 2 | 8 | Suomussalmi, Finland | 64.88411 | 28.91166 |
oli | oli | V | oli | PRS_Sg3|VOICE_Act|TENSE_Prt|MOOD_Ind | olla | 7 | cop | 5 | Suomussalmi | 1978 | RJ | M | Kainuu | Savolaismurteet | haastateltava | https://lat.csc.fi/ds/annex/runLoader?nodeid=MPI7571%23&time=1957&duration=4426&tiername=RJ-original | https://lat.csc.fi/ds/imdi_browser/viewcontroller?nodeid=MPI7512%23&action=Download | 00:00:1.96 | 4.43 s | 2 | 9 | Suomussalmi, Finland | 64.88411 | 28.91166 |
, | , | Punct | , | _ | , | 7 | punct | 6 | Suomussalmi | 1978 | RJ | M | Kainuu | Savolaismurteet | haastateltava | https://lat.csc.fi/ds/annex/runLoader?nodeid=MPI7571%23&time=1957&duration=4426&tiername=RJ-original | https://lat.csc.fi/ds/imdi_browser/viewcontroller?nodeid=MPI7512%23&action=Download | 00:00:1.96 | 4.43 s | 2 | 10 | Suomussalmi, Finland | 64.88411 | 28.91166 |
# Per recording place, the share of "kanssa" adposition tokens (interviewee
# speech only) whose head follows them ("pre") versus precedes them ("post").
# The result has one row per place with columns paikka, lat, lon and one
# column per type.
skn_kanssa <- skn %>%
  mutate(id = as.numeric(id)) %>%
  arrange(id, position) %>%
  filter(rooli == "haastateltava") %>%
  # mutate(context = glue("{lag(sane)} {sane} {lead(sane)}")) %>%
  filter(pos == "Adp", deprel == "adpos") %>%
  # NOTE(review): `id` needs as.numeric() above, so `dephead` and `ref` may
  # be stored as text as well (confirm). They are compared as numbers so
  # that e.g. "10" > "9" is not decided alphabetically.
  mutate(type = ifelse(
    as.numeric(as.character(dephead)) > as.numeric(as.character(ref)),
    "pre",
    "post"
  )) %>%
  filter(lemma == "kanssa") %>%
  add_count(paikka) %>%
  rename(count_adpos = n) %>%
  group_by(paikka, type) %>%
  mutate(freq_adpos = n() / count_adpos) %>%
  ungroup() %>%
  # Places without coordinates cannot be mapped; previously their NA
  # coordinates were turned into 0, i.e. a point at 0°N 0°E.
  filter(!is.na(lat), !is.na(lon)) %>%
  distinct(paikka, lat, lon, freq_adpos, type) %>%
  # Fill only the frequency cells: a type that never occurs at a place has
  # frequency 0.
  spread(type, freq_adpos, fill = 0)
# skn_kanssa_hits %>% slice(1) %>% pull(url) %>% browseURL()
You end up with something like this (in this case, for different scenarios with different structures):
# First ten places of the frequency table, rendered as a table.
skn_kanssa %>%
  slice(seq_len(10)) %>%
  knitr::kable()
paikka | lat | lon | post | pre |
---|---|---|---|---|
Alastaro | 60.95198 | 22.86193 | 0.8888889 | 0.1111111 |
Alatornio | 65.82584 | 24.16745 | 0.9333333 | 0.0666667 |
Artjärvi | 60.74365 | 26.05508 | 0.9230769 | 0.0769231 |
Askola | 60.53044 | 25.59784 | 0.8800000 | 0.1200000 |
Eurajoki | 61.20236 | 21.73398 | 0.9696970 | 0.0303030 |
Hailuoto | 65.03333 | 24.70000 | 0.9677419 | 0.0322581 |
Heinola | 61.20423 | 26.03810 | 0.9473684 | 0.0526316 |
Hietamäki | 63.07211 | 22.51892 | 0.9565217 | 0.0434783 |
Hinnerjoki | 60.99986 | 21.98383 | 0.9777778 | 0.0222222 |
Hollola | 60.98870 | 25.51632 | 0.9444444 | 0.0555556 |
# Pie charts of the pre/post shares at each recording place, with the WALS
# 85A values of the three selected languages drawn on top of the same map.
leaflet() %>%
  leaflet::addTiles() %>%
  addMinicharts(
    lng = skn_kanssa[["lon"]],
    lat = skn_kanssa[["lat"]],
    type = "pie",
    width = 20,
    chartdata = skn_kanssa[, c("pre", "post")]
  ) %>%
  map.feature(
    pipe.data = .,
    languages = wals_85A_scandinavia[["language"]],
    features = wals_85A_scandinavia[["85A"]],
    label = wals_85A_scandinavia[["language"]],
    shape = c("➡", "⬅")
  )
More realistic workflow:
# Original column names of the Kettunen shapefile.
print(kettunen_names)
## [1] "ilmio_id" "aineisto_i" "aineisto_n" "alaryhma_i" "alaryhma_n"
## [6] "ylaryhma_i" "ilmio" "tyyppi" "kuvaus" "paikka_nim"
## [11] "paikantyyp" "mml_paikka" "geometry" "longitude" "latitude"
# Column names of the token table.
print(skn_names)
## [1] "sane" "alkup" "pos" "norm"
## [5] "msd" "lemma" "dephead" "deprel"
## [9] "ref" "paikka" "nauhoitusaika" "puhuja"
## [13] "sukupuoli" "murre" "murrealue" "rooli"
## [17] "url" "wav" "start" "length"
## [21] "id" "position" "maa" "lat"
## [25] "lon"
Comments
Could `village` be changed to `name` and `settlement_type`, or some equivalents?