Jupyter notebook Inv-Analysis.ipynb

¹⁰⁹ views

Kernel: Python 3 (Ubuntu Linux)

In [2]:

%%javascript
$.getScript('https://kmahelona.github.io/ipython_notebook_goodies/ipython_notebook_toc.js')
$('head').append('<style>table {font-family: "Doulos SIL"; font-size=5em;}')

Out[2]:

MIME type unknown not supported

In [3]:

from IPython.display import display
from IPython.display import HTML
from IPython.display import Image

import csv
import collections
import matplotlib
import random

In [4]:

def get_inventories(aggregated, consonants):
    with open(aggregated) as f:
        aggregated_data = [i for i in csv.DictReader(f, delimiter='\t')]

    with open(consonants) as f:
        segments = [segment for segment in csv.DictReader(f, delimiter='\t')]

    inventories = []
    for inventory in aggregated_data:
        inventory_consonants = int(inventory['Consonants'])
        if inventory_consonants <= 14 and 'dialect' not in inventory['LanguageName']:
            # Get rid of ugly language labels
            lang_name = inventory['LanguageName']
            
            if 'Maxakali' in lang_name:
                continue
                
            if lang_name.isupper() or lang_name.islower():
                inventory['LanguageName'] = inventory['LanguageName'].title()

            # Map the genus to actual language family name
            try:
                inventory['LanguageFamilyRoot'] = genus_to_family[inventory['LanguageFamilyGenus']]
            except KeyError:
                # If the language is unclassified, we'll have to skip it 😢
                continue

            inventory['Segments'] = []
            for segment in segments:
                if inventory['InventoryID'] == segment['InventoryID']:
                    inventory['Segments'].append(segment['Phoneme'])

            for key in ['Phonemes', 'Tones', 'Population', 'Trump', 'Country', 'Vowels']:
                del inventory[key]

            if inventory_consonants != len(inventory['Segments']):
                print('Invalid inv: {}'.format(inventory))
            else:
                inventories.append(inventory)

    else:
        return inventories


def get_features_dict(features_file, phonemes_file):
    with open(phonemes_file) as f:
        phonemes = [i for i in csv.DictReader(f, delimiter='\t')]

    phoneme_classes = {i['Phoneme']: i['CombinedClass'] for i in phonemes}
    
    with open(features_file) as f:
        features = [i for i in csv.DictReader(f, delimiter='\t')]

    features_d = {}

    # NOTE: if we need non-present features, disable deep copy
    for segment in features:
        # Only act on consonants
        if segment['syllabic'] == '+':
            continue
        else:
            segment_copy = {}
            for feature in segment:
                if segment[feature] == '0':
                    segment_copy[feature] = None
                elif segment[feature] == '+':
                    segment_copy[feature] = True
                elif segment[feature] == '-':
                    segment_copy[feature] = False
                elif segment[feature] == '+,-' or segment[feature] == '-,+':
                    segment_copy[feature] = True
                    segment_copy['complex'] = True
            
            try:
                segment_copy['class'] = phoneme_classes[segment['segment']]
            except KeyError:
                segment_copy['class'] = 'c-d-c'
            features_d[segment['segment']] = segment_copy
    
    else:
        return features_d

    
def get_genus_map(family_file, genus_file):
    # Initialise families and genera
    with open(family_file) as f:
        families = [i for i in csv.DictReader(f)]

    with open(genus_file) as f:
        genera = [i for i in csv.DictReader(f)]

    # Map a genus to a family, since the source data provides an ID instead of a pretty name
    families_map = {family['pk']: family['name'] for family in families}
    genus_to_family = {genus['name']: families_map[genus['family_pk']] for genus in genera}
    
    return genus_to_family
        
    
def is_voiced(segment):
    return features_d[segment]['periodicGlottalSource']


def is_plosive(segment):
    is_consonantal = features_d[segment]['consonantal'] is True
    is_not_sonorant = features_d[segment]['sonorant'] is False
    is_not_continuant = features_d[segment]['continuant'] is False
    is_not_strident = features_d[segment]['strident'] is None or features_d[segment]['strident'] is False
    is_not_delayed_release = features_d[segment]['delayedRelease'] is None or features_d[segment]['delayedRelease'] is False
    
    return is_consonantal and is_not_sonorant and is_not_continuant and is_not_strident and is_not_delayed_release


def is_affricate(segment):
    is_consonantal = features_d[segment]['consonantal'] is True
    is_not_sonorant = features_d[segment]['sonorant'] is False
    is_not_continuant = features_d[segment]['continuant'] is False
    is_strident = features_d[segment]['strident'] is True
    
    return is_consonantal and is_not_sonorant and is_not_continuant and is_strident


def is_fricative(segment):
    is_syllabic = features_d[segment]['syllabic'] is False
    is_not_sonorant = features_d[segment]['sonorant'] is False
    is_continuant = features_d[segment]['continuant'] is True
    
    return is_syllabic and is_not_sonorant and is_continuant


def is_nasal(segment):
    is_consonantal = features_d[segment]['consonantal'] is True
    is_nasal = features_d[segment]['nasal'] is True
    is_continuant = features_d[segment]['continuant'] is False
    
    return is_consonantal and is_nasal and is_continuant


def is_liquid(segment):
    is_consonantal = features_d[segment]['consonantal'] is True
    is_continuant = features_d[segment]['continuant'] is True
    is_sonorant = features_d[segment]['sonorant'] is True
    is_not_strident = features_d[segment]['strident'] is False or features_d[segment]['strident'] is None 
    has_lateral_feature = features_d[segment]['lateral'] is not None
    
    return is_consonantal and is_continuant and is_not_strident and is_sonorant and has_lateral_feature


def is_rothic(segment):
    is_consonantal = features_d[segment]['consonantal'] is True
    is_not_lateral = features_d[segment]['lateral'] is False
    
    return is_consonantal and is_liquid(segment) and is_not_lateral

def sanity_checks():
    plosives = 'p b t d ʈ ɖ c ɟ k ɡ q ɢ ʔ'.split()
    fricatives = 'ɸ β f v θ ð s z ʃ ʒ ʂ ʐ ç ʝ x ɣ χ ʁ ħ ʕ h ɦ'.split()
    laterals = 'l̪  l ɭ ʎ ʟ'.split()
    nasals = ' m ɱ n̪ n ɳ ɲ ŋ ɴ'.split()
    voiced = 'b d ɖ ɟ ɡ ɢ β v ð z ʒ ʐ ʝ ɣ ʁ ʕ'.split()
    voiceless = 'p t ʈ c k q ʔ ɸ f θ s ʃ ʂ ç x χ'.split()
    
    for plosive in plosives:
        assert is_plosive(plosive)
        assert not is_affricate(plosive)
        assert not is_nasal(plosive)
        assert not is_liquid(plosive)
    
    for fricative in fricatives:
        assert is_fricative(fricative)
        assert not is_affricate(fricative)
        assert not is_plosive(fricative)
        assert not is_nasal(fricative)
        assert not is_liquid(fricative)

    for lateral in laterals:
        assert is_liquid(lateral)
    
    for nasal in nasals:
        assert is_nasal(nasal)
        assert not is_fricative(nasal)
        assert not is_affricate(nasal)
        assert not is_plosive(nasal)
        assert not is_liquid(nasal)
    
    assert not is_plosive('t̠ʃ') and is_affricate('t̠ʃ')
    assert is_liquid('r')
    assert is_rothic('r')
    
    for c in voiced:
        assert is_voiced(c), c
    
    for c in voiceless:
        assert not is_voiced(c), c

In [5]:

features_d = get_features_dict('phoible-segments-features.tsv', 'phoible-phonemes.tsv')
genus_to_family = get_genus_map('family.csv', 'genus.csv')
inventories = get_inventories('phoible-aggregated.tsv', 'phoible-consonants.tsv')

# Make sure we won't make phonological errors 😉
sanity_checks()

Here we'll filter out the inventories that are possibly duplicates. The criterion will be multiple occurences of an identifier made of the language code and the language name concatenated in lower case with stripped spaces. The inventory we'll keep is the one with a PHOIBLE or SAPHON inventory preferably.

In [6]:

def get_canonical_name(inventory):
    code = inventory['LanguageCode']
    name = inventory['LanguageName']
    name = name.replace(' ', '')
    name = name.lower()
    return '{}{}'.format(code, name)


def filter_inventories(inventories):
    codes = [get_canonical_name(i) for i in inventories]
    dupes = [item for item, count in collections.Counter(codes).items() if count > 1]
    dupe_sources = {}

    for code in dupes:
        for inventory in inventories:
            inventory_id = get_canonical_name(inventory)
            if inventory_id == code:
                try:
                    dupe_sources[inventory_id].append(inventory['Source'])
                except KeyError:
                    dupe_sources[inventory_id] = [inventory['Source']]

    filtered_inventories = []
    for inventory in inventories:
        inventory_id = get_canonical_name(inventory)

        if inventory_id in dupes:
            source = inventory['Source']
            if source != 'PH' and 'PH' in dupe_sources[inventory_id]:
                continue
            elif source != 'SAPHON' and 'SAPHON' in dupe_sources[inventory_id]:
                continue
            elif source != 'UPSID' and 'UPSID' in dupe_sources[inventory_id]:
                continue
            else:
                filtered_inventories.append(inventory)
        else:
            filtered_inventories.append(inventory)

    return filtered_inventories


inventories = filter_inventories(inventories)[:160]
# aleatori: inventories = random.sample(inventories, 160)
inventories.sort(key=lambda k: k['LanguageFamilyRoot']+k['LanguageFamilyGenus'])

Família, genus, llengua i segments

In [7]:

html_table = ['<table>']

total_segments = 0
# Build the header
html_table.append('<tr>')
for header in ['Familia', 'Genus', 'Area', 'Llengua', '#', 'Segments']:
    html_table.append('<td><b>{}</b></td>'.format(header))
else:
    html_table.append('</tr>')

# Add the actual payload
for inventory in inventories:
    html_table.append('<tr>')
    for attribute in ['LanguageFamilyRoot', 'LanguageFamilyGenus', 'Area', 'LanguageName']: 
        html_table.append('<td>{}</td>'.format(inventory[attribute]))
    else:
        html_table.append('<td>{}</td>'.format(len(inventory['Segments'])))
        total_segments += len(inventory['Segments'])
        html_table.append('<td>')
        html_table.append('  '.join(sorted(inventory['Segments'])))
        html_table.append('</font></td>')
        html_table.append('</tr>')

else:
    html_table.append('</table>')

    
print('Número de llengües: {}'.format(len(inventories)))
print('Mitjana de fonemes per llengua {}'.format(total_segments / len(inventories)))
# Construct the table and display it
html_table = ''.join(html_table)
html_table = HTML(html_table)
display(html_table)

Out[7]:

Número de llengües: 160
Mitjana de fonemes per llengua 12.00625

In [8]:

inventories.sort(key=lambda k: k['LanguageName'])

Sistemes secundaris

In [9]:

sistemes_secundaris = [
'Awtu, Aspiració',
'Auyana, Allargament',
'Comanche, Labialització',
'Dadibi, Aspiració',
'Dani, Labialització',
'ekari, Alliberament lateral',
'fuzhou, Aspiració',
'gaviao do para, Aspiració',
'guajajara, Labialització',
'kayabi, Labialització',
'kitsijis, Aspiració',
'karajo, Aspiració',
'krinkati-tinbira, Aspiració',
'kuikura-kalapalo, Palatalització',
'northern paiute, Labialització',
'pompeian, Labialització',
'sanuma, Aspiració',
'shawnee, Llargada',
'shirihana, Aspiració',
'wantoat, Labialització',
'wariwari, Labialització / Glotalitzacio',
'yawa, Palatalització']

langs_with_aspiration = 0
langs_with_labialitzation = 0
langs_with_palatalitzation = 0
langs_with_glotalization = 0
langs_with_duration = 0
langs_with_lateralrelease = 0

html_table = ['<table>']
html_table.append('<tr><td>Llengua</td><td>Sistema secundari</td>')

for s in sistemes_secundaris:
    if 'Labialització' in s:
        langs_with_labialitzation += 1
    if 'Aspiració' in s:
        langs_with_aspiration += 1
    if 'Palatalitzaci' in s:
        langs_with_palatalitzation += 1
    if 'Llargada' in s:
        langs_with_duration += 1
    if 'Glotalitzacio' in s:
        langs_with_glotalization += 1
    if 'Alliberament' in s:
        langs_with_lateralrelease += 1

    html_table.append('<tr>')
    for attr in s.split(','):
        html_table.append('<td>{}</td>'.format(attr.title()))
    
else:
    html_table.append('</table></font>')

display(HTML(''.join(html_table)))

print('Labialitzacio: {} {:.2%}'.format(langs_with_labialitzation, langs_with_labialitzation / len(inventories)))
print('Aspiracio: {} {:.2%}'.format(langs_with_aspiration, langs_with_aspiration / len(inventories)))
print('Palatalitzacio: {} {:.2%}'.format(langs_with_palatalitzation, langs_with_palatalitzation / len(inventories)))
print('Duracio: {} {:.2%}'.format(langs_with_duration, langs_with_duration / len(inventories)))
print('Glotalitzacio: {} {:.2%}'.format(langs_with_glotalization, langs_with_glotalization / len(inventories)))
print('Alliberament lateral: {} {:.2%}'.format(langs_with_lateralrelease, langs_with_lateralrelease / len(inventories)))

Out[9]:

Labialitzacio: 8 5.00%
Aspiracio: 9 5.62%
Palatalitzacio: 2 1.25%
Duracio: 1 0.62%
Glotalitzacio: 1 0.62%
Alliberament lateral: 1 0.62%

Presència de fonemes

In [10]:

all_consonants = []
for inventory in inventories:
    for consonant in inventory['Segments']:
        all_consonants.append(consonant)

all_consonants = sorted(all_consonants)
all_consonants_count = []
counted = []
for consonant in all_consonants:
    if consonant not in counted:
        counted.append(consonant)
        count = all_consonants.count(consonant)
        all_consonants_count.append([consonant, count,
                                     '{:.2%}'.format(count / len(inventories))])

all_consonants_count.sort(key=lambda f: f[1], reverse=True)
all_consonants_uniq = [c[0] for c in all_consonants_count]

html_table = ['<table>']
html_table.append('<tr><td>Fonema</td><td>Ocurrències</td><td>Percentatge</td></tr>')

for consonant in all_consonants_count:
    html_table.append('<tr>')
    for attribute in consonant:
        html_table.append('<td>{}</td>'.format(attribute))
    
    html_table.append('</tr>')
else:
    html_table.append('</table></font>')

display(HTML(''.join(html_table)))

Out[10]:

In [11]:

print(len(all_consonants_uniq))

Out[11]:

109

hem de mirar obstruents, quantes d'elles són sordes i sonores
de les obstruents quantes oclusives i sordes i sonores
presència d'africades i les seves qualitats
nasals

Obstruents

In [12]:

langs_with_obstruents = []
for inventory in inventories:
    obstruents_voiced = []
    obstruents_voiceless = []

    for segment in inventory['Segments']:
        if features_d[segment]['sonorant'] is False:
            if is_voiced(segment):
                obstruents_voiced.append(segment)
            else:
                obstruents_voiceless.append(segment)

    else:
        langs_with_obstruents.append({inventory['LanguageName']: [obstruents_voiced, obstruents_voiceless]})

html_table = ['<font face="Doulos SIL" size=4em><table>']
html_table.append('<tr><td>Llengua</td><td>obstruents sonores</td><td>obstruents sordes</td><td>total (sonores + sordes)</td>')
for lang in langs_with_obstruents:
    for inv in lang:
        voiced_inv = lang[inv][0]
        voiceless_inv = lang[inv][1]
        joined_inv = list(voiced_inv + voiceless_inv)
        
        html_table.append('<tr>')
        html_table.append('<td>{}</td>'.format(inv))
        html_table.append('<td>{}</td>'.format(' '.join(voiced_inv)))
        html_table.append('<td>{}</td>'.format(' '.join(voiceless_inv)))
        html_table.append('<td>{}</td>'.format("{} ({} + {})".format(len(joined_inv),
                                                                     len(voiced_inv),
                                                                     len(voiceless_inv))))

        #for item in sorted(lang[inv]):
        #    html_table.append('<td>{}</td>'.format(item))
        html_table.append('</tr>')
else:
    html_table.append('</table></font>')

display(HTML(''.join(html_table)))

Out[12]:

Oclusives

In [13]:

langs_with_plosives = []
more_voiceless_plosives = 0
for inventory in inventories:
    plosives_voiced = []
    plosives_voiceless = []

    for segment in inventory['Segments']:
        if is_plosive(segment):
            if is_voiced(segment):
                plosives_voiced.append(segment)
            else:
                plosives_voiceless.append(segment)

    else:
        langs_with_plosives.append({inventory['LanguageName']: [plosives_voiced, plosives_voiceless]})

html_table = ['<table>']
html_table.append('<tr><td>Llengua</td><td>plosives sonores</td><td>plosives sordes</td><td>total (sonores + sordes)</td>')
for lang in langs_with_plosives:
    for inv in lang:
        voiced_inv = lang[inv][0]
        voiceless_inv = lang[inv][1]
        joined_inv = list(voiced_inv + voiceless_inv)
        
        if len(voiceless_inv) > len(voiced_inv):
            more_voiceless_plosives += 1
        
        html_table.append('<tr>')
        html_table.append('<td>{}</td>'.format(inv))
        html_table.append('<td>{}</td>'.format(' '.join(voiced_inv)))
        html_table.append('<td>{}</td>'.format(' '.join(voiceless_inv)))
        html_table.append('<td>{}</td>'.format("{} ({} + {})".format(len(joined_inv),
                                                                     len(voiced_inv),
                                                                     len(voiceless_inv))))

        #for item in sorted(lang[inv]):
        #    html_table.append('<td>{}</td>'.format(item))
        html_table.append('</tr>')
else:
    html_table.append('</table></font>')

display(HTML(''.join(html_table)))

print('El nombre d’oclusives sordes és normalment major o igual al d’oclusives sonores: {:.2%} ({} / {})'.format(more_voiceless_plosives / len(langs_with_plosives),
                                           more_voiceless_plosives, len(langs_with_plosives)))

Out[13]:

El nombre d’oclusives sordes és normalment major o igual al d’oclusives sonores: 77.50% (124 / 160)

Fricatives

In [14]:

langs_with_fricatives = []
for inventory in inventories:
    fricatives_voiced = []
    fricatives_voiceless = []

    for segment in inventory['Segments']:
        if is_fricative(segment):
            if is_voiced(segment):
                fricatives_voiced.append(segment)
            else:
                fricatives_voiceless.append(segment)

    else:
        langs_with_fricatives.append({inventory['LanguageName']: [fricatives_voiced, fricatives_voiceless]})

html_table = ['<font face="Doulos SIL" size=4em><table>']
html_table.append('<tr><td>Llengua</td><td>fricatives sonores</td><td>fricatives sordes</td><td>total (sonores + sordes)</td>')
for lang in langs_with_fricatives:
    for inv in lang:
        voiced_inv = lang[inv][0]
        voiceless_inv = lang[inv][1]
        joined_inv = list(voiced_inv + voiceless_inv)
        
        html_table.append('<tr>')
        html_table.append('<td>{}</td>'.format(inv))
        html_table.append('<td>{}</td>'.format(' '.join(voiced_inv)))
        html_table.append('<td>{}</td>'.format(' '.join(voiceless_inv)))
        html_table.append('<td>{}</td>'.format("{} ({} + {})".format(len(joined_inv),
                                                                     len(voiced_inv),
                                                                     len(voiceless_inv))))

        #for item in sorted(lang[inv]):
        #    html_table.append('<td>{}</td>'.format(item))
        html_table.append('</tr>')
else:
    html_table.append('</table></font>')

display(HTML(''.join(html_table)))

Out[14]:

Nasals

In [15]:

langs_with_nasals = []
for inventory in inventories:
    nasals = []
    for segment in inventory['Segments']:
        try:
            if is_nasal(segment):
                nasals.append(segment)
        except KeyError:
            pass
    else:
        langs_with_nasals.append({inventory['LanguageName']: sorted(nasals)})

html_table = ['<font face="Doulos SIL" size=4em><table>']
html_table.append('<tr><td>Llengua</td><td>no. de nasals</td><td colspan=0>Nasals</td></tr>')
for lang in langs_with_nasals:
    for inv in sorted(lang):
        html_table.append('<tr>')
        html_table.append('<td>{}</td>'.format(inv))
        html_table.append('<td>{}</td>'.format(len(lang[inv])))
        for item in sorted(lang[inv]):
            html_table.append('<td>{}</td>'.format(item))
        html_table.append('</tr>')
else:
    html_table.append('</table></font>')

display(HTML(''.join(html_table)))

Out[15]:

Líquides

Codi per comprovar els universals

Gairebé totes les llengües tenen almenys una líquida.
Les lengües amb dues o més líquides generalment tenen una lateral i el contrast lateral/no lateral.

In [16]:

langs_with_liquides = []
at_least_one_liquid = 0
at_least_two_liquid = 0
has_laterality_contrast = 0
for inventory in inventories:
    liquides = []
    laterality = []
    for segment in inventory['Segments']:
        try:
            if is_liquid(segment):
                laterality.append(features_d[segment]['lateral'])
                liquides.append(segment)
        except KeyError:
            pass
        
    else:
        if len(liquides) >= 1:
            at_least_one_liquid += 1
        
        if len(liquides) >= 2:
            at_least_two_liquid += 1
            if laterality.count(True) > 0 and laterality.count(False) > 0:
                has_laterality_contrast += 1
            
        langs_with_liquides.append({inventory['LanguageName']: sorted(liquides)})

html_table = ['<font face="Doulos SIL" size=4em><table>']
html_table.append('<tr><td>Llengua</td><td>Numero de liquides</td><td colspan=0>Liquides</td></tr>')
for lang in langs_with_liquides:
    for inv in sorted(lang):
        html_table.append('<tr>')
        html_table.append('<td>{}</td>'.format(inv))
        html_table.append('<td>{}</td>'.format(len(lang[inv])))
        for item in sorted(lang[inv]):
            html_table.append('<td>{}</td>'.format(item))
        html_table.append('</tr>')
else:
    html_table.append('</table></font>')

display(HTML(''.join(html_table)))

print("Tenen almenys, una líquida: {:2%} ({} / {})".format(at_least_one_liquid / len(inventories),
                                                         at_least_one_liquid,
                                                         len(inventories)))

print('{} {} {:.2%} ({} / {})'.format('Les lengües amb dues o més líquides generalment',
                                      'tenen una lateral i lateralitat contrastiva',
                                       has_laterality_contrast / at_least_two_liquid,
                                       has_laterality_contrast, at_least_two_liquid))

Out[16]:

Tenen almenys, una líquida: 83.750000% (134 / 160)
Les lengües amb dues o més líquides generalment tenen una lateral i lateralitat contrastiva 94.74% (36 / 38)

Comprovació dels universals de jerarquies

Totes les consonants

In [17]:

langs_hierarchy = []
for inventory in inventories:
    subsystems = []

    dentoalveolars = [segment for segment in inventory['Segments'] if features_d[segment]['coronal'] and not features_d[segment]['dorsal']]
    subsystems.append(dentoalveolars)

    labials = [segment for segment in inventory['Segments'] if features_d[segment]['labial']]
    subsystems.append(labials)

    velars = [segment for segment in inventory['Segments'] if features_d[segment]['dorsal'] and features_d[segment]['high'] and not features_d[segment]['continuant']]
    subsystems.append(velars)

    palatals = [segment for segment in inventory['Segments'] if features_d[segment]['coronal'] is True and features_d[segment]['dorsal'] is True]
    subsystems.append(palatals)
    
    langs_hierarchy.append({inventory['LanguageName']: subsystems})


html_table = ['<table>']

# Build header
html_table.append('<tr>')
for header in ['Llengua', 'Alveolars', 'Labials', 'Velars',
               'Palatals', 'Jerarquia (a &gt; l &gt; v  &gt; p)']:
    
    html_table.append('<td>{}</td>'.format(header))
    
else:
    html_table.append('<tr>')

# Add contents
for lang in langs_hierarchy:
    for inv in lang:
        if len(lang[inv][0]) == 0 and len(lang[inv][1]) == 0 and len(lang[inv][2]) == 0 and  len(lang[inv][3]) == 0:
            continue

        html_table.append('<tr>')
        html_table.append('<td>{}</td>'.format(inv))
        
        for item in lang[inv]:
            html_table.append('<td>{} [ {} ]</td>'.format(len(item), ' '.join(item)))
        
        follows_hierarchy = len(lang[inv][0]) >= len(lang[inv][1]) and len(lang[inv][1]) >= len(lang[inv][2])
        html_table.append('<td>{}<td>'.format("si" if follows_hierarchy else "no"))

    else:
        html_table.append('</tr>')
else:
    html_table.append('</table></font>')

display(HTML(''.join(html_table)))

Out[17]:

Oclusives

Jerarquia de les oclusives

In [18]:

langs_plosive_hierarchy = []
for inventory in inventories:
    subsystems = []

    dentoalveolars = [segment for segment in inventory['Segments'] if is_plosive(segment) and features_d[segment]['coronal'] and not features_d[segment]['dorsal']]
    subsystems.append(dentoalveolars)

    labials = [segment for segment in inventory['Segments'] if is_plosive(segment) and features_d[segment]['labial']]
    subsystems.append(labials)

    velars = [segment for segment in inventory['Segments'] if is_plosive(segment) and features_d[segment]['dorsal'] and features_d[segment]['high'] and not features_d[segment]['continuant']]
    subsystems.append(velars)

    palatals = [segment for segment in inventory['Segments'] if is_plosive(segment) and features_d[segment]['coronal'] is True and features_d[segment]['dorsal'] is True]
    subsystems.append(palatals)
    
    langs_plosive_hierarchy.append({inventory['LanguageName']: subsystems})


html_table = ['<table>']

# Build header
html_table.append('<tr>')
for header in ['Llengua', 'Oclusives alveolars', 'Oclusives labials', 'Oclusives Velars',
               'Oclusives Palatals', 'Jerarquia oclusives (a &gt; l &gt; v  &gt; p)']:
    
    html_table.append('<td>{}</td>'.format(header))
    
else:
    html_table.append('<tr>')

# Add contents
followers = 0

for lang in langs_plosive_hierarchy:
    for inv in lang:
        if len(lang[inv][0]) == 0 and len(lang[inv][1]) == 0 and len(lang[inv][2]) == 0 and  len(lang[inv][3]) == 0:
            continue
        html_table.append('<tr>')
        html_table.append('<td>{}</td>'.format(inv))
        
        for item in lang[inv]:
            html_table.append('<td>{} [ {} ]</td>'.format(len(item), ' '.join(item)))
        
        follows_hierarchy = len(lang[inv][0]) >= len(lang[inv][1]) and len(lang[inv][1]) >= len(lang[inv][2])
        if follows_hierarchy:
            followers +=1 
            
        html_table.append('<td>{}<td>'.format("si" if follows_hierarchy else "no"))

    else:
        html_table.append('</tr>')
else:
    html_table.append('</table></font>')

display(HTML(''.join(html_table)))
print('Followers: {:.2%} ({} / {})'.format(followers / len(inventories), followers, len(inventories)))

Out[18]:

Followers: 63.75% (102 / 160)

Jerarquia de les africades

In [19]:

langs_affricate_hierarchy = []
for inventory in inventories:
    subsystems = []
    
    palatals = [segment for segment in inventory['Segments'] if is_affricate(segment) and not features_d[segment]['anterior'] and features_d[segment]['coronal'] and not features_d[segment]['dorsal']]
    subsystems.append(palatals)

    dentoalveolars = [segment for segment in inventory['Segments'] if is_affricate(segment) and features_d[segment]['anterior'] and features_d[segment]['coronal'] and not features_d[segment]['dorsal']]
    subsystems.append(dentoalveolars)

    labials = [segment for segment in inventory['Segments'] if is_affricate(segment) and features_d[segment]['labial']]
    subsystems.append(labials)

    velars = [segment for segment in inventory['Segments'] if is_affricate(segment) and features_d[segment]['dorsal'] and features_d[segment]['high'] and not features_d[segment]['continuant']]
    subsystems.append(velars)
    
    langs_affricate_hierarchy.append({inventory['LanguageName']: subsystems})


html_table = ['<table>']

# Build header
html_table.append('<tr>')
for header in ['Llengua', 'Africades palatals', 'Africades dentoalveolars', 'Africades Velars',
               'Africades labials', 'Africades (p &gt; d &gt; v  &gt; l)']:
    
    html_table.append('<td>{}</td>'.format(header))
    
else:
    html_table.append('<tr>')

# Add contents
followers = 0
candidates = 0
for lang in langs_affricate_hierarchy:
    for inv in lang:
        if len(lang[inv][0]) == 0 and len(lang[inv][1]) == 0 and len(lang[inv][2]) == 0 and  len(lang[inv][3]) == 0:
            continue
        else:
            candidates += 1
            
        html_table.append('<tr>')
        html_table.append('<td>{}</td>'.format(inv))
        
        for item in lang[inv]:
            html_table.append('<td>{} [ {} ]</td>'.format(len(item), ' '.join(item)))
            
        follows_hierarchy = len(lang[inv][0]) >= len(lang[inv][1]) and len(lang[inv][1]) >= len(lang[inv][2])
        if follows_hierarchy:
            followers +=1
            
        html_table.append('<td>{}<td>'.format("si" if follows_hierarchy else "no"))

    else:
        html_table.append('</tr>')
else:
    html_table.append('</table></font>')

display(HTML(''.join(html_table)))
print('Followers: {:.2%} ({} / {})'.format(followers / candidates, followers, candidates))

Out[19]:

Followers: 77.05% (47 / 61)

Jerarquia de les fricatives

In [20]:

langs_fricative_hierarchy = []
for inventory in inventories:
    subsystems = []

    dentoalveolars = [segment for segment in inventory['Segments'] if is_fricative(segment) and features_d[segment]['coronal'] and not features_d[segment]['dorsal']]
    subsystems.append(dentoalveolars)

    labials = [segment for segment in inventory['Segments'] if is_fricative(segment) and features_d[segment]['labial']]
    subsystems.append(labials)
    
    palatals = [segment for segment in inventory['Segments'] if is_fricative(segment) and features_d[segment]['coronal'] is True and features_d[segment]['dorsal'] is True]
    subsystems.append(palatals)

    velars = [segment for segment in inventory['Segments'] if is_fricative(segment) and features_d[segment]['dorsal'] and features_d[segment]['high'] and not features_d[segment]['continuant']]
    subsystems.append(velars)
    
    langs_fricative_hierarchy.append({inventory['LanguageName']: subsystems})


html_table = ['<table>']

# Build header
html_table.append('<tr>')
for header in ['Llengua', 'fricatives alveolars', 'fricatives labials', 'fricatives palatals',
               'fricatives velars', 'Jerarquia fricatives (a &gt; l &gt; p  &gt; v)']:
    
    html_table.append('<td>{}</td>'.format(header))
    
else:
    html_table.append('<tr>')

# Add contents
followers = 0
for lang in langs_fricative_hierarchy:
    for inv in lang:
        if len(lang[inv][0]) == 0 and len(lang[inv][1]) == 0 and len(lang[inv][2]) == 0 and  len(lang[inv][3]) == 0:
            continue
        html_table.append('<tr>')
        html_table.append('<td>{}</td>'.format(inv))
        
        for item in lang[inv]:
            html_table.append('<td>{} [ {} ]</td>'.format(len(item), ' '.join(item)))
        
        follows_hierarchy = len(lang[inv][0]) >= len(lang[inv][1]) and len(lang[inv][1]) >= len(lang[inv][2])
        
        if follows_hierarchy:
            followers += 1
            
        html_table.append('<td>{}<td>'.format("si" if follows_hierarchy else "no"))

    else:
        html_table.append('</tr>')
else:
    html_table.append('</table></font>')

display(HTML(''.join(html_table)))
print('Followers: {:.2%} ({} / {})'.format(followers / len(inventories), followers, len(inventories)))

Out[20]:

Followers: 68.75% (110 / 160)

Jerarquia de les nasals

In [21]:

# • Nasals:  Dento-alveolar > Labial > Velar > Palatal > Retroflexa.
langs_nasal_hierarchy = []
for inventory in inventories:
    subsystems = []

    dentoalveolars = [segment for segment in inventory['Segments'] if is_nasal(segment) and features_d[segment]['coronal'] and not features_d[segment]['dorsal']]
    subsystems.append(dentoalveolars)

    labials = [segment for segment in inventory['Segments'] if is_nasal(segment) and features_d[segment]['labial']]
    subsystems.append(labials)
    
    velars = [segment for segment in inventory['Segments'] if is_nasal(segment) and features_d[segment]['dorsal'] and features_d[segment]['high'] and not features_d[segment]['continuant'] and features_d[segment]['coronal'] is False ]
    subsystems.append(velars)
    
    palatals = [segment for segment in inventory['Segments'] if is_nasal(segment) and features_d[segment]['coronal'] is True and features_d[segment]['coronal'] is True and features_d[segment]['dorsal'] is True]
    subsystems.append(palatals)
    
    langs_nasal_hierarchy.append({inventory['LanguageName']: subsystems})


html_table = ['<table>']

# Build header
html_table.append('<tr>')
for header in ['Llengua', 'nasals alveolars', 'nasals labials', 'nasals velars',
               'nasals palatals', 'Jerarquia nasals (a &gt; l &gt; p  &gt; v)']:
    
    html_table.append('<td>{}</td>'.format(header))
    
else:
    html_table.append('<tr>')

candidates = 0
followers = 0
# Add contents
for lang in langs_nasal_hierarchy:
    candidates += 1
    
    for inv in lang:
        if len(lang[inv][0]) == 0 and len(lang[inv][1]) == 0 and len(lang[inv][2]) == 0 and  len(lang[inv][3]) == 0:
            continue
        html_table.append('<tr>')
        html_table.append('<td>{}</td>'.format(inv))
        
        for item in lang[inv]:
            html_table.append('<td>{} [ {} ]</td>'.format(len(item), ' '.join(item)))
        
        follows_hierarchy = len(lang[inv][0]) >= len(lang[inv][1]) and len(lang[inv][1]) >= len(lang[inv][2])
        html_table.append('<td>{}<td>'.format("si" if follows_hierarchy else "no"))
        

    else:
        if follows_hierarchy:
            followers += 1
        html_table.append('</tr>')
else:
    html_table.append('</table></font>')

display(HTML(''.join(html_table)))
print('Followers: {:.2%} ({} / {})'.format(followers / candidates, followers, candidates))

Out[21]:

Followers: 95.62% (153 / 160)

Comprovació de més universals

Presència o absència de /p t k/

In [22]:

ptk_in_language = 0
langs_with_ptk = []
langs_without_ptk = []
for inventory in inventories:
    inventory_segments = inventory['Segments']
    if 'p' in inventory_segments and 't' in inventory_segments and 'k' in inventory_segments:
        ptk_in_language += 1
        langs_with_ptk.append(inventory['LanguageName'])
    else:
        langs_without_ptk.append(inventory['LanguageName'])

print('Llengües amb /p t k/: {:.2%} ({} de {})'.format(ptk_in_language / len(inventories),
                                                       ptk_in_language,
                                                       len(inventories)))

Out[22]:

Llengües amb /p t k/: 58.13% (93 de 160)

In [23]:

africades = []
for inventory in inventories:
    for segment in inventory['Segments']:
        try:
            if features_d[segment]['continuant'] is False \
            and features_d[segment]['sonorant'] is False \
            and features_d[segment]['continuant'] is False \
            and (features_d[segment]['strident'] is True or features_d[segment]['strident']):
            
                africades.append(segment)
        except KeyError:
            pass

africades = collections.Counter(africades)

html_table = ['<font face="Doulos SIL" size=4em><table>']
html_table.append('<tr><td>Fonema</td><td>Ocurrències</td>')
for k, v in africades.items():
    for inv in lang:
        html_table.append('<tr>')
        html_table.append('<td>{}</td><td>{}</td>'.format(k, v))

else:
    html_table.append('</table></font>')

display(HTML(''.join(html_table)))

Out[23]:

Comprovació de l'universal "Si només té una africada, generalment és /t̠ʃ/"

In [24]:

followers = 0
possible_followers = 0
for inventory in inventories:
    affricates = [s for s in inventory['Segments'] if is_affricate(s)]
    if len(affricates) == 1:
        possible_followers += 1
        if affricates[0] == 't̠ʃ':
            followers += 1

else:
    print('Segueixen l\'universal {:.2%} ({} de {})'.format(followers / possible_followers,
                                                            followers,
                                                            possible_followers))

Out[24]:

Segueixen l'universal 66.67% (32 de 48)

Comprovació de l'universal "El nombre d’africades és menor que el d’oclusives simples."

In [25]:

followers = 0
possible_followers = 0
for inventory in inventories:
    affricates = [s for s in inventory['Segments'] if is_affricate(s)]
    plosives = [s for s in inventory['Segments'] if is_plosive(s)]
    possible_followers += 1
    
    if len(affricates) < len(plosives):
        followers += 1

else:
    print('Segueixen l\'universal {:.2%} ({} de {})'.format(followers / possible_followers,
                                                            followers,
                                                            possible_followers))

Out[25]:

Segueixen l'universal 100.00% (160 de 160)

Comprovació de l'universal "Si una llengua té una sola fricativa, generalment és / s / i si no és aquesta sol ser /f/."

In [26]:

followers_1st_part = 0
followers_2nd_part = 0
candidates = 0

for inventory in inventories:
    fricatives = [s for s in inventory['Segments'] if is_fricative(s)]
    
    if len(fricatives) == 1:
        candidates += 1
        if fricatives[0] == 's':
            followers_1st_part += 1
        elif fricatives[0] == 'f':
            followers_2nd_part += 1

else:
    print('Segueixen l\'universal primera part {:.2%} ({} de {})'.format(followers_1st_part / candidates,
                                                            followers_1st_part,
                                                            candidates))
    print('Segueixen l\'universal primera part {:.2%} ({} de {})'.format(followers_2nd_part / candidates,
                                                        followers_2nd_part,
                                                        candidates))
    print('Segueixen l\'universal (total) {:.2%} ({} de {})'.format((followers_1st_part + followers_2nd_part) / candidates,
                                                        (followers_1st_part + followers_2nd_part),
                                                        candidates))

Out[26]:

Segueixen l'universal primera part 34.38% (11 de 32)
Segueixen l'universal primera part 0.00% (0 de 32)
Segueixen l'universal (total) 34.38% (11 de 32)

Comprovació de l'universal "El nombre de fricatives sordes generalment és més gran que el de sonores…"

In [27]:

followers = 0
non_followers = []
candidates = len(langs_with_fricatives)
for lang in langs_with_fricatives:
    for _, v in lang.items():
        if len(v[0]) > 0 or len(v[1]) > 0:
            if len(v[1]) > len(v[0]):
                followers += 1
            else:
                non_followers.append(lang)

else:
    print('Segueixen l\'universal {:.2%} ({} de {})'.format(followers / candidates,
                                                        followers,
                                                        candidates))
    print("No el segueixen:",  non_followers)

Out[27]:

Segueixen l'universal 73.12% (117 de 160)
No el segueixen: [{'Ache': [['β'], []]}, {'Akawaio': [['z̪'], ['s̪']]}, {'Ao': [['ɭ͓', 'z'], ['s']]}, {'Apinaye': [['ʒ', 'v'], ['s̪']]}, {'Au': [['ɣ'], ['s']]}, {'Baining': [['ɣ'], ['s']]}, {'Baruga': [['β', 'ɣ'], ['s', 'ɸ']]}, {'Binandere': [['β'], []]}, {'Cubeo': [['β', 'ð'], ['x']]}, {'Gadsup': [['β'], []]}, {'Guajajara': [['z'], ['h']]}, {'Jabutí': [['β', 'bz'], ['ps', 'h']]}, {'Kikamba': [['β', 'ð'], ['s']]}, {'Leti': [['β'], ['s']]}, {'Manam': [['z'], ['s']]}, {'Mixe': [['ʒ', 'v'], ['ʃ', 's̪']]}, {'Nankina': [['β'], []]}, {'Oro Win': [['β'], ['s']]}, {'Rotokas': [['β'], []]}, {'Suyá': [['ɣ'], ['s']]}, {'Tigak': [['ɮ', 'β'], ['s']]}, {'Tiwi': [['ɣ'], []]}, {'Vanimo': [['β', 'ɦ'], ['s']]}]

Comprovació de l'universal "El nombre de fricatives improbablement és major que el d’oclusives."

In [28]:

candidates = 0
followers = []
for inventory in inventories:
    fricatives = [s for s in inventory['Segments'] if is_fricative(s)]
    plosives = [s for s in inventory['Segments'] if is_plosive(s)]
    
    if len(fricatives) > 0 or len(plosives) > 0:
        candidates += 1
        if len(fricatives) > len(plosives):
            followers.append(inventory['LanguageName'])
else:
    print('Segueixen l\'universal {:.2%} ({} de {})'.format(len(followers) / candidates,
                                                        len(followers),
                                                        candidates))
    print('Llengües amb més fricatives que oclusives: {}'.format(', '.join(followers)))

Out[28]:

Segueixen l'universal 5.62% (9 de 160)
Llengües amb més fricatives que oclusives: Awa Pit, Crow, Huron, Irarutu, Jabutí, Kikamba, North Marquesan, Shanenawa, Waiwai

Comprovació dels universals sobre nasals

Preferència per l’articulació dento-alveolar.
Preferència per la sonoritat.
Gairebé totes les llengües tenen almenys una nasal, generalment / n / Si hi ha una segona nasal generalment és /m / però també pot ser / ŋ /
La presència de les nasals complexes implica la de les simples i la presència de nasals sordes implica la de les sonores.

Comprovació de 1

In [29]:

candidates = 0
followers = []
at_least_one_dentoalveolar = 0
not_at_least_one_dentoalveolar = []

for inventory in inventories:
    nasals = [s for s in inventory['Segments'] if is_nasal(s)]
    if len(nasals) == 0:
        continue
    
    candidates += 1
    dentoalveolar = [s for s in nasals if features_d[s]['coronal'] and not features_d[s]['dorsal']]
    not_dentoalveolar = [s for s in nasals if not (features_d[s]['coronal'] and not features_d[s]['dorsal'])]

    """
    print(inventory['LanguageName'])
    print("\tnasals:", nasals)
    print("\tdento:", dentoalveolar)
    print("\tno dento:", not_dentoalveolar)
    """
    
    if len(dentoalveolar) > 0:
        at_least_one_dentoalveolar += 1
    else:
        not_at_least_one_dentoalveolar.append(inventory)
    
    if len(dentoalveolar) >= len(not_dentoalveolar):
        followers.append(inventory['LanguageName'])


print('Tenen, almenys, una nasal dentoalveolar: {:.2%} ({} de {})'.format(at_least_one_dentoalveolar / candidates,
                                                       at_least_one_dentoalveolar,
                                                       candidates))

print('Tenen tantes o més dentoalveolars que no dentoalveolars {:.2%} ({} de {})'.format(len(followers) / candidates,
                                                                                         len(followers),
                                                                                        candidates))

Out[29]:

Tenen, almenys, una nasal dentoalveolar: 97.99% (146 de 149)
Tenen tantes o més dentoalveolars que no dentoalveolars 55.70% (83 de 149)

Comprovació de 2

In [30]:

candidates = 0
followers = []
not_followers = []

for inventory in inventories:
    nasals = [s for s in inventory['Segments'] if is_nasal(s)]
    if len(nasals) == 0:
        continue
    
    candidates += 1
    voiced = [s for s in nasals if features_d[s]['periodicGlottalSource']]
    voiceless = [s for s in nasals if not features_d[s]['periodicGlottalSource']]

    if len(voiced) > len(voiceless):
        followers.append(inventory['LanguageName'])
    else:
        not_followers.append(inventory['LanguageName'])


print('Tenen més nasals sonores que sordes: {:.2%} ({} de {})'.format(len(followers) / candidates, len(followers), candidates))

Out[30]:

Tenen més nasals sonores que sordes: 100.00% (149 de 149)

In [31]:

candidates = 0
followers_1 = []
followers_2 = []
followers_3 = []
non_followers = []

for inventory in inventories:
    nasals = [s for s in inventory['Segments'] if is_nasal(s)]
    if len(nasals) == 0:
        continue
    
    candidates += 1
    
    if len(nasals) >= 1 and 'n' in nasals:
        followers_1.append(inventory)
    else:
        non_followers.append(inventory)

    if len(nasals) >= 2 and 'n' in nasals and 'ŋ' in nasals:
        followers_2.append(inventory)
        
    elif len(nasals) >= 2 and 'n' in nasals and 'm' in nasals:
        followers_3.append(inventory)
        
    else:
        non_followers.append(inventory)
    
print('Tenen almenys una nasal, /n/. {:.2%} ({} de {})'.format(len(followers_1) / candidates, len(followers_1), candidates))
print('Tenen almenys dues nasals: n i ŋ. {:.2%} ({} de {})'.format(len(followers_2) / candidates, len(followers_2), candidates))
print('Tenen almenys dues nasals: n i m. {:.2%} ({} de {})'.format(len(followers_3) / candidates, len(followers_3), candidates))

Out[31]:

Tenen almenys una nasal, /n/. 78.52% (117 de 149)
Tenen almenys dues nasals: n i ŋ. 29.53% (44 de 149)
Tenen almenys dues nasals: n i m. 46.98% (70 de 149)

Ultra-mega-taula

ZOMG

In [32]:

html_table = ['<font face="Doulos SIL" size=4em><table>']
html_table.append('<tr><td>')

for consonant in all_consonants_uniq:
    html_table.append('<td>{}</td>'.format(consonant))

for inventory in inventories:
    html_table.append('<tr>')
    html_table.append('<td>{}</td>'.format(inventory['LanguageName']))

    count = 0
    for consonant in all_consonants_uniq:
        if consonant in inventory['Segments']:
            html_table.append('<td>X</td>')
            count += 1
        else:
            html_table.append('<td> </td>')
    else:
        html_table.append('<td>{}</td>'.format(count))
        html_table.append('</tr>')
        assert count == len(inventory['Segments'])

html_table.append('</table></font>')  
display(HTML(''.join(html_table)))

Out[32]:

In [33]:

family_tree = []
for inventory in inventories:
    family = inventory['LanguageFamilyRoot']
    genus = inventory['LanguageFamilyGenus']
    name = inventory['LanguageName']
    
    print('{}\t{}\t{}'.format(family, genus, name))

Out[33]:

Sepik	Upper Sepik	Abau
Tupian	Tupi-Guaraní	Ache
Jivaroan	Jivaroan	Aguaruna
Ainu	Ainu	Ainu
Cariban	Cariban	Akawaio
Cariban	Cariban	Akurio
Muskogean	Muskogean	Alabama
Panoan	Panoan	Amahuaca
Border	Border	Amanab
Trans-New Guinea	Angan	Angaatiha
Sino-Tibetan	Kuki-Chin	Ao
Cariban	Cariban	Apalaí
Macro-Ge	Ge-Kaingang	Apinaye
Zaparoan	Zaparoan	Arabela
Cariban	Cariban	Arára, Pará
Jabutí	Jabutí	Arikapú
Trans-New Guinea	Asmat-Kamoro	Asmat
Torricelli	Wapei-Palei	Au
Waorani	Waorani	Auca
Trans-New Guinea	Eastern Highlands	Auyana
Barbacoan	Barbacoan	Awa Pit
Sepik	Ram	Awtuw
Baining-Taulil	Baining	Baining
Australian	Pama-Nyungan	Bandjalang
Tucanoan	Tucanoan	Barasano
Austronesian	Oceanic	Bariai
Chibchan	Motilon	Barí
Trans-New Guinea	Binanderean	Baruga
Chiquito	Chiquito	Bésɨro
Austronesian	South Halmahera - West New Guinea	Biak
Niger-Congo	Gur	Biali
Siouan	Siouan	Biloxi
Trans-New Guinea	Binanderean	Binandere
Trans-New Guinea	Eastern Highlands	Binumarien
Algic	Algonquian	Blackfoot
Macro-Ge	Bororo	Bororo
Cacua-Nukak	Cacua-Nukak	Cacua
Macro-Ge	Ge-Kaingang	Canela
Cariban	Cariban	Carijona
Iroquoian	Southern Iroquoian	Cherokee
Trans-New Guinea	Chimbu	Chuave
Uto-Aztecan	Numic	Comanche
Siouan	Siouan	Crow
Tucanoan	Tucanoan	Cubeo
Teberan-Pawaian	Teberan	Dadibi
Trans-New Guinea	Dani	Dani
Senagi	Senagi	Dera
Australian	Pama-Nyungan	Dyirbal
Niger-Congo	Cross River	Efik
Trans-New Guinea	Wissel Lakes-Kemandoga	Ekari
Eastern Sudanic	Nilotic	Endo
Trans-New Guinea	Ok	Faiwol
Trans-New Guinea	Fasu	Fasu
Sino-Tibetan	Chinese	Fuzhou
Trans-New Guinea	Eastern Highlands	Gadsup
Macro-Ge	Ge-Kaingang	Gavião do Pará
Trans-New Guinea	Chimbu	Golin
Tupian	Tupi-Guaraní	Guajajara
Australian	Pama-Nyungan	Gugu-Yalandyi
Austronesian	Oceanic	Hawaiian
Iroquoian	Northern Iroquoian	Huron
Cariban	Cariban	Ikpeng
Border	Border	Imonda
Cariban	Cariban	Ingarikó
Austronesian	South Halmahera - West New Guinea	Irarutu
Skou	Krisa	Isaka
Sepik	Upper Sepik	Iwam
Jabutí	Jabutí	Jabutí
Arauan	Arauan	Jamamadí
Tupian	Tupi-Guaraní	Kaiabi
Macro-Ge	Karajá	Karajá
Tupian	Arikem	Karitiana; Karitiâna
Katukinan	Katukinan	Katukína
Cariban	Cariban	Kaxuiâna
Niger-Congo	Bantoid	Kikamba
Eastern Sudanic	Nilotic	Kipsigis
Niger-Congo	Kru	Klao
Trans-New Guinea	Koiarian	Koiari
Tupian	Tupi-Guaraní	Kokama-Kokamilla
Macro-Ge	Ge-Kaingang	Krahô
Macro-Ge	Ge-Kaingang	Krinkati-Timbira
Cariban	Cariban	Kuikúro-Kalapálo
Kuot	Kuot	Kuot
Austronesian	Central Malayo-Polynesian	Leti
Cariban	Cariban	Macushi
Australian	Northern Daly	Malakmalak
Austronesian	Oceanic	Manam
Austronesian	Oceanic	Maori
Cariban	Cariban	Mapoyo
Macro-Ge	Maxakalí	Maxakali
Trans-New Guinea	Ok	Mianmin
Mixe-Zoque	Mixe-Zoque	Mixe
Sepik	Yellow River	Namia
Eastern Sudanic	Nilotic	Nandi
Trans-New Guinea	Finisterre-Huon	Nankina
East Bougainville	East Bougainville	Nasioi
Niger-Congo	Gur	Nateni
Trans-New Guinea	Madang	Ngomba
Nimboran	Nimboran	Nimboran
Torricelli	Wapei-Palei	Ningil
Austronesian	Oceanic	North Marquesan
Uto-Aztecan	Numic	Northern Paiute
Cacua-Nukak	Cacua-Nukak	Nukak
Niger-Congo	Bantoid	Nɔmaa (NɔmaáNdɛ́)
Iroquoian	Northern Iroquoian	Oneida
Chapacura-Wanham	Chapacura-Wanham	Oro Win
Trans-New Guinea	Binanderean	Orokaiva
Austronesian	Palauan	Palauan
Macro-Ge	Ge-Kaingang	Panará
Cariban	Cariban	Pémono
Mura	Mura	Piraha
Austronesian	Oceanic	Pohnpeian
Puinave	Puinave	Puinave
Macro-Ge	Rikbaktsa	Rikbaktsa
Austronesian	Oceanic	Roro
West Bougainville	West Bougainville	Rotokas
Nambikuaran	Nambikuaran	Sabane
Eastern Sudanic	Nilotic	Sabaot
East Strickland	East Strickland	Samo
Yanomam	Yanomam	Sanuma; Sanumá
Eastern Sudanic	Nilotic	Sebei
Austronesian	Oceanic	Seimat
Austronesian	Central Malayo-Polynesian	Selaru
Iroquoian	Northern Iroquoian	Seneca
Sentani	Sentani	Sentani
Panoan	Panoan	Shanenawa
Cahuapanan	Cahuapanan	Shawi
Algic	Algonquian	Shawnee
Yanomam	Yanomam	Shiriana
Jivaroan	Jivaroan	Shuar
Skou	Western Skou	Skou
Kiwaian	Kiwaian	Southern Kiwai
Austronesian	Central Malayo-Polynesian	Southern Nuautl
Trans-New Guinea	Binanderean	Suena
Macro-Ge	Ge-Kaingang	Suyá
Eleman	Eleman Proper	Taoripi
Taushiro	Taushiro	Taushiro
Austronesian	Central Malayo-Polynesian	Tetun
Austronesian	Oceanic	Tigak
Austronesian	Oceanic	Tinputz
Australian	Tiwian	Tiwi
Austronesian	Oceanic	Tongan; Tonga
Cariban	Cariban	Trió
Trans-New Guinea	Madang	Usan
Skou	Western Skou	Vanimo
Niger-Congo	Gur	Waama
Cariban	Cariban	Waiwai
Trans-New Guinea	Finisterre-Huon	Wantoat
Waorani	Waorani	Waorani
Chapacura-Wanham	Chapacura-Wanham	Wari; Wari'; Wariʔ; Oro Nao
Uto-Aztecan	Tarahumaran	Warihio
Border	Border	Waris
Australian	Pama-Nyungan	Wik-Munkan
Cariban	Cariban	Yabarana
Trans-New Guinea	Eastern Highlands	Yagaria
Yareban	Yareban	Yareba
Yawa	Yawa	Yawa
Cariban	Cariban	Yekwana
Australian	Pama-Nyungan	Yidiny
Torricelli	Wapei-Palei	Yil

In [34]:

print('{"type": "FeatureCollection", "features": [')

for inventory in inventories:
    lon, lat = inventory['Longitude'], inventory['Latitude']
    if lon == 'NULL':
        continue
    else:
        lon = lon.replace(':', '.')
        lat = lat.replace(':', '.')

        print('\t{"type": "Feature", "geometry": {"type": "Point",', end='')
        print('"coordinates": ', end='')
        coords = '[{:.3f}, {:.3f}]'.format(float(lon), float(lat))
        print(coords, end='')
        print('}, "properties": {}},')

print('], "properties": {}}')

Out[34]:

{"type": "FeatureCollection", "features": [
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [141.100, -4.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-55.150, -25.200]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-78.000, -5.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [143.000, 43.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-61.250, 6.300]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-55.400, 3.120]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-94.350, 30.450]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-73.000, -10.300]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [141.100, -3.300]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [146.200, -7.100]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [94.400, 26.350]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-54.450, 1.150]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-47.360, -6.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-75.000, -2.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-53.000, -3.370]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-62.480, -12.250]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [139.100, -5.350]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [142.050, -3.300]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-76.250, -1.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [145.450, -6.350]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-78.050, 1.200]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [141.550, -3.300]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [151.450, -4.200]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [153.000, -28.200]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-70.200, 0.250]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [148.450, -5.350]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-73.100, 8.500]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-60.000, -18.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [136.000, -1.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [1.100, 11.200]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-88.400, 30.300]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [147.550, -8.100]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [146.050, -6.150]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-113.300, 50.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-56.000, -16.450]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-69.550, 1.100]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-45.100, -6.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-72.000, 1.150]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-83.000, 35.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [145.050, -6.150]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-98.300, 34.100]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-107.000, 45.200]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-70.300, 1.300]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [144.350, -6.300]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [139.000, -4.100]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [140.550, -3.300]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [145.300, -17.350]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [8.300, 4.550]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [136.000, -3.450]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [35.350, 1.050]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [141.400, -5.300]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [143.100, -6.250]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [119.300, 26.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [146.000, -6.200]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-49.000, -4.450]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [144.500, -6.150]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-46.200, -5.150]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [145.300, -16.050]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-157.000, 20.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-77.300, 44.200]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-53.250, -11.230]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [141.100, -3.150]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-61.250, 6.300]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [133.300, -3.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [141.200, -2.450]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [141.550, -4.150]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-64.400, -12.030]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-66.150, -7.200]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-57.250, -10.550]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-50.250, -11.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-64.100, -9.200]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-70.200, -8.100]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-55.450, 2.100]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [38.000, -1.300]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-8.350, 5.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [147.300, -9.300]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-74.400, -5.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-47.450, -8.050]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-46.450, -5.500]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-53.150, -12.150]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [151.300, -3.050]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [127.400, -8.100]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-59.150, 4.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [130.450, -13.200]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [145.050, -4.050]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [176.300, -38.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-66.500, 6.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-40.500, -16.550]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [141.300, -4.400]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-96.000, 17.150]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [141.450, -3.500]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [146.300, -5.550]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [155.400, -6.300]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [1.300, 10.200]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [145.400, -5.150]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [140.100, -2.050]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [142.150, -3.300]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-140.300, -8.400]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-118.000, 41.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-71.250, 2.500]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [10.550, 4.350]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-79.550, 43.150]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-64.000, -10.400]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [134.300, 7.100]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-53.000, -10.350]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-62.000, -7.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [158.000, 7.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-68.000, 4.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-58.050, -11.080]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [146.300, -8.400]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [155.050, -6.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-60.200, -12.580]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [34.450, 1.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [142.100, -6.050]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-64.300, 4.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [34.350, 1.200]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [144.100, -1.300]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [130.550, -8.150]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-94.250, 36.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [140.300, -2.350]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-71.450, -9.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-76.550, -5.400]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-96.550, 34.500]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-62.300, 3.450]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-78.000, -2.300]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [140.550, -2.350]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [143.150, -8.350]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [129.050, -3.150]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [147.300, -7.450]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-53.000, -11.300]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [146.150, -8.050]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-75.320, -3.120]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [125.050, -9.200]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [150.550, -2.450]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [155.000, -5.400]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [131.000, -11.400]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-175.150, -21.100]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-56.100, 2.280]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [145.200, -4.500]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [141.200, -2.400]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [1.400, 10.350]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-59.120, 1.230]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [146.300, -6.100]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-76.250, -1.000]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-65.200, -11.150]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-108.500, 27.500]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [141.050, -3.100]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [141.500, -13.400]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-66.150, 5.200]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [145.250, -6.250]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [148.350, -9.300]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [136.150, -1.450]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [-64.300, 5.300]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [146.150, -17.200]}, "properties": {}},
	{"type": "Feature", "geometry": {"type": "Point","coordinates": [142.100, -3.250]}, "properties": {}},
], "properties": {}}

In [35]:

def get_upsid(aggregated, consonants):
    with open(aggregated) as f:
        aggregated_data = [i for i in csv.DictReader(f, delimiter='\t')]

    with open(consonants) as f:
        segments = [segment for segment in csv.DictReader(f, delimiter='\t')]

    inventories = []
    for inventory in aggregated_data:
            inventory['Segments'] = []
            
            if inventory['Source'] == 'UPSID':
                for segment in segments:
                    if inventory['InventoryID'] == segment['InventoryID']:
                        inventory['Segments'].append(segment['Phoneme'])

                for key in ['Phonemes', 'Tones', 'Population', 'Trump', 'Country', 'Vowels']:
                    del inventory[key]

                inventories.append(inventory)

    else:
        return inventories

upsid_inv = get_upsid('phoible-aggregated.tsv', 'phoible-consonants.tsv')

In [36]:

candidates = 0
followers = 0
for inv in upsid_inv:
    candidates += 1
    
    try:
        voiced_fric = [i for i in inv['Segments'] if is_fricative(i) and features_d[i]['periodicGlottalSource']]
        voiceless_fric = [i for i in inv['Segments'] if is_fricative(i) and features_d[i]['periodicGlottalSource'] is False]
    except KeyError:
        pass
    
    if len(voiced_fric) < len(voiceless_fric):
        followers += 1
    

print('UPSID: el nombre de fricatives sonores és menor que el de sordes: segueixen l\'universal {:.2%} ({} de {})'.format(followers / candidates, followers, candidates))

candidates = 0
followers = 0
for inv in upsid_inv:
    candidates += 1
    
    try:
        fric = [i for i in inv['Segments'] if is_fricative(i)]
        affric = [i for i in inv['Segments'] if is_affricate(i)]
        plosiv = [i for i in inv['Segments'] if is_plosive(i)]

    except KeyError:
        pass
    
    if len(fric) < (len(plosiv) + len(affric)):
        followers += 1

print('UPSID: el nombre de fricatives improbablement es major que oclusives i africades junts: segueixen l\'universal {:.2%} ({} de {})'.format(followers / candidates, followers, candidates))

candidates = 0
followers = 0
for inv in upsid_inv:
    candidates += 1
    
    try:
        liquid = [i for i in inv['Segments'] if is_fricative(i)]

    except KeyError:
        pass
    
    if len(liquid) > 0:
        followers += 1

print('UPSID: Gairebé totes les llengües tenen almenys una líquida: segueixen l\'universal {:.2%} ({} de {})'.format(followers / candidates, followers, candidates))

candidates = 0
followers = 0
non_followers = []
for inv in upsid_inv:    
    try:
        laterals = [i for i in inv['Segments'] if features_d[i]['lateral']]
    except KeyError:
        pass
    
    if len(laterals) > 0:
        candidates += 1
        
        for lateral in laterals:
            f = features_d[lateral]
            if f['periodicGlottalSource'] and f['approximant']:
                followers += 1
                break
        else:
            if inv['LanguageName'] not in non_followers:
                non_followers.append(inv['LanguageName'])

print('UPSID: Una llengua amb una o més laterals té una aproximant lateral sonora.: segueixen l\'universal {:.2%} ({} de {})'.format(followers / candidates, followers, candidates))
print('       no el segueixen', ', '.join(non_followers))

Out[36]:

UPSID: el nombre de fricatives sonores és menor que el de sordes: segueixen l'universal 76.05% (343 de 451)
UPSID: el nombre de fricatives improbablement es major que oclusives i africades junts: segueixen l'universal 91.57% (413 de 451)
UPSID: Gairebé totes les llengües tenen almenys una líquida: segueixen l'universal 93.13% (420 de 451)
UPSID: Una llengua amb una o més laterals té una aproximant lateral sonora.: segueixen l'universal 97.10% (368 de 379)
       no el segueixen AHTNA, CHUKCHI, EKARI, KABARDIAN, !XU, LUSHOOTSEED, TSESHAHT, TIGAK, TLINGIT, WAHGI, WINTU

In [0]:

In [0]:

In [37]:

langs_lateral_hierarchy = []
for inventory in inventories:
    subsystems = []

    dentoalveolars = [segment for segment in inventory['Segments'] if features_d[segment]['lateral'] and features_d[segment]['coronal'] and not features_d[segment]['dorsal']]
    subsystems.append(dentoalveolars)

    retroflex = [segment for segment in inventory['Segments'] if features_d[segment]['lateral'] and not features_d[segment]['anterior'] and not features_d[segment]['dorsal']]
    subsystems.append(retroflex)

    
    palatals = [segment for segment in inventory['Segments'] if features_d[segment]['lateral'] and not features_d[segment]['anterior'] and features_d[segment]['coronal'] and not features_d[segment]['dorsal']]
    subsystems.append(palatals)
    
        
    velars = [segment for segment in inventory['Segments'] if features_d[segment]['lateral'] and features_d[segment]['dorsal'] and features_d[segment]['high'] and not features_d[segment]['continuant'] and features_d[segment]['coronal'] is False ]
    subsystems.append(velars)
    
    langs_lateral_hierarchy.append({inventory['LanguageName']: subsystems})


html_table = ['<table>']

# Build header
html_table.append('<tr>')
for header in ['Llengua', 'laterals dento', 'laterals retroflex', 'laterals palatal',
               'laterals velar', 'Jerarquia laterals (d &gt; r &gt; p  &gt; v)']:
    
    html_table.append('<td>{}</td>'.format(header))
    
else:
    html_table.append('<tr>')

candidates = 0
followers = 0
# Add contents
for lang in langs_lateral_hierarchy:
    
    candidates += 1
    for inv in lang:
        if len(lang[inv][0]) == 0 and len(lang[inv][1]) == 0 and len(lang[inv][2]) == 0 and  len(lang[inv][3]) == 0:
            continue
        
        html_table.append('<tr>')
        html_table.append('<td>{}</td>'.format(inv))
        
        for item in lang[inv]:
            html_table.append('<td>{} [ {} ]</td>'.format(len(item), ' '.join(item)))
        
        follows_hierarchy = len(lang[inv][0]) >= len(lang[inv][1]) and len(lang[inv][1]) >= len(lang[inv][2])
        html_table.append('<td>{}<td>'.format("si" if follows_hierarchy else "no"))

    else:
        if follows_hierarchy:
            followers += 1
        html_table.append('</tr>')
else:
    html_table.append('</table></font>')

display(HTML(''.join(html_table)))

Out[37]:

In [38]:

langs_rothic_hierarchy = []
for inventory in inventories:
    subsystems = []

    dentoalveolars = [segment for segment in inventory['Segments'] if is_rothic(segment) and features_d[segment]['coronal'] and features_d[segment]['anterior']     and not features_d[segment]['dorsal']]
    subsystems.append(dentoalveolars)

    retroflex =      [segment for segment in inventory['Segments'] if is_rothic(segment) and features_d[segment]['coronal'] and not features_d[segment]['anterior'] and not features_d[segment]['dorsal']]
    subsystems.append(retroflex)
    
    uvulars =        [segment for segment in inventory['Segments'] if is_rothic(segment) and not features_d[segment]['coronal'] and not features_d[segment]['coronal'] and features_d[segment]['dorsal']]
    subsystems.append(uvulars)
    
    langs_rothic_hierarchy.append({inventory['LanguageName']: subsystems})


html_table = ['<table>']

# Build header
html_table.append('<tr>')
for header in ['Llengua', 'rothics dento', 'rothics retroflex', 'rothics uvulars',
               'Jerarquia rothics (d &gt; r &gt; u)']:
    
    html_table.append('<td>{}</td>'.format(header))
    
else:
    html_table.append('<tr>')

candidates = 0
followers = 0
# Add contents
for lang in langs_rothic_hierarchy:
    
    for inv in lang:
        if len(lang[inv][0]) == 0 and len(lang[inv][1]) == 0 and len(lang[inv][2]) == 0:
            continue
            
        candidates += 1

        html_table.append('<tr>')
        html_table.append('<td>{}</td>'.format(inv))
        
        for item in lang[inv]:
            html_table.append('<td>{} [ {} ]</td>'.format(len(item), ' '.join(item)))
        
        follows_hierarchy = len(lang[inv][0]) >= len(lang[inv][1]) and len(lang[inv][1]) >= len(lang[inv][2])
        html_table.append('<td>{}<td>'.format("si" if follows_hierarchy else "no"))

    else:
        if follows_hierarchy:
            followers += 1
        html_table.append('</tr>')
else:
    html_table.append('</table></font>')

display(HTML(''.join(html_table)))

print('Segueixen la jerarquia {:.2%} ({} de {})'.format(followers / candidates, followers, candidates))

Out[38]:

Segueixen la jerarquia 151.43% (159 de 105)

In [46]:

morph = """
Alagwa
Alamblak
Amharic
Apurinã
Arabic (Egyptian)
Arabic (Gulf)
Arabic (Syrian)
Arbore
Bayso
Beja
Berber (Middle Atlas)
Campa (Axininca)
Chinantec (Lealao)
Cree (Plains)
Diyari
Dizi
French
Gaelic (Scots)
Hausa
Hebrew (Modern)
Hindi
Hixkaryana
Iraqw
Kashmiri
Khasi
Khmu'
Kolami
Latvian
Lele
Macushi
Maltese
Maybrat
Miya
Mosetén
Mundari
Ojibwa (Eastern)
Oromo (Harar)
Panjabi
Pashto
Passamaquoddy-Maliseet
Pitta Pitta
Qafar
Rendille
Spanish
Tagalog
Taiap
Tigré
Tiwi
Tunica
Yazgulyam
Abkhaz
Barasano
Defaka
English
German
Godoberi
Grebo
Greek (Modern)
Icelandic
Kannada
Ket
Khoekhoe
Koromfe
Lavukaleve
Mangarrayi
Marathi
Nicobarese (Car)
Oneida
Retuarã
Russian
Seneca
Tamil
Tidore
Ukrainian
Wardaman
Wari
Archi
Bininj Gun-Wok
Burushaski
Dyirbal
Godié
Lak
Marind
Paumarí
Pirahã
Tsez
Wambaya
Zande
Arapesh (Mountain)
Babungo
Chichewa
Diola-Fogny
Fula (Guinean)
Hunzib
Ingush
Juhoan
Kisi
Kongo
Lingala
Luvale
Maung
Mixtec (Chalcatongo)
Ngangityemerri
Nkore-Kiga
Nunggubuyu
Nyamwezi
Nyiha
Shona
Supyire
Swahili
Yimas
Zulu
""".split()

for lang in morph:
    for inventory in inventories:
        if inventory['LanguageName'] == lang:
            print(inventory[L])

Out[46]:

{'Area': 'America', 'LanguageFamilyGenus': 'Cariban', 'Consonants': '10', 'LanguageFamilyRoot': 'Cariban', 'InventoryID': '1877', 'LanguageCode': 'mbc', 'Longitude': '-59:15', 'Segments': ['ʔ', 'k', 'j', 'm', 'n', 'p', 's', 't', 'w', 'ɾ'], 'LanguageName': 'Macushi', 'Latitude': '04:00', 'Source': 'SAPHON'}
{'Area': 'Pacific', 'LanguageFamilyGenus': 'Tiwian', 'Consonants': '14', 'LanguageFamilyRoot': 'Australian', 'InventoryID': '1218', 'LanguageCode': 'tiw', 'Longitude': '131:0', 'Segments': ['n̪', 'j', 'k', 'm', 'ɣ', 'n', 'p', 't̪', 't', 'w', 'ɻ', 'ɾ', 'ŋ', 'l'], 'LanguageName': 'Tiwi', 'Latitude': '-11:40', 'Source': 'PH'}
{'Area': 'America', 'LanguageFamilyGenus': 'Tucanoan', 'Consonants': '11', 'LanguageFamilyRoot': 'Tucanoan', 'InventoryID': '239', 'LanguageCode': 'bao', 'Longitude': '-70:20', 'Segments': ['ɡ', 'b', 'd', 'k', 'j', 'p', 's', 't', 'w', 'h', 'ɾ'], 'LanguageName': 'Barasano', 'Latitude': '00:25', 'Source': 'UPSID'}
{'Area': 'America', 'LanguageFamilyGenus': 'Northern Iroquoian', 'Consonants': '10', 'LanguageFamilyRoot': 'Iroquoian', 'InventoryID': '77', 'LanguageCode': 'one', 'Longitude': '-79:55', 'Segments': ['ʔ', 'h', 'k', 'j', 'l', 'n', 'ɰ', 's', 't̠ʃ', 't'], 'LanguageName': 'Oneida', 'Latitude': '43:15', 'Source': 'SPA'}
{'Area': 'America', 'LanguageFamilyGenus': 'Northern Iroquoian', 'Consonants': '11', 'LanguageFamilyRoot': 'Iroquoian', 'InventoryID': '536', 'LanguageCode': 'see', 'Longitude': '-94:25', 'Segments': ['n̪', 'b', 'w', 'h', 'k', 'j', 'm', 's̪', 't̪', 'ʔ', 'd̪z̪'], 'LanguageName': 'Seneca', 'Latitude': '36:0', 'Source': 'UPSID'}
{'Area': 'Pacific', 'LanguageFamilyGenus': 'Pama-Nyungan', 'Consonants': '13', 'LanguageFamilyRoot': 'Australian', 'InventoryID': '303', 'LanguageCode': 'dbl', 'Longitude': '145:30', 'Segments': ['ɡ', 'b', 'ŋ', 'j', 'm', 'l', 'n', 'ɲ', 'r', 'w', 'ɻ', 'ɟ', 'd'], 'LanguageName': 'Dyirbal', 'Latitude': '-17:35', 'Source': 'UPSID'}

Table of Contents

Família, genus, llengua i segments

Sistemes secundaris

Presència de fonemes

Obstruents

Oclusives

Fricatives

Nasals

Líquides

Comprovació dels universals de jerarquies

Totes les consonants

Oclusives

Jerarquia de les oclusives

Jerarquia de les africades

Jerarquia de les fricatives

Jerarquia de les nasals

Comprovació de més universals

Presència o absència de /p t k/

Comprovació de l'universal "Si només té una africada, generalment és /t̠ʃ/"

Comprovació de l'universal "El nombre d’africades és menor que el d’oclusives simples."

Comprovació de l'universal "Si una llengua té una sola fricativa, generalment és / s / i si no és aquesta sol ser /f/."

Comprovació de l'universal "El nombre de fricatives sordes generalment és més gran que el de sonores…"

Comprovació de l'universal "El nombre de fricatives improbablement és major que el d’oclusives."

Comprovació dels universals sobre nasals

Comprovació de 1

Comprovació de 2

Ultra-mega-taula

Product

Resources

Company