Source code for cltk.morphology.universal_dependencies_features

"""Data types for each morphological category and features annotated
by the Universal Dependencies (UD) project.

These are from v2 of UD, except for ``Strength`` which is from v1
and was (as of 12/2020) still in the Gothic treebank.
"""
from enum import auto

from cltk.utils.utils import CLTKEnum

class MorphosyntacticFeature(CLTKEnum):
    """A generic multivalued morphosyntactic feature.

    Serves as the common base class for every feature enumeration in this
    module; it declares no members of its own.
    """
# Categorial Features
# The following are the traditional categorial features [+/-N, +/-V] of generative linguistics,
# augmented with the +/-F(unctional) feature as developed by Fukui (1986).
# See Fukui, N. 1986. A theory of category projection and its applications. Ph.D. dissertation, MIT.
# Though simplistic by today's standards, the scheme is more-or-less sufficient to represent
# the parts of speech of the Universal Dependencies project (https://universaldependencies.org/).
# TODO: restore the link to the readable explanation of these features that was referenced here.
class N(MorphosyntacticFeature):
    """Categorial feature [+/-N]: is this a nominal word?

    A nominal word is "a category used to group together nouns and
    adjectives based on shared properties. The motivation for nominal
    grouping is that in many languages nouns and adjectives share a number
    of morphological and syntactic properties."
    """

    pos = auto()
    neg = auto()
class V(MorphosyntacticFeature):
    """Categorial feature [+/-V]: is this a verbal word?

    Verbal words "typically signal events and actions, can constitute a
    minimal predicate in a clause, and govern the number and types of other
    constituents which may occur in the clause."

    Note that verb-like forms may be better classed as, e.g., nouns or
    adjectives.
    """

    pos = auto()
    neg = auto()
class F(MorphosyntacticFeature):
    """Categorial feature [+/-F]: is this a function word?

    Function words "have little lexical meaning or have ambiguous meaning
    and express grammatical relationships among other words within a
    sentence, or specify the attitude or mood of the speaker".
    """

    pos = auto()
    neg = auto()
class POS(MorphosyntacticFeature):
    """Part of speech.

    The POS "feature" represents the list of syntactic categories published
    by the Universal Dependencies project.
    """

    adjective = auto()
    adposition = auto()
    adverb = auto()
    auxiliary = auto()
    coordinating_conjunction = auto()
    determiner = auto()
    interjection = auto()
    noun = auto()
    numeral = auto()
    particle = auto()
    possessors_number = auto()
    pronoun = auto()
    proper_noun = auto()
    punctuation = auto()
    subordinating_conjunction = auto()
    symbol = auto()
    verb = auto()
    other = auto()
# Morphosyntactic Features.
# The inventory of features represented here is that of the Universal Dependencies project.
# See https://universaldependencies.org/u/feat/index.html
# While extensive, the inventory is naturally never quite complete.
# In particular, the list of spatiotemporal cases is likely to grow over time.

# Verbal features, related to +V categories.
class VerbForm(MorphosyntacticFeature):
    """The inflectional type of the verb.

    Possibly this confuses tense, aspect, and other more primitive
    morphosyntactic information.
    """

    converb = auto()
    finite = auto()
    gerund = auto()
    gerundive = auto()
    infinitive = auto()
    participle = auto()
    supine = auto()
    masdar = auto()
class Mood(MorphosyntacticFeature):
    """The grammatical mood of a verb."""

    admirative = auto()
    conditional = auto()
    desiderative = auto()
    imperative = auto()
    indicative = auto()
    jussive = auto()
    necessitative = auto()
    optative = auto()
    potential = auto()
    purposive = auto()
    quotative = auto()
    subjunctive = auto()
class Tense(MorphosyntacticFeature):
    """The tense of a verb.

    Tense locates the time of the eventuality in relation to a reference
    point in time.
    """

    future = auto()
    imperfect = auto()
    past = auto()
    pluperfect = auto()
    present = auto()
class Aspect(MorphosyntacticFeature):
    """The aspect of a verb, i.e. the temporal structure of the eventuality."""

    habitual = auto()
    imperfective = auto()
    iterative = auto()
    perfective = auto()
    progressive = auto()
    prospective = auto()
class Voice(MorphosyntacticFeature):
    """The voice of a verb.

    Voice expresses the relation of the participants to the eventuality.
    """

    active = auto()
    antipassive = auto()
    beneficiary_focus = auto()
    location_focus = auto()
    causative = auto()
    direct = auto()
    inverse = auto()
    middle = auto()
    passive = auto()
    reciprocal = auto()
class Evidentiality(MorphosyntacticFeature):
    """The source of evidence for the eventuality described by the verb.

    Distinguishes assertions based on the speaker's own knowledge from
    those based on indirect evidence.
    """

    first_hand = auto()
    non_first_hand = auto()
class Polarity(MorphosyntacticFeature):
    """Whether the proposition is positive or negative."""

    pos = auto()
    neg = auto()
class Person(MorphosyntacticFeature):
    """The grammatical person of the verb.

    Person identifies the participant indicated by the subject.
    """

    zeroth = auto()
    first = auto()
    second = auto()
    third = auto()
    fourth = auto()
    psor = auto()
    subj = auto()
class Politeness(MorphosyntacticFeature):
    """The register in which participants are addressed.

    This is the morphological reflex of the formal register with which
    participants are addressed in the sentence; it affects verbs and
    pronouns.
    """

    elevated = auto()
    formal = auto()
    humble = auto()
    informal = auto()
class Clusivity(MorphosyntacticFeature):
    """Whether a first person plural subject includes the addressee."""

    exclusive = auto()
    inclusive = auto()
class Strength(MorphosyntacticFeature):
    """Whether a verb or adjective is strong or weak.

    This is a UD v1 feature, specific to Gothic.
    """

    strong = auto()
    weak = auto()
# Inventory of the feature classes grouped as verbal features.
VERBAL_FEATURES = [
    VerbForm,
    Tense,
    Mood,
    Aspect,
    Voice,
    Person,
    Polarity,
    Politeness,
    Clusivity,
    Evidentiality,
    Strength,
]

# Nominal features, related to the +N categories.
class Case(MorphosyntacticFeature):
    """The case of a noun phrase.

    Members are grouped into structural, oblique, and spatiotemporal cases.

    ``befefactive`` is a historical misspelling that is kept so existing
    callers continue to work; ``benefactive`` is provided as a correctly
    spelled alias that resolves to the very same member.
    """

    # structural cases
    nominative = auto()
    accusative = auto()
    ergative = auto()
    absolutive = auto()

    # oblique cases
    abessive = auto()
    befefactive = auto()  # misspelled; see the ``benefactive`` alias below
    causative = auto()
    comparative = auto()
    considerative = auto()
    comitative = auto()
    dative = auto()
    distributive = auto()
    equative = auto()
    genitive = auto()
    instrumental = auto()
    partitive = auto()
    vocative = auto()

    # spatiotemporal cases
    ablative = auto()
    additive = auto()
    adessive = auto()
    allative = auto()
    delative = auto()
    elative = auto()
    essive = auto()
    illative = auto()
    inessive = auto()
    lative = auto()
    locative = auto()
    perlative = auto()
    sublative = auto()
    superessive = auto()
    terminative = auto()
    temporal = auto()
    translative = auto()

    # Correctly spelled alias. It reuses an existing value, so it becomes an
    # enum alias rather than a new member; it is declared last so that none
    # of the ``auto()`` values above are affected.
    benefactive = befefactive
class Gender(MorphosyntacticFeature):
    """The grammatical gender of a nominal."""

    masculine = auto()
    feminine = auto()
    neuter = auto()
    common = auto()
    psor = auto()
class Animacy(MorphosyntacticFeature):
    """The soul-type of an entity (as it were)."""

    animate = auto()
    human = auto()
    inanimate = auto()
    non_human = auto()
class Number(MorphosyntacticFeature):
    """The count type of an entity."""

    collective = auto()
    count_plural = auto()
    dual = auto()
    greater_paucal = auto()
    greater_plural = auto()
    inverse_number = auto()
    paucal = auto()
    plural = auto()
    plurale_tantum = auto()
    singular = auto()
    trial = auto()
    psor = auto()
class NumForm(MorphosyntacticFeature):
    """The written form of a cardinal or ordinal number.

    Records whether the number is expressed by digits or as a word.
    """

    word = auto()
    digit = auto()
    roman = auto()
    reference = auto()
class Definiteness(MorphosyntacticFeature):
    """Definiteness of a noun phrase.

    Captures the relationship between noun phrases and entities that are,
    or are not, in the discourse context.
    """

    complex = auto()
    construct_state = auto()
    definite = auto()
    indefinite = auto()
    specific_indefinite = auto()
class Degree(MorphosyntacticFeature):
    """The degree of comparison of adjectives."""

    absolute_superlative = auto()
    comparative = auto()
    equative = auto()
    positive = auto()
    superlative = auto()
# Inventory of the feature classes grouped as nominal features.
NOMINAL_FEATURES = [
    Case,
    Gender,
    Animacy,
    Number,
    Definiteness,
    Degree,
    Strength,
]

# Other lexical features
class NameType(MorphosyntacticFeature):
    """The type of a named entity; mostly applies to proper nouns."""

    place = auto()
    person = auto()
    person_given_name = auto()
    person_surname = auto()
    nationality = auto()
    company = auto()
    product = auto()
    other = auto()
class PronominalType(MorphosyntacticFeature):
    """A subclassification of pronouns."""

    article = auto()
    contrastive = auto()  # specific to Latin
    demonstrative = auto()
    emphatic = auto()
    exclamative = auto()
    indefinite = auto()
    interrogative = auto()
    negative = auto()
    personal = auto()
    reciprocal = auto()
    relative = auto()
    total = auto()
class AdpositionalType(MorphosyntacticFeature):
    """The position of an adposition."""

    preposition = auto()
    postposition = auto()
    circumposition = auto()
    vocalized_adposition = auto()
class AdverbialType(MorphosyntacticFeature):
    """The type of an adverb."""

    manner = auto()
    location = auto()
    time = auto()
    degree = auto()
    cause = auto()
    modality = auto()
class VerbType(MorphosyntacticFeature):
    """The kind of a functional verb."""

    auxiliary = auto()
    copula = auto()
    modal = auto()
    light = auto()
class Possessive(MorphosyntacticFeature):
    """Whether a nominal form is marked as a possessive."""

    pos = auto()
    neg = auto()
class Numeral(MorphosyntacticFeature):
    """A subclassification of numeric types."""

    cardinal = auto()
    distributive = auto()
    fractional = auto()
    multiplicative = auto()
    ordinal = auto()
    range = auto()
    sets = auto()
class Reflexive(MorphosyntacticFeature):
    """Whether a pronoun is reflexive."""

    pos = auto()
    neg = auto()
class Foreign(MorphosyntacticFeature):
    """Whether a word is foreign relative to the language of the sentence."""

    pos = auto()
    neg = auto()
class Abbreviation(MorphosyntacticFeature):
    """Whether a word is an abbreviation."""

    pos = auto()
    neg = auto()
class Typo(MorphosyntacticFeature):
    """Whether a word contains a typo."""

    pos = auto()
    neg = auto()
class InflClass(MorphosyntacticFeature):
    """The inflectional class of a word.

    Member names carry an ``ind_eur_`` or ``lat_`` prefix, presumably for
    Indo-European and Latin classes respectively (TODO: confirm against the
    treebanks that use this feature).
    """

    ind_eur_a = auto()
    ind_eur_e = auto()
    ind_eur_i = auto()
    ind_eur_o = auto()
    ind_eur_u = auto()
    ind_eur_x = auto()
    lat_a = auto()
    lat_anom = auto()
    lat_e = auto()
    lat_i = auto()
    lat_i2 = auto()
    lat_pron = auto()
    lat_x = auto()
    nominal = auto()
class NumValue(MorphosyntacticFeature):
    """A two-valued (``pos``/``neg``) ``NumValue`` feature.

    NOTE(review): the meaning of the two values is not documented in this
    module; confirm it against the treebanks that use this feature.
    """

    pos = auto()
    neg = auto()
class Proper(MorphosyntacticFeature):
    """A feature whose only value is ``yes``.

    Presumably it flags proper names (TODO: confirm).
    Added October 2023.
    """

    yes = auto()
class Form(MorphosyntacticFeature):
    """A two-valued (``pos``/``neg``) ``Form`` feature.

    NOTE(review): the meaning of the two values is not documented in this
    module; confirm it against the treebanks that use this feature.
    """

    pos = auto()
    neg = auto()
# The feature value of an underspecified feature.
Underspecified = None

# Inventory of the remaining (other lexical) feature classes.
# NOTE(review): InflClass, NumValue, Proper, Form and NumForm are defined in
# this module but are not listed in any of the *_FEATURES inventories visible
# here -- confirm whether that is intentional.
OTHER_FEATURES = [
    NameType,
    PronominalType,
    AdpositionalType,
    AdverbialType,
    VerbType,
    Possessive,
    Numeral,
    Reflexive,
    Foreign,
    Abbreviation,
    Typo,
]