ud_deprels
Universal Dependencies (UD) dependency relations.
This module defines the core UD dependency relations (DepRel) and provides validated data models and helpers for working with them.
References
- UD dependency relations: https://universaldependencies.org/u/dep/index.html
VALID_DEPREL_CATEGORIES
module-attribute
# Allowed (word_type, syntactic_role) pair for each core UD relation code.
# The second element is None for relations that carry no syntactic role.
# Every code defined in UD_DEPRELS needs an entry here so the two tables agree.
VALID_DEPREL_CATEGORIES: dict[str, tuple[str, Optional[str]]] = {
    # Core arguments
    "nsubj": ("Nominal", "Core Argument"),
    "obj": ("Nominal", "Core Argument"),
    "iobj": ("Nominal", "Core Argument"),
    "csubj": ("Clause", "Core Argument"),
    "ccomp": ("Clause", "Core Argument"),
    "xcomp": ("Clause", "Core Argument"),
    # Non-core dependents
    "obl": ("Nominal", "Non-core Dependent"),
    "vocative": ("Nominal", "Non-core Dependent"),
    "expl": ("Nominal", "Non-core Dependent"),
    "dislocated": ("Nominal", "Non-core Dependent"),
    "advcl": ("Clause", "Non-core Dependent"),
    "advmod": ("Modifier Word", "Non-core Dependent"),
    "discourse": ("Modifier Word", "Non-core Dependent"),
    "aux": ("Function Word", "Non-core Dependent"),
    "cop": ("Function Word", "Non-core Dependent"),
    "mark": ("Function Word", "Non-core Dependent"),
    # Nominal dependents
    "nmod": ("Nominal", "Nominal Dependent"),
    "appos": ("Nominal", "Nominal Dependent"),
    "nummod": ("Nominal", "Nominal Dependent"),
    "acl": ("Clause", "Nominal Dependent"),
    "amod": ("Modifier Word", "Nominal Dependent"),
    "det": ("Function Word", "Nominal Dependent"),
    # "clf" was missing although UD_DEPRELS defines it with exactly this pair.
    "clf": ("Function Word", "Nominal Dependent"),
    "case": ("Function Word", "Nominal Dependent"),
    # Relations without a syntactic role
    "conj": ("Coordination", None),
    "cc": ("Coordination", None),
    "fixed": ("Headless", None),
    "flat": ("Headless", None),
    "list": ("Loose", None),
    "parataxis": ("Loose", None),
    "compound": ("Special", None),
    "orphan": ("Special", None),
    "goeswith": ("Special", None),
    "reparandum": ("Special", None),
    "punct": ("Other", None),
    "root": ("Other", None),
    "dep": ("Other", None),
}
UD_DEPRELS
module-attribute
# Registry of UD relation definitions, keyed by relation code.
# Entries are listed alphabetically by code, and each key repeats the `code`
# field of its entry. `subtypes` is passed only for relations that declare
# subtypes; the remaining entries omit the argument. `syntactic_role=None` is
# spelled out for relations that have no syntactic role.
UD_DEPRELS: dict[str, UDDeprel] = {
"acl": UDDeprel(
code="acl",
name="clausal modifier of noun",
word_type="Clause",
syntactic_role="Nominal Dependent",
description="Clausal modifier of noun (adnominal clause).",
subtypes=["relcl"],
),
"advcl": UDDeprel(
code="advcl",
name="adverbial clause modifier",
word_type="Clause",
syntactic_role="Non-core Dependent",
description="Adverbial clause modifier.",
subtypes=["relcl"],
),
"advmod": UDDeprel(
code="advmod",
name="adverbial modifier",
word_type="Modifier Word",
syntactic_role="Non-core Dependent",
description="Adverbial modifier.",
subtypes=["emph", "lmod"],
),
"amod": UDDeprel(
code="amod",
name="adjectival modifier",
word_type="Modifier Word",
syntactic_role="Nominal Dependent",
description="Adjectival modifier.",
),
"appos": UDDeprel(
code="appos",
name="appositional modifier",
word_type="Nominal",
syntactic_role="Nominal Dependent",
description="Appositional modifier.",
),
"aux": UDDeprel(
code="aux",
name="auxiliary",
word_type="Function Word",
syntactic_role="Non-core Dependent",
description="Auxiliary.",
subtypes=["pass"],
),
"case": UDDeprel(
code="case",
name="case marking",
word_type="Function Word",
syntactic_role="Nominal Dependent",
description="Case marking.",
),
"cc": UDDeprel(
code="cc",
name="coordinating conjunction",
word_type="Coordination",
syntactic_role=None,
description="Coordinating conjunction.",
subtypes=["preconj"],
),
"ccomp": UDDeprel(
code="ccomp",
name="clausal complement",
word_type="Clause",
syntactic_role="Core Argument",
description="Clausal complement with internal subject.",
),
"clf": UDDeprel(
code="clf",
name="classifier",
word_type="Function Word",
syntactic_role="Nominal Dependent",
description="Classifier.",
),
"compound": UDDeprel(
code="compound",
name="compound",
word_type="Special",
syntactic_role=None,
description="Compound.",
subtypes=["lvc", "prt", "redup", "svc"],
),
"conj": UDDeprel(
code="conj",
name="conjunct",
word_type="Coordination",
syntactic_role=None,
description="Conjunct.",
),
"cop": UDDeprel(
code="cop",
name="copula",
word_type="Function Word",
syntactic_role="Non-core Dependent",
description="Copula.",
),
"csubj": UDDeprel(
code="csubj",
name="clausal subject",
word_type="Clause",
syntactic_role="Core Argument",
description="Clausal subject.",
subtypes=["outer", "pass"],
),
"dep": UDDeprel(
code="dep",
name="unspecified dependency",
word_type="Other",
syntactic_role=None,
description="Unspecified dependency.",
),
"det": UDDeprel(
code="det",
name="determiner",
word_type="Function Word",
syntactic_role="Nominal Dependent",
description="Determiner.",
subtypes=["numgov", "nummod", "poss"],
),
"discourse": UDDeprel(
code="discourse",
name="discourse element",
word_type="Modifier Word",
syntactic_role="Non-core Dependent",
description="Discourse element.",
),
"dislocated": UDDeprel(
code="dislocated",
name="dislocated elements",
word_type="Nominal",
syntactic_role="Non-core Dependent",
description="Dislocated elements.",
),
"expl": UDDeprel(
code="expl",
name="expletive",
word_type="Nominal",
syntactic_role="Non-core Dependent",
description="Expletive.",
subtypes=["impers", "pass", "pv"],
),
"fixed": UDDeprel(
code="fixed",
name="fixed multiword expression",
word_type="Headless",
syntactic_role=None,
description="Fixed multiword expression.",
),
"flat": UDDeprel(
code="flat",
name="flat multiword expression",
word_type="Headless",
syntactic_role=None,
description="Flat multiword expression.",
subtypes=["foreign", "name"],
),
"goeswith": UDDeprel(
code="goeswith",
name="goes with",
word_type="Special",
syntactic_role=None,
description="Goes with.",
),
"iobj": UDDeprel(
code="iobj",
name="indirect object",
word_type="Nominal",
syntactic_role="Core Argument",
description="Indirect object.",
),
"list": UDDeprel(
code="list",
name="list",
word_type="Loose",
syntactic_role=None,
description="List.",
),
"mark": UDDeprel(
code="mark",
name="marker",
word_type="Function Word",
syntactic_role="Non-core Dependent",
description="Marker.",
),
"nmod": UDDeprel(
code="nmod",
name="nominal modifier",
word_type="Nominal",
syntactic_role="Nominal Dependent",
description="Nominal modifier.",
subtypes=["poss", "tmod"],
),
"nsubj": UDDeprel(
code="nsubj",
name="nominal subject",
word_type="Nominal",
syntactic_role="Core Argument",
description="Nominal subject.",
subtypes=["outer", "pass"],
),
"nummod": UDDeprel(
code="nummod",
name="numeric modifier",
word_type="Nominal",
syntactic_role="Nominal Dependent",
description="Numeric modifier.",
subtypes=["gov"],
),
"obj": UDDeprel(
code="obj",
name="object",
word_type="Nominal",
syntactic_role="Core Argument",
description="Object.",
),
"obl": UDDeprel(
code="obl",
name="oblique nominal",
word_type="Nominal",
syntactic_role="Non-core Dependent",
description="Oblique nominal.",
subtypes=["agent", "arg", "lmod", "tmod"],
),
"orphan": UDDeprel(
code="orphan",
name="orphan",
word_type="Special",
syntactic_role=None,
description="Orphan.",
),
"parataxis": UDDeprel(
code="parataxis",
name="parataxis",
word_type="Loose",
syntactic_role=None,
description="Parataxis.",
),
"punct": UDDeprel(
code="punct",
name="punctuation",
word_type="Other",
syntactic_role=None,
description="Punctuation.",
),
"reparandum": UDDeprel(
code="reparandum",
name="overridden disfluency",
word_type="Special",
syntactic_role=None,
description="Overridden disfluency.",
),
"root": UDDeprel(
code="root",
name="root",
word_type="Other",
syntactic_role=None,
description="Root.",
),
"vocative": UDDeprel(
code="vocative",
name="vocative",
word_type="Nominal",
syntactic_role="Non-core Dependent",
description="Vocative.",
),
"xcomp": UDDeprel(
code="xcomp",
name="open clausal complement",
word_type="Clause",
syntactic_role="Core Argument",
description="Open clausal complement.",
),
}
UDDeprel
Bases: BaseModel
Canonical UD dependency relation definition.
Represents a single UD relation (e.g., nsubj) together with its
human-readable name, high-level word type, optional syntactic role, and
other metadata.
Validation ensures that the combination of code, word_type and
syntactic_role matches the UD taxonomy encoded in
VALID_DEPREL_CATEGORIES.
Attributes:
- code (str) – Short UD code (e.g., "nsubj").
- name (str) – Human-readable name (e.g., "nominal subject").
- word_type (Literal['Nominal', 'Clause', 'Modifier Word', 'Function Word', 'Coordination', 'Headless', 'Loose', 'Special', 'Other']) – High-level category describing the head/word type.
- syntactic_role (Optional[Literal['Core Argument', 'Non-core Dependent', 'Nominal Dependent']]) – Optional role such as "Core Argument".
- subtypes (Optional[list[str]]) – Optional list of defined UD subtypes for this relation.
- description (str) – Official UD description of the relation.
- is_obsolete (Optional[bool]) – Whether the relation is obsolete or discouraged.
word_type
instance-attribute
# High-level category describing the head/word type; restricted to the nine
# labels listed below.
word_type: Literal[
"Nominal",
"Clause",
"Modifier Word",
"Function Word",
"Coordination",
"Headless",
"Loose",
"Special",
"Other",
]
syntactic_role
class-attribute
instance-attribute
# Optional syntactic role of the relation; one of the three labels below, or
# None (the default) when the relation has no role.
syntactic_role: Optional[
Literal[
"Core Argument",
"Non-core Dependent",
"Nominal Dependent",
]
] = None
validate_categories
Validate word_type/syntactic_role for this relation.
Ensures that the pair (word_type, syntactic_role) is allowed for the
given code according to VALID_DEPREL_CATEGORIES.
Raises:
- ValueError – If the combination is not allowed for code.
Returns:
- UDDeprel – The validated model instance (self).
Source code in cltk/morphosyntax/ud_deprels.py
UDDeprelTag
Bases: BaseModel
A concrete tag instance referencing a UD relation.
This model validates that the code is a known UD relation and, if a
subtype is provided, that it is permitted for the given code.
Attributes:
- code (str) – Short UD code (e.g., "nsubj").
- name (str) – Human-readable name for display.
- subtype (Optional[str]) – Optional UD subtype (e.g., "outer", "pass").
validate_code
classmethod
Ensure that deprel_code is one of the known UD relations.
Parameters:
- deprel_code (str) – Candidate UD DepRel code.
Raises:
- ValueError – If deprel_code is not present in UD_DEPRELS.
Returns:
- str – The validated code.
Source code in cltk/morphosyntax/ud_deprels.py
validate_subtype
classmethod
Validate that subtype is allowed for the given code.
Uses the already-validated code value from Pydantic's values to
check whether a provided subtype is among the declared subtypes for
that relation.
Parameters:
- subtype (Optional[str]) – Optional UD subtype to validate.
- values (ValidationInfo) – Pydantic validation context containing code.
Raises:
- ValueError – If a non-empty subtype is not permitted for code.
Returns:
- Optional[str] – The provided subtype if valid, otherwise None.
Source code in cltk/morphosyntax/ud_deprels.py
normalize_deprel
Normalize a UD deprel pair to a valid combination.
If subtype is not one of the allowed subtypes for code (as declared
in this module), drop the subtype and log the remapping. This accommodates
common non‑UD patterns such as case‑coded subtypes (e.g., nmod:gen,
obl:abl) when morphology is already handled elsewhere.
Parameters:
- code (str) – UD deprel code (e.g., "obl", "nmod").
- subtype (Optional[str]) – Optional subtype string.
Returns:
- tuple[str, Optional[str]] – A tuple (code, normalized_subtype) where the subtype may be None if it was not allowed for the given code.
Source code in cltk/morphosyntax/ud_deprels.py
is_valid_deprel
Return whether code is a known UD relation.
Parameters:
- code (str) – Candidate UD DepRel code to check.
Returns:
- bool – True if code is defined in UD_DEPRELS; otherwise False.
Source code in cltk/morphosyntax/ud_deprels.py
get_ud_deprel_tag
Build a UDDeprelTag for a UD relation if available.
Applies a light normalization step: when the provided subtype is not
permitted for code, it is dropped and the remapping is logged.
Parameters:
- code (str) – UD DepRel code to look up.
- subtype (Optional[str], default: None) – Optional UD subtype to attach to the tag.
Returns:
- Optional[UDDeprelTag] – A validated UDDeprelTag for the given code, or None if the code is unknown.