describe
Corpus description utilities for Context Fabric.
This module provides centralized utilities for describing corpora, features, and text representations. It generates exhaustive samples for text format character coverage.
Usage:
>>> from cfabric.describe import describe_corpus, describe_feature
>>> result = describe_corpus(api, "BHSA")
>>> feature_info = describe_feature(api, "sp")

Classes
CorpusDescription
Complete corpus description.
Attributes
| Name | Type | Description |
|---|---|---|
| edge_features | list[dict[(str, str)]] | — |
| features | list[dict[(str, str)]] | — |
| name | str | — |
| node_types | list[dict[(str, Any)]] | — |
| sections | dict[(str, Any)] | — |
| text_representations | TextRepresentationInfo | — |
Methods
__init__(self, name: str, node_types: list[dict[(str, Any)]] = list(), sections: dict[(str, Any)] = dict(), text_representations: TextRepresentationInfo = (lambda: TextRepresentationInfo(description=''))(), features: list[dict[(str, str)]] = list(), edge_features: list[dict[(str, str)]] = list()) → None
Parameters
name: str
node_types: list[dict[(str, Any)]]
sections: dict[(str, Any)]
text_representations: TextRepresentationInfo
features: list[dict[(str, str)]]
edge_features: list[dict[(str, str)]]
to_dict(self) → dict[(str, Any)]

CorpusOverview
Slim corpus overview (node types and sections only).
Attributes
| Name | Type | Description |
|---|---|---|
| name | str | — |
| node_types | list[dict[(str, Any)]] | — |
| sections | dict[(str, Any)] | — |
Methods
__init__(self, name: str, node_types: list[dict[(str, Any)]] = list(), sections: dict[(str, Any)] = dict()) → None
Parameters
name: str
node_types: list[dict[(str, Any)]]
sections: dict[(str, Any)]
to_dict(self) → dict[(str, Any)]

FeatureCatalogEntry
Lightweight feature entry for catalog listing.
Attributes
| Name | Type | Description |
|---|---|---|
| description | str | — |
| kind | str | — |
| name | str | — |
| value_type | str | — |
Methods
__init__(self, name: str, kind: str, value_type: str, description: str = '') → None
Parameters
name: str
kind: str
value_type: str
description: str = ''
to_dict(self) → dict[(str, str)]

FeatureDescription
Detailed description of a feature.
Attributes
| Name | Type | Description |
|---|---|---|
| description | str | — |
| error | str \| None | — |
| has_values | bool \| None | — |
| kind | str | — |
| name | str | — |
| node_types | list[str] | — |
| sample_values | list[dict[(str, Any)]] | — |
| unique_values | int | — |
| value_type | str | — |
Methods
__init__(self, name: str, kind: str, value_type: str = '', description: str = '', node_types: list[str] = list(), unique_values: int = 0, sample_values: list[dict[(str, Any)]] = list(), has_values: bool | None = None, error: str | None = None) → None
Parameters
name: str
kind: str
value_type: str = ''
description: str = ''
node_types: list[str]
unique_values: int = 0
sample_values: list[dict[(str, Any)]]
has_values: bool | None = None
error: str | None = None
from_api(cls, api: Api, feature: str, sample_limit: int = 20) → FeatureDescription
Create FeatureDescription from API.
Parameters
cls
api: Api
feature: str
sample_limit: int = 20
to_dict(self) → dict[(str, Any)]

TextFormatInfo
Information about a text format pair (orig/trans).
Attributes
| Name | Type | Description |
|---|---|---|
| name | str | — |
| original_spec | str | — |
| samples | list[TextFormatSample] | — |
| total_samples | int | — |
| transliteration_spec | str | — |
| unique_characters | int | — |
Methods
__init__(self, name: str, original_spec: str, transliteration_spec: str, samples: list[TextFormatSample] = list(), unique_characters: int = 0, total_samples: int = 0) → None
Parameters
name: str
original_spec: str
transliteration_spec: str
samples: list[TextFormatSample]
unique_characters: int = 0
total_samples: int = 0
to_dict(self) → dict[(str, Any)]

TextFormatSample
A single text sample showing original and transliterated forms.
Attributes
| Name | Type | Description |
|---|---|---|
| original | str | — |
| transliterated | str | — |
Methods
__init__(self, original: str, transliterated: str) → None
Parameters
original: str
transliterated: str
to_dict(self) → dict[(str, str)]

TextRepresentationInfo
Complete text representation info for a corpus.
Attributes
| Name | Type | Description |
|---|---|---|
| description | str | — |
| formats | list[TextFormatInfo] | — |
Methods
__init__(self, description: str, formats: list[TextFormatInfo] = list()) → None
Parameters
description: str
formats: list[TextFormatInfo]
to_dict(self) → dict[(str, Any)]

Functions
describe_corpus(api: Api, name: str = '') → CorpusDescription
Get complete corpus description.
Parameters
api: Api
name: str = ''
describe_corpus_overview(api: Api, name: str = '') → CorpusOverview
Get slim corpus overview (node types and sections only).
Parameters
api: Api
name: str = ''
describe_feature(api: Api, feature: str, sample_limit: int = 20) → FeatureDescription
Get detailed description of a single feature.
Parameters
api: Api
feature: str
sample_limit: int = 20
describe_features(api: Api, features: list[str], sample_limit: int = 20) → dict[(str, FeatureDescription)]
Get detailed descriptions for multiple features.
Parameters
api: Api
features: list[str]
sample_limit: int = 20
describe_text_formats(api: Api) → TextRepresentationInfo
Get text format descriptions with exhaustive character coverage.
Parameters
api: Api
get_all_feature_otypes(api: Api, samples_per_type: int = 100) → dict[(str, list[str])]
Pre-compute otype mappings for all features.
Parameters
api: Api
samples_per_type: int = 100
get_feature_otypes(api: Api, feature: str, samples_per_type: int = 100) → list[str]
Determine which node types a feature applies to.
Parameters
api: Api
feature: str
samples_per_type: int = 100
list_features(api: Api, kind: str = 'all', node_types: list[str] | None = None) → list[FeatureCatalogEntry]
List features with optional filtering.
Parameters
api: Api
kind: str = 'all'
node_types: list[str] | None = None