Skip to content

extraction

Modules:

Classes:

Functions:

BaseMedicalReportExtractor

Bases: ABC, Generic[T]

Methods:

extract_report_data abstractmethod

extract_report_data(source: T) -> dict[str, Any]
Source code in src/hiperhealth/skills/extraction/medical_reports.py
@abstractmethod
def extract_report_data(self, source: T) -> dict[str, Any]:
    """
    title: Extract structured report data from a source.
    summary: |-
      Abstract hook that concrete extractors override. This base
      version does no work and always raises NotImplementedError.
    parameters:
      source:
        type: T
        description: Input to extract from, typed by the generic T.
    returns:
      type: dict[str, Any]
      description: Extracted report data keyed by string.
    """
    raise NotImplementedError()

BaseWearableDataExtractor

Bases: ABC, Generic[T]

Methods:

extract_wearable_data abstractmethod

extract_wearable_data(
    source: T,
) -> list[dict[str, object]]
Source code in src/hiperhealth/skills/extraction/wearable.py
@abstractmethod
def extract_wearable_data(
    self,
    source: T,
) -> list[dict[str, object]]:
    """
    title: Extract wearable data records from a source.
    summary: |-
      Abstract hook that concrete extractors override. This base
      version always raises NotImplementedError, passing the
      received source as the exception argument.
    parameters:
      source:
        type: T
        description: Input to extract from, typed by the generic T.
    returns:
      type: list[dict[str, object]]
      description: Extracted records, one dictionary per entry.
    """
    # The source is forwarded so the error shows which input was rejected.
    raise NotImplementedError(source)

ExtractionSkill

ExtractionSkill()

Bases: BaseSkill

Looks for file sources in ctx.extras['extraction_sources'] and stores the extracted data in ctx.results['intake']. Attributes: _report_extractor is the MedicalReportFileExtractor instance used for medical report files; _wearable_extractor is the WearableDataFileExtractor instance used for wearable data files.

Methods:

Source code in src/hiperhealth/skills/extraction/skill.py
def __init__(self) -> None:
    """
    title: Register skill metadata and create the file extractors.
    summary: |-
      Passes the built-in metadata (name, version, stages and
      description) to the base class, then instantiates one
      extractor for medical reports and one for wearable data.
    """
    metadata = SkillMetadata(
        name='hiperhealth.extraction',
        version='0.4.0',
        # This skill is registered for the intake stage only.
        stages=(Stage.INTAKE,),
        description=(
            'Extract text from medical reports and '
            'wearable data files.'
        ),
    )
    super().__init__(metadata)

    # One extractor per supported source category.
    self._report_extractor = MedicalReportFileExtractor()
    self._wearable_extractor = WearableDataFileExtractor()

check_requirements

check_requirements(
    stage: str, ctx: PipelineContext
) -> list[Inquiry]

Override to return a list of Inquiry objects describing what additional data the skill needs. The default implementation returns an empty list (no extra data needed). Inquiries use three priority levels: required (must have before this stage can run), supplementary (improves results, available now), and deferred (only available after a future pipeline step). Parameters: stage (str) and ctx (PipelineContext). Returns: list[Inquiry].

Source code in src/hiperhealth/pipeline/skill.py
def check_requirements(
    self,
    stage: str,
    ctx: PipelineContext,
) -> list[Inquiry]:
    """
    title: Report what information is needed before execution.
    summary: |-
      Skills override this to return Inquiry objects describing
      the additional data they need. This default implementation
      asks for nothing and returns an empty list.
      Inquiries use three priority levels:
      - required: must have before this stage can run
      - supplementary: improves results, available now
      - deferred: only available after a future pipeline step
    parameters:
      stage:
        type: str
        description: Stage name; not read by this default.
      ctx:
        type: PipelineContext
        description: Pipeline context; not read by this default.
    returns:
      type: list[Inquiry]
      description: A new empty list.
    """
    no_inquiries: list[Inquiry] = []
    return no_inquiries

execute

execute(
    stage: str, ctx: PipelineContext
) -> PipelineContext
Source code in src/hiperhealth/skills/extraction/skill.py
def execute(self, stage: str, ctx: PipelineContext) -> PipelineContext:
    """
    title: Extract data from the sources listed in ctx.extras.
    summary: |-
      Runs only for the intake stage; any other stage returns the
      context untouched. Sources are read from the
      extraction_sources entry of ctx.extras, and every extracted
      item is appended to the intake entry of ctx.results under
      the same key as its source list.
    parameters:
      stage:
        type: str
        description: Stage being run, compared with Stage.INTAKE.
      ctx:
        type: PipelineContext
        description: Context providing extras and receiving results.
    returns:
      type: PipelineContext
      description: The same context object that was passed in.
    """
    if stage != Stage.INTAKE:
        return ctx

    requested = ctx.extras.get('extraction_sources', {})
    # Reuse intake results already present so new items are appended.
    intake: dict[str, Any] = ctx.results.get(Stage.INTAKE, {})

    # Each key names both the source list and the results list.
    handlers = (
        ('medical_reports', self._report_extractor.extract_report_data),
        ('wearable_data', self._wearable_extractor.extract_wearable_data),
    )
    for key, extract in handlers:
        for item in requested.get(key, []):
            # Extract first so a failing item never creates an empty list.
            extracted = extract(item)
            intake.setdefault(key, []).append(extracted)

    ctx.results[Stage.INTAKE] = intake
    return ctx

post

post(stage: str, ctx: PipelineContext) -> PipelineContext
Source code in src/hiperhealth/pipeline/skill.py
def post(self, stage: str, ctx: PipelineContext) -> PipelineContext:
    """
    title: Hook called after the stage's main execution.
    summary: |-
      This default implementation does nothing and hands the
      context back unchanged.
    parameters:
      stage:
        type: str
        description: Stage name; not read by this default.
      ctx:
        type: PipelineContext
        description: Pipeline context to pass through.
    returns:
      type: PipelineContext
      description: The same context object that was passed in.
    """
    return ctx

pre

pre(stage: str, ctx: PipelineContext) -> PipelineContext
Source code in src/hiperhealth/pipeline/skill.py
def pre(self, stage: str, ctx: PipelineContext) -> PipelineContext:
    """
    title: Hook called before the stage's main execution.
    summary: |-
      This default implementation does nothing and hands the
      context back unchanged.
    parameters:
      stage:
        type: str
        description: Stage name; not read by this default.
      ctx:
        type: PipelineContext
        description: Pipeline context to pass through.
    returns:
      type: PipelineContext
      description: The same context object that was passed in.
    """
    return ctx

MedicalReportExtractorError

Bases: Exception

MedicalReportFileExtractor

MedicalReportFileExtractor()

Bases: BaseMedicalReportExtractor[FileInput]

Methods:

Attributes:

Source code in src/hiperhealth/skills/extraction/medical_reports.py
def __init__(self) -> None:
    """
    title: Set up empty caches and the mimetype detector.
    summary: |-
      Creates an empty mimetype cache, an empty text cache and a
      magic.Magic detector constructed with mime=True.
    """
    # Both caches start empty and belong to this instance only.
    self._mimetype_cache: dict[str, MimeType] = {}
    self._text_cache: dict[str, str] = {}

    # Detector used for mimetype detection.
    self.mime = magic.Magic(mime=True)

allowed_extensions property

allowed_extensions: list[FileExtension]

allowed_mimetypes property

allowed_mimetypes: list[MimeType]

extract_report_data

extract_report_data(source: FileInput) -> dict[str, Any]
Source code in src/hiperhealth/skills/extraction/medical_reports.py
def extract_report_data(self, source: FileInput) -> dict[str, Any]:
    """
    title: Validate the input, then return its extracted report data.
    summary: |-
      Runs _validate_or_raise on the source first, so processing
      only happens when validation did not raise. The result of
      _process_file is returned as is.
    parameters:
      source:
        type: FileInput
        description: Report file to validate and process.
    returns:
      type: dict[str, Any]
      description: Extracted text plus basic metadata.
    """
    self._validate_or_raise(source)
    report = self._process_file(source)
    return report

extract_text

extract_text(source: FileInput) -> str
Source code in src/hiperhealth/skills/extraction/medical_reports.py
def extract_text(self, source: FileInput) -> str:
    """
    title: Validate the input, then return only its raw text.
    summary: |-
      Runs _validate_or_raise on the source first, so text is only
      extracted when validation did not raise. The result of
      _extract_text is returned as is.
    parameters:
      source:
        type: FileInput
        description: Report file to validate and read.
    returns:
      type: str
      description: Raw text extracted from the source.
    """
    self._validate_or_raise(source)
    raw_text = self._extract_text(source)
    return raw_text

TextExtractionError

WearableDataExtractorError

Bases: Exception

WearableDataFileExtractor

WearableDataFileExtractor()

Bases: BaseWearableDataExtractor[FileInput]

Methods:

Attributes:

Source code in src/hiperhealth/skills/extraction/wearable.py
def __init__(self) -> None:
    """
    title: Set up the mimetype cache and the python-magic detector.
    summary: |-
      Creates an empty mimetype cache and a magic.Magic detector
      constructed with mime=True.
    """
    # The cache starts empty and belongs to this instance only.
    self._mimetype_cache: dict[str, MimeType] = {}

    # Detector used for mimetype detection.
    self.mime: magic.Magic = magic.Magic(mime=True)

allowed_extensions property

allowed_extensions: list[FileExtension]

allowed_mimetypes property

allowed_mimetypes: list[MimeType]

extract_wearable_data

extract_wearable_data(
    file: FileInput,
) -> list[dict[str, object]]
Source code in src/hiperhealth/skills/extraction/wearable.py
def extract_wearable_data(self, file: FileInput) -> list[dict[str, object]]:
    """
    title: Validate the file, then return its wearable data.
    summary: |-
      Runs _validate_or_raise on the file first, so processing
      only happens when validation did not raise. The result of
      _process_file is returned as is.
    parameters:
      file:
        type: FileInput
        description: Wearable data file to validate and process.
    returns:
      type: list[dict[str, object]]
      description: Extracted records, one dictionary per entry.
    """
    self._validate_or_raise(file)
    records = self._process_file(file)
    return records

is_supported

is_supported(file: FileInput) -> bool
Source code in src/hiperhealth/skills/extraction/wearable.py
def is_supported(self, file: FileInput) -> bool:
    """
    title: Tell whether the given file can be handled.
    summary: |-
      In-memory files are checked by _validate_inmemory_file.
      Path objects are checked by extension against
      allowed_extensions. Anything else is checked by mimetype
      against allowed_mimetypes.
    parameters:
      file:
        type: FileInput
        description: File object or path to check.
    returns:
      type: bool
      description: True when the file is supported.
    """
    in_memory_types = (tempfile.SpooledTemporaryFile, io.BytesIO)
    if isinstance(file, in_memory_types):
        # In-memory buffers are validated by a dedicated helper.
        return self._validate_inmemory_file(file)

    if not isinstance(file, Path):
        # Not a Path and not in memory, so decide by detected mimetype.
        return self._get_mime_type(file) in self.allowed_mimetypes

    # Paths are judged by extension alone, compared without the dot.
    extension = file.suffix.replace('.', '')
    return extension in self.allowed_extensions

get_medical_report_extractor

get_medical_report_extractor() -> (
    MedicalReportFileExtractor
)
Source code in src/hiperhealth/skills/extraction/medical_reports.py
def get_medical_report_extractor() -> MedicalReportFileExtractor:
    """
    title: Build a new MedicalReportFileExtractor.
    summary: |-
      Every call constructs a fresh extractor; no instance is
      shared between calls.
    returns:
      type: MedicalReportFileExtractor
      description: A newly constructed extractor.
    """
    extractor = MedicalReportFileExtractor()
    return extractor