Source code for graphrag_sdk.source

from typing import Iterator
from abc import ABC, abstractmethod
from graphrag_sdk.Document import Document
from graphrag_sdk.document_loaders import *


[docs]
def Source(path:str, instruction:str|None=None) ->'AbstractSource':
    """
    Creates a source object

    Parameters:
        path (str): path to source
        instruction (str): source specific instruction for the LLM

    Returns:
        AbstractSource: source
    """

    if not isinstance(path, str) or path == "":
        raise Exception("Invalid argument, path should be a none empty string.")

    s = None

    if ".pdf" in path.lower():
        s = PDF(path)
    elif ".html" in path.lower() or "http" in path.lower():
        s = HTML(path)
    else:
        s = TEXT(path)

    # Set source instructions
    s.instruction = instruction

    return s



[docs]
class AbstractSource(ABC):
    """
    Abstract class representing a source file

    A source pairs a path with a loader object. The base class leaves
    ``loader`` as ``None``; concrete sources are expected to assign one that
    exposes a ``load()`` method.

    Sources compare and hash by ``path`` only, so two sources with the same
    path are equal (and collapse to one entry in a set or dict) regardless
    of their concrete subclass.
    """

    def __init__(self, path:str):
        """
        Parameters:
            path (str): path to source
        """
        self.path = path
        # Placeholder until a concrete source assigns its loader.
        self.loader = None

    # The return annotation is quoted so it is not evaluated while the class
    # body is being executed.
    def load(self) -> 'Iterator[Document]':
        """
        Loads the source by delegating to its loader

        Returns:
            Iterator[Document]: the result of ``self.loader.load()``

        Raises:
            AttributeError: if no loader was assigned (``loader`` is still None)
        """
        return self.loader.load()

    def __eq__(self, other) -> bool:
        """
        Two sources are equal when their paths are equal
        """
        if not isinstance(other, AbstractSource):
            # Let Python try the reflected comparison; for unrelated types
            # `==` still evaluates to False.
            return NotImplemented

        return self.path == other.path

    def __hash__(self) -> int:
        # Must agree with __eq__: hash on the same key that equality uses.
        return hash(self.path)



[docs]
class PDF(AbstractSource):
    """
    Source whose loader is a PDFLoader
    """

    def __init__(self, path):
        # The base initialiser records the path; the loader is then built
        # from that stored value.
        super().__init__(path)
        pdf_loader = PDFLoader(self.path)
        self.loader = pdf_loader



[docs]
class TEXT(AbstractSource):
    """
    Source whose loader is a TextLoader
    """

    def __init__(self, path):
        # The base initialiser records the path; the loader is then built
        # from that stored value.
        super().__init__(path)
        text_loader = TextLoader(self.path)
        self.loader = text_loader



[docs]
class HTML(AbstractSource):
    """
    Source whose loader is an HTMLLoader
    """

    def __init__(self, path):
        # The base initialiser records the path; the loader is then built
        # from that stored value.
        super().__init__(path)
        html_loader = HTMLLoader(self.path)
        self.loader = html_loader