Source code for sanskrit_data.schema.books

"""
Intro
-----------

Here we define data-containers to store books. Books are stored or represented as a tree of BookPortion objects - book
containing many chapters containing many lines etc..

-  JSON schema mindmap
   `here <https://drive.mindmup.com/map?state=%7B%22ids%22:%5B%220B1_QBT-hoqqVbHc4QTV3Q2hjdTQ%22%5D,%22action%22:%22open%22,%22userId%22:%22109000762913288837175%22%7D>`__
   (Updated as needed).
- `An introductory video <https://youtu.be/bkd1pPL8HXk>`_.
- For general context and class diagram, refer to :mod:`~sanskrit_data.schema`.
"""
import logging
import sys

from sanskrit_data.schema import common
from sanskrit_data.schema.common import UllekhanamJsonObject, TYPE_FIELD, JsonObject, Target, DataSource, Text, \
  NamedEntity


[docs]class BookPositionTarget(Target): schema = common.recursively_merge_json_schemas(Target.schema, { "type": "object", "description": "A BookPortion could represent a Book or a chapter or a verse or a half-verse or a sentence or any such unit.", "properties": { TYPE_FIELD: { "enum": ["BookPositionTarget"] }, "position": { "type": "number", "description": "Any number describing the position of one BookPortion within another." } } })
[docs] @classmethod def from_details(cls, container_id=None, position=None): target = BookPositionTarget() if container_id: target.container_id = container_id if position: target.position = position target.validate(db_interface=None) return target
[docs]class PublicationDetails(JsonObject): schema = common.recursively_merge_json_schemas(JsonObject.schema, ({ "type": "object", "description": "Publication details of a BookPortion.", "properties": { TYPE_FIELD: { "enum": ["PublicationDetails"] }, "release_time": { "type": "string" }, "publisher": NamedEntity.schema, "canonical_source": { "type": "string", }, "issue_page": { "type": "string", }, } }))
[docs]class CreationDetails(NamedEntity): """Many names are possible for the same work (eg. meghasandeshaH vs meghadUtam) - hence we extend the NamedEntity schema.""" schema = common.recursively_merge_json_schemas(NamedEntity.schema, ({ "type": "object", "properties": { TYPE_FIELD: { "enum": ["CreationDetails"] }, "authors": { "type": "array", "items": NamedEntity.schema } } }))
[docs] @classmethod def from_details(cls, names, authors=None): obj = CreationDetails() obj.names = names if authors is not None: obj.authors = authors return obj
[docs]class BookPortion(UllekhanamJsonObject): schema = common.recursively_merge_json_schemas(JsonObject.schema, ({ "type": "object", "description": "A BookPortion could represent a Book or a chapter or a verse or a half-verse or a sentence or any such unit.", "properties": { TYPE_FIELD: { "enum": ["BookPortion"] }, "creation_details": CreationDetails.schema, "path": { "type": "string", "description": "The path prefix where files are to be stored. " "If this field is empty, such a path is computed from _id of this object " "and its ancestors. " "Ideally, the value stored here should equal the result of this computation " "- but it may not be the case, especially in the following cases: " "* Imported books " "* Moved BookPortions. " " Once upon a time this field also uniquely identified a BookPortion." }, "thumbnail_path": { "type": "string" }, "base_data": { "type": "string", "enum": ["image", "text"] }, # naming the field that contains this object `source` to make querying uniform (ref Annotation.source). "source": DataSource.schema, "publication_details": PublicationDetails.schema, "portion_class": { "type": "string", "description": "book, part, chapter, verse, line etc.." }, "curated_content": Text.schema, "editable_by_others": { "default": False }, "targets": { "maxLength": 1, "items": BookPositionTarget.schema, "description": ( "Target for BookPortion of which this BookPortion is a part. It is an array only for consistency. " "For any given BookPortion, one can get the right order of contained BookPortions by seeking all " "BookPortions referring to it in the targets list, and sorting them by their target.position values.") } }, })) target_class = BookPositionTarget
[docs] @classmethod def get_allowed_target_classes(cls): return [BookPortion]
[docs] @classmethod def from_details(cls, title, path=None, authors=None, targets=None, base_data=None, curated_content=None, portion_class=None, publication_details=None, source=None): if authors is None: authors = [] book_portion = BookPortion() book_portion.title = title book_portion.authors = authors # logging.debug(str(book_portion)) if path: book_portion.path = path targets = targets or [] logging.debug(str(book_portion)) book_portion.targets = targets if curated_content is not None: book_portion.curated_content = curated_content if base_data is not None: book_portion.base_data = base_data if portion_class is not None: book_portion.portion_class = portion_class if publication_details is not None: book_portion.publication_details = publication_details if source is not None: book_portion.source = source book_portion.validate() return book_portion
[docs] @classmethod def from_path(cls, path, db_interface): book_portion_dict = db_interface.find_one(find_filter={"path": path}) if book_portion_dict is None: return None else: book_portion = JsonObject.make_from_dict(book_portion_dict) return book_portion
[docs] @classmethod def add_indexes(cls, db_interface): super(BookPortion, cls).add_indexes(db_interface=db_interface) db_interface.add_index(keys_dict={ "creation_details.names.script_renderings.text": 1 }, index_name="creation_details_names_script_renderings_text") db_interface.add_index(keys_dict={ "creation_details.authors.names.script_renderings.text": 1 }, index_name="creation_details_authors_names_script_renderings_text") db_interface.add_index(keys_dict={ "curated_content.script_renderings.text": 1 }, index_name="curated_content_script_renderings_text") db_interface.add_index(keys_dict={ "creation_details.names.search_strings": 1 }, index_name="creation_details_names_search_strings") db_interface.add_index(keys_dict={ "creation_details.authors.names.search_strings": 1 }, index_name="creation_details_authors_names_search_strings") db_interface.add_index(keys_dict={ "curated_content.search_strings": 1 }, index_name="curated_content_search_strings")
[docs] def get_path(self, db_interface): external_file_store = db_interface.external_file_store import os if hasattr(self, "path") and self.path is not None: return os.path.join(external_file_store, self.path) elif hasattr(self, "targets") and self.targets is not None and len(self.targets) > 0: container_book = self.targets[0] return os.path.join(external_file_store, container_book.get_path(db_interface=db_interface), self._id)
[docs] def dump_book_portion(self, export_dir, db_interface): import os book_node = common.JsonObjectNode.from_details(content=self) book_node.fill_descendents(db_interface=db_interface, entity_type="BookPortion") export_dir_destination = os.path.join(export_dir, self._id) if self.portion_class == "book": import copy copied_node = copy.deepcopy(book_node) copied_node.recursively_delete_attr(field_name="path") copied_node.dump_to_file(filename=os.path.join(export_dir_destination, "book.json")) elif self.portion_class == "page": # Just dump the file. import shutil # TODO: Remove this branch once data migration is done. if hasattr(self, "path"): src_file = self.path # noinspection PyArgumentList os.makedirs(name=export_dir_destination, exist_ok=True) shutil.copyfile(os.path.join(db_interface.external_file_store, src_file), os.path.join(export_dir_destination, "content" + os.path.splitext( os.path.basename(src_file))[1])) else: for f in self.list_files(db_interface=db_interface): # noinspection PyArgumentList os.makedirs(name=export_dir_destination, exist_ok=True) shutil.copyfile(f, os.path.join(export_dir_destination, os.path.basename(f))) for sub_portion in book_node.children: sub_portion.content.dump_book_portion(export_dir=export_dir, db_interface=db_interface)
# Essential for depickling to work. common.update_json_class_index(sys.modules[__name__]) logging.debug(common.json_class_index)