Catalog Database (v1.0.0)
Primary database for product catalog information.
Overview
The Catalog Database is the authoritative source for all product catalog information in the BookWorm system. Built on PostgreSQL 18, it manages the complete lifecycle of book products, including their metadata, categorization, authorship, and publisher information. This database serves as the backbone of the product catalog service, enabling efficient product discovery, management, and synchronization across the BookWorm platform.
BEGIN;
CREATE TABLE IF NOT EXISTS public."__EFMigrationsHistory"( migration_id character varying(150) COLLATE pg_catalog."default" NOT NULL, product_version character varying(32) COLLATE pg_catalog."default" NOT NULL, CONSTRAINT pk___ef_migrations_history PRIMARY KEY (migration_id));
CREATE TABLE IF NOT EXISTS public.authors( id uuid NOT NULL DEFAULT uuidv7(), name character varying(100) COLLATE pg_catalog."default" NOT NULL, CONSTRAINT pk_authors PRIMARY KEY (id));
CREATE TABLE IF NOT EXISTS public.book_authors( id uuid NOT NULL DEFAULT uuidv7(), author_id uuid NOT NULL, book_id uuid NOT NULL, CONSTRAINT pk_book_authors PRIMARY KEY (id));
CREATE TABLE IF NOT EXISTS public.books( id uuid NOT NULL DEFAULT uuidv7(), name character varying(50) COLLATE pg_catalog."default" NOT NULL, description character varying(1000) COLLATE pg_catalog."default" NOT NULL, image character varying(255) COLLATE pg_catalog."default", price_original_price numeric(18, 2), price_discount_price numeric(18, 2), status smallint NOT NULL, average_rating double precision NOT NULL DEFAULT 0.0, total_reviews integer NOT NULL, category_id uuid, publisher_id uuid, is_deleted boolean NOT NULL, created_at timestamp with time zone NOT NULL DEFAULT (now() AT TIME ZONE 'UTC'::text), last_modified_at timestamp with time zone DEFAULT (now() AT TIME ZONE 'UTC'::text), CONSTRAINT pk_books PRIMARY KEY (id));
CREATE TABLE IF NOT EXISTS public.categories( id uuid NOT NULL DEFAULT uuidv7(), name character varying(50) COLLATE pg_catalog."default" NOT NULL, CONSTRAINT pk_categories PRIMARY KEY (id));
CREATE TABLE IF NOT EXISTS public.inbox_state( id bigint NOT NULL GENERATED BY DEFAULT AS IDENTITY ( INCREMENT 1 START 1 MINVALUE 1 MAXVALUE 9223372036854775807 CACHE 1 ), message_id uuid NOT NULL, consumer_id uuid NOT NULL, lock_id uuid NOT NULL, row_version bytea, received timestamp with time zone NOT NULL, receive_count integer NOT NULL, expiration_time timestamp with time zone, consumed timestamp with time zone, delivered timestamp with time zone, last_sequence_number bigint, CONSTRAINT pk_inbox_state PRIMARY KEY (id), CONSTRAINT ak_inbox_state_message_id_consumer_id UNIQUE (message_id, consumer_id));
CREATE TABLE IF NOT EXISTS public.outbox_message( sequence_number bigint NOT NULL GENERATED BY DEFAULT AS IDENTITY ( INCREMENT 1 START 1 MINVALUE 1 MAXVALUE 9223372036854775807 CACHE 1 ), enqueue_time timestamp with time zone, sent_time timestamp with time zone NOT NULL, headers text COLLATE pg_catalog."default", properties text COLLATE pg_catalog."default", inbox_message_id uuid, inbox_consumer_id uuid, outbox_id uuid, message_id uuid NOT NULL, content_type character varying(256) COLLATE pg_catalog."default" NOT NULL, message_type text COLLATE pg_catalog."default" NOT NULL, body text COLLATE pg_catalog."default" NOT NULL, conversation_id uuid, correlation_id uuid, initiator_id uuid, request_id uuid, source_address character varying(256) COLLATE pg_catalog."default", destination_address character varying(256) COLLATE pg_catalog."default", response_address character varying(256) COLLATE pg_catalog."default", fault_address character varying(256) COLLATE pg_catalog."default", expiration_time timestamp with time zone, CONSTRAINT pk_outbox_message PRIMARY KEY (sequence_number));
CREATE TABLE IF NOT EXISTS public.outbox_state( outbox_id uuid NOT NULL, lock_id uuid NOT NULL, row_version bytea, created timestamp with time zone NOT NULL, delivered timestamp with time zone, last_sequence_number bigint, CONSTRAINT pk_outbox_state PRIMARY KEY (outbox_id));
CREATE TABLE IF NOT EXISTS public.publishers( id uuid NOT NULL DEFAULT uuidv7(), name character varying(100) COLLATE pg_catalog."default" NOT NULL, CONSTRAINT pk_publishers PRIMARY KEY (id));
ALTER TABLE IF EXISTS public.book_authors ADD CONSTRAINT fk_book_authors_authors_author_id FOREIGN KEY (author_id) REFERENCES public.authors (id) MATCH SIMPLE ON UPDATE NO ACTION ON DELETE RESTRICT;CREATE INDEX IF NOT EXISTS ix_book_authors_author_id ON public.book_authors(author_id);
ALTER TABLE IF EXISTS public.book_authors ADD CONSTRAINT fk_book_authors_books_book_id FOREIGN KEY (book_id) REFERENCES public.books (id) MATCH SIMPLE ON UPDATE NO ACTION ON DELETE CASCADE;CREATE INDEX IF NOT EXISTS ix_book_authors_book_id ON public.book_authors(book_id);
ALTER TABLE IF EXISTS public.books ADD CONSTRAINT fk_books_categories_category_id FOREIGN KEY (category_id) REFERENCES public.categories (id) MATCH SIMPLE ON UPDATE NO ACTION ON DELETE SET NULL;CREATE INDEX IF NOT EXISTS ix_books_category_id ON public.books(category_id);
ALTER TABLE IF EXISTS public.books ADD CONSTRAINT fk_books_publishers_publisher_id FOREIGN KEY (publisher_id) REFERENCES public.publishers (id) MATCH SIMPLE ON UPDATE NO ACTION ON DELETE SET NULL;CREATE INDEX IF NOT EXISTS ix_books_publisher_id ON public.books(publisher_id);
ALTER TABLE IF EXISTS public.outbox_message ADD CONSTRAINT fk_outbox_message_inbox_state_inbox_message_id_inbox_consumer_ FOREIGN KEY (inbox_message_id, inbox_consumer_id) REFERENCES public.inbox_state (message_id, consumer_id) MATCH SIMPLE ON UPDATE NO ACTION ON DELETE NO ACTION;
ALTER TABLE IF EXISTS public.outbox_message ADD CONSTRAINT fk_outbox_message_outbox_state_outbox_id FOREIGN KEY (outbox_id) REFERENCES public.outbox_state (outbox_id) MATCH SIMPLE ON UPDATE NO ACTION ON DELETE NO ACTION;
END;-- Business Intelligence Queries for Catalog Database-- These queries provide valuable insights for business decision-making
-- ============================================-- INVENTORY & PRODUCT MANAGEMENT-- ============================================
-- 1. Top 10 Best-Selling Books by Average Rating-- Purpose: Identify high-performing products for promotion and restockingSELECT b.id, b.name, b.average_rating, b.total_reviews, b.price_original_price, b.price_discount_price, c.name AS category, p.name AS publisher, b.statusFROM books bLEFT JOIN categories c ON b.category_id = c.idLEFT JOIN publishers p ON b.publisher_id = p.idWHERE b.is_deleted = FALSE AND b.total_reviews > 10 -- Minimum reviews for statistical significanceORDER BY b.average_rating DESC, b.total_reviews DESCLIMIT 10;
-- 2. Books with Low Stock Status-- Purpose: Identify products that need restockingSELECT b.id, b.name, b.status, b.average_rating, b.total_reviews, p.name AS publisherFROM books bLEFT JOIN publishers p ON b.publisher_id = p.idWHERE b.is_deleted = FALSE AND b.status != 0 -- Assuming 0 = InStockORDER BY b.total_reviews DESC;
-- 3. Books with Discounts and Their Performance-- Purpose: Analyze effectiveness of pricing strategiesSELECT b.id, b.name, b.price_original_price, b.price_discount_price, ROUND(((b.price_original_price - b.price_discount_price) / b.price_original_price * 100)::numeric, 2) AS discount_percentage, b.average_rating, b.total_reviews, c.name AS categoryFROM books bLEFT JOIN categories c ON b.category_id = c.idWHERE b.is_deleted = FALSE AND b.price_discount_price IS NOT NULL AND b.price_discount_price < b.price_original_priceORDER BY discount_percentage DESC;
-- 4. Books Without Reviews-- Purpose: Identify products that need marketing attention or customer engagementSELECT b.id, b.name, b.price_original_price, c.name AS category, p.name AS publisher, b.created_atFROM books bLEFT JOIN categories c ON b.category_id = c.idLEFT JOIN publishers p ON b.publisher_id = p.idWHERE b.is_deleted = FALSE AND b.total_reviews = 0ORDER BY b.created_at DESC;
-- ============================================-- CATEGORY ANALYSIS-- ============================================
-- 5. Category Performance Overview-- Purpose: Understand which categories drive the most engagementSELECT c.id, c.name AS category, COUNT(b.id) AS total_books, ROUND(AVG(b.average_rating)::numeric, 2) AS avg_category_rating, SUM(b.total_reviews) AS total_reviews, ROUND(AVG(b.price_original_price)::numeric, 2) AS avg_price, COUNT(CASE WHEN b.price_discount_price IS NOT NULL THEN 1 END) AS books_on_saleFROM categories cLEFT JOIN books b ON c.id = b.category_id AND b.is_deleted = FALSEGROUP BY c.id, c.nameORDER BY total_books DESC, total_reviews DESC;
-- 6. Most Popular Categories by Review Count-- Purpose: Identify trending categories for inventory planningSELECT c.name AS category, COUNT(b.id) AS book_count, SUM(b.total_reviews) AS total_reviews, ROUND(AVG(b.average_rating)::numeric, 2) AS avg_ratingFROM categories cINNER JOIN books b ON c.id = b.category_idWHERE b.is_deleted = FALSE AND b.total_reviews > 0GROUP BY c.nameORDER BY total_reviews DESCLIMIT 10;
-- 7. Categories with Highest Revenue Potential-- Purpose: Calculate potential revenue by categorySELECT c.name AS category, COUNT(b.id) AS total_books, ROUND(SUM(COALESCE(b.price_discount_price, b.price_original_price))::numeric, 2) AS total_inventory_value, ROUND(AVG(COALESCE(b.price_discount_price, b.price_original_price))::numeric, 2) AS avg_selling_price, SUM(b.total_reviews) AS customer_interestFROM categories cINNER JOIN books b ON c.id = b.category_idWHERE b.is_deleted = FALSEGROUP BY c.nameORDER BY total_inventory_value DESC;
-- ============================================-- AUTHOR & PUBLISHER INSIGHTS-- ============================================
-- 8. Top Authors by Book Count and Average Rating-- Purpose: Identify prolific and well-reviewed authors for partnershipsSELECT a.id, a.name AS author, COUNT(DISTINCT ba.book_id) AS books_count, ROUND(AVG(b.average_rating)::numeric, 2) AS avg_rating, SUM(b.total_reviews) AS total_reviewsFROM authors aINNER JOIN book_authors ba ON a.id = ba.author_idINNER JOIN books b ON ba.book_id = b.idWHERE b.is_deleted = FALSEGROUP BY a.id, a.nameHAVING COUNT(DISTINCT ba.book_id) > 0ORDER BY books_count DESC, avg_rating DESCLIMIT 20;
-- 9. Publisher Performance Scorecard-- Purpose: Evaluate publisher relationships and catalog qualitySELECT p.id, p.name AS publisher, COUNT(b.id) AS total_books, COUNT(CASE WHEN b.status = 0 THEN 1 END) AS in_stock_books, ROUND(AVG(b.average_rating)::numeric, 2) AS avg_rating, SUM(b.total_reviews) AS total_reviews, ROUND(AVG(b.price_original_price)::numeric, 2) AS avg_price, COUNT(CASE WHEN b.price_discount_price IS NOT NULL THEN 1 END) AS discounted_booksFROM publishers pLEFT JOIN books b ON p.id = b.publisher_id AND b.is_deleted = FALSEGROUP BY p.id, p.nameORDER BY total_books DESC, avg_rating DESC;
-- 10. Co-Authored Books Analysis-- Purpose: Understand collaboration patterns and their successSELECT b.id, b.name AS book_title, b.average_rating, b.total_reviews, COUNT(ba.author_id) AS author_count, STRING_AGG(a.name, ', ' ORDER BY a.name) AS authorsFROM books bINNER JOIN book_authors ba ON b.id = ba.book_idINNER JOIN authors a ON ba.author_id = a.idWHERE b.is_deleted = FALSEGROUP BY b.id, b.name, b.average_rating, b.total_reviewsHAVING COUNT(ba.author_id) > 1ORDER BY b.average_rating DESC, b.total_reviews DESC;
-- ============================================-- PRICING ANALYTICS-- ============================================
-- 11. Price Range Distribution-- Purpose: Understand pricing strategy and market positioningSELECT CASE WHEN b.price_original_price < 10 THEN 'Budget (< $10)' WHEN b.price_original_price BETWEEN 10 AND 20 THEN 'Economy ($10-20)' WHEN b.price_original_price BETWEEN 20 AND 35 THEN 'Standard ($20-35)' WHEN b.price_original_price BETWEEN 35 AND 50 THEN 'Premium ($35-50)' ELSE 'Luxury (> $50)' END AS price_range, COUNT(*) AS book_count, ROUND(AVG(b.average_rating)::numeric, 2) AS avg_rating, SUM(b.total_reviews) AS total_reviewsFROM books bWHERE b.is_deleted = FALSE AND b.price_original_price IS NOT NULLGROUP BY price_rangeORDER BY MIN(b.price_original_price);
-- 12. Discount Strategy Effectiveness-- Purpose: Analyze if discounts correlate with higher engagementSELECT CASE WHEN b.price_discount_price IS NULL THEN 'No Discount' WHEN ((b.price_original_price - b.price_discount_price) / b.price_original_price * 100) < 10 THEN 'Small (< 10%)' WHEN ((b.price_original_price - b.price_discount_price) / b.price_original_price * 100) BETWEEN 10 AND 25 THEN 'Medium (10-25%)' WHEN ((b.price_original_price - b.price_discount_price) / b.price_original_price * 100) BETWEEN 25 AND 50 THEN 'Large (25-50%)' ELSE 'Very Large (> 50%)' END AS discount_tier, COUNT(*) AS book_count, ROUND(AVG(b.average_rating)::numeric, 2) AS avg_rating, ROUND(AVG(b.total_reviews)::numeric, 2) AS avg_reviews_per_bookFROM books bWHERE b.is_deleted = FALSEGROUP BY discount_tierORDER BY CASE discount_tier WHEN 'No Discount' THEN 1 WHEN 'Small (< 10%)' THEN 2 WHEN 'Medium (10-25%)' THEN 3 WHEN 'Large (25-50%)' THEN 4 ELSE 5 END;
-- ============================================-- QUALITY & ENGAGEMENT METRICS-- ============================================
-- 13. Books with Rating-Review Mismatch-- Purpose: Identify potential fake reviews or quality issuesSELECT b.id, b.name, b.average_rating, b.total_reviews, c.name AS category, CASE WHEN b.average_rating >= 4.5 AND b.total_reviews < 5 THEN 'High rating, low reviews - Verify quality' WHEN b.average_rating < 3.0 AND b.total_reviews > 50 THEN 'Low rating, many reviews - Quality issue' WHEN b.average_rating >= 4.0 AND b.total_reviews > 100 THEN 'Proven bestseller' END AS quality_flagFROM books bLEFT JOIN categories c ON b.category_id = c.idWHERE b.is_deleted = FALSE AND ( (b.average_rating >= 4.5 AND b.total_reviews < 5) OR (b.average_rating < 3.0 AND b.total_reviews > 50) OR (b.average_rating >= 4.0 AND b.total_reviews > 100) )ORDER BY b.total_reviews DESC;
-- 14. New Releases Performance (Last 90 Days)-- Purpose: Track how new additions are performingSELECT b.id, b.name, b.created_at, b.average_rating, b.total_reviews, b.price_original_price, c.name AS category, p.name AS publisher, EXTRACT(DAY FROM (NOW() - b.created_at)) AS days_in_catalogFROM books bLEFT JOIN categories c ON b.category_id = c.idLEFT JOIN publishers p ON b.publisher_id = p.idWHERE b.is_deleted = FALSE AND b.created_at >= NOW() - INTERVAL '90 days'ORDER BY b.total_reviews DESC, b.average_rating DESC;
-- ============================================-- OPERATIONAL QUERIES-- ============================================
-- 15. Orphaned Books (No Category or Publisher)-- Purpose: Data quality check for incomplete recordsSELECT b.id, b.name, b.category_id, b.publisher_id, b.created_at, CASE WHEN b.category_id IS NULL AND b.publisher_id IS NULL THEN 'Missing Both' WHEN b.category_id IS NULL THEN 'Missing Category' WHEN b.publisher_id IS NULL THEN 'Missing Publisher' END AS data_issueFROM books bWHERE b.is_deleted = FALSE AND (b.category_id IS NULL OR b.publisher_id IS NULL)ORDER BY b.created_at DESC;
-- 16. Books with No Authors-- Purpose: Data integrity checkSELECT b.id, b.name, b.created_at, c.name AS category, p.name AS publisherFROM books bLEFT JOIN categories c ON b.category_id = c.idLEFT JOIN publishers p ON b.publisher_id = p.idLEFT JOIN book_authors ba ON b.id = ba.book_idWHERE b.is_deleted = FALSE AND ba.id IS NULLORDER BY b.created_at DESC;
-- 17. Soft Deleted Books Report-- Purpose: Review deleted items for potential recovery or final purgeSELECT b.id, b.name, b.last_modified_at AS deleted_at, b.average_rating, b.total_reviews, c.name AS category, p.name AS publisherFROM books bLEFT JOIN categories c ON b.category_id = c.idLEFT JOIN publishers p ON b.publisher_id = p.idWHERE b.is_deleted = TRUEORDER BY b.last_modified_at DESC;
-- ============================================-- TRENDING & RECOMMENDATIONS-- ============================================
-- 18. Rising Stars (Good ratings with growing review counts)-- Purpose: Identify books gaining traction for marketing campaignsSELECT b.id, b.name, b.average_rating, b.total_reviews, b.price_original_price, c.name AS category, p.name AS publisher, ROUND(((b.average_rating * b.total_reviews) / NULLIF(EXTRACT(DAY FROM (NOW() - b.created_at)), 0))::numeric, 2) AS momentum_scoreFROM books bLEFT JOIN categories c ON b.category_id = c.idLEFT JOIN publishers p ON b.publisher_id = p.idWHERE b.is_deleted = FALSE AND b.average_rating >= 4.0 AND b.total_reviews >= 10 AND b.created_at >= NOW() - INTERVAL '180 days'ORDER BY momentum_score DESCLIMIT 20;
-- 19. Books Frequently Bought Together (Same Authors)-- Purpose: Create product recommendations and bundlesSELECT a.name AS author, COUNT(ba.book_id) AS books_by_author, STRING_AGG(DISTINCT b.name, ' | ' ORDER BY b.name) AS book_titles, ROUND(AVG(b.average_rating)::numeric, 2) AS avg_rating, SUM(b.total_reviews) AS total_reviewsFROM authors aINNER JOIN book_authors ba ON a.id = ba.author_idINNER JOIN books b ON ba.book_id = b.idWHERE b.is_deleted = FALSEGROUP BY a.id, a.nameHAVING COUNT(ba.book_id) >= 2ORDER BY books_by_author DESC, total_reviews DESCLIMIT 15;
-- 20. Category Cross-Sell Opportunities-- Purpose: Find authors who write in multiple categories for cross-promotionSELECT a.name AS author, COUNT(DISTINCT b.category_id) AS categories_count, STRING_AGG(DISTINCT c.name, ', ' ORDER BY c.name) AS categories, COUNT(DISTINCT ba.book_id) AS total_books, ROUND(AVG(b.average_rating)::numeric, 2) AS avg_ratingFROM authors aINNER JOIN book_authors ba ON a.id = ba.author_idINNER JOIN books b ON ba.book_id = b.id AND b.is_deleted = FALSELEFT JOIN categories c ON b.category_id = c.idGROUP BY a.id, a.nameHAVING COUNT(DISTINCT b.category_id) > 1ORDER BY categories_count DESC, total_books DESC;Schema Design
The database follows Domain-Driven Design (DDD) principles with the following aggregate roots:
Book Aggregate
The central entity representing books in the catalog system. Each book contains:
- Core Information: Name, description, and image
- Pricing: Base price and optional sale price (managed as a value object)
- Status: Current availability status (InStock, OutOfStock, etc.)
- Rating Metrics: Average rating and total reviews count
- Relationships: Links to Category, Publisher, and multiple Authors through BookAuthor junction table
Category Aggregate
Organizes books into logical groupings:
- Name: Unique category identifier (e.g., Fiction, Non-Fiction, Science, History)
- Used for product organization and filtering
Author Aggregate
Represents book authors in the system:
- Name: Author’s full name
- Books: Many-to-many relationship with books through BookAuthor junction table
- Supports co-authored books
Publisher Aggregate
Manages publisher information:
- Name: Publisher company name
- Books: One-to-many relationship with books
BookAuthor (Junction Entity)
Enables many-to-many relationships between books and authors, supporting co-authored publications. Includes soft delete capability for maintaining historical data integrity.
Key Features
Soft Delete Support
Books and BookAuthor entities implement soft delete patterns, ensuring:
- Historical data preservation for audit trails
- Safe deletion without breaking referential integrity
- Ability to recover accidentally deleted records
- Compliance with data retention policies
Audit Trail
All entities inherit auditable properties tracking:
- Creation timestamp
- Last modification timestamp
- Created by user
- Modified by user
Domain Events
The Book aggregate emits domain events (e.g., BookCreatedEvent) for:
- Event-driven architecture integration
- Cross-service communication
- Audit logging
- Real-time updates to other services (Basket, Ordering)
Data Classification & Governance
- Classification: Internal - Not exposed directly to external systems
- Access Mode: Read/Write - Full CRUD operations for authorized services
- Retention: 2 years - Aligns with business and regulatory requirements
- Residency: East Asia region - Optimized for primary user base location
- Authoritative: True - Single source of truth for catalog data
Performance Considerations
- Indexed on frequently queried fields (Name, CategoryId, PublisherId, Status)
- Optimized for read-heavy workloads typical of e-commerce catalog browsing
- Supports efficient filtering and searching through specification patterns
- Materialized rating data (AverageRating, TotalReviews) for fast retrieval
Integration Points
The Catalog Database integrates with:
- Basket Service: Product information for cart items
- Ordering Service: Product details for order processing
- Rating Service: Aggregated rating data updates
- Search Service: Product indexing for full-text search
- Finance Service: Pricing information for revenue calculations
Security
- Database access restricted to Catalog Service only
- Connection strings managed through secure configuration
- Row-level security policies for multi-tenant scenarios (if applicable)
- Encryption at rest for sensitive data
- Audit logging for all data modifications