Spaces:

GopalKrushnaMahapatra
/

TrueWrite-Scan-Backend

Sleeping

App Files Files Community

GopalKrushnaMahapatra committed 5 days ago

Commit

e1acb44

verified ·

1 Parent(s): d070828

Update pdf_reports.py

Browse files

Files changed (1) hide show

pdf_reports.py +177 -187

pdf_reports.py CHANGED Viewed

@@ -1,196 +1,186 @@
 import os
 from reportlab.lib.pagesizes import A4
 from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
-from reportlab.lib.enums import TA_CENTER, TA_RIGHT, TA_LEFT, TA_JUSTIFY
 from reportlab.lib import colors
-from reportlab.platypus import BaseDocTemplate, PageTemplate, Frame, Paragraph, Spacer, Table, TableStyle, KeepTogether
 from reportlab.pdfbase.ttfonts import TTFont
 from reportlab.pdfbase import pdfmetrics
-from datetime import datetime
-# Ensure output directory exists
-out_dir = "/mnt/data"
-os.makedirs(out_dir, exist_ok=True)
-# Register a serif-like font if available (fallback to Times)
-body_font = 'Times-Roman'
 try:
-    font_path = '/usr/share/fonts/truetype/dejavu/DejaVuSerif.ttf'
-    if os.path.exists(font_path):
-        pdfmetrics.registerFont(TTFont('DejaVuSerif', font_path))
-        body_font = 'DejaVuSerif'
-    else:
-        # try common alternative (mac)
-        font_path2 = '/Library/Fonts/DejaVuSerif.ttf'
-        if os.path.exists(font_path2):
-            pdfmetrics.registerFont(TTFont('DejaVuSerif', font_path2))
-            body_font = 'DejaVuSerif'
 except Exception:
-    body_font = 'Times-Roman'
-pdf_path = os.path.join(out_dir, "TrueWriteScan_Duplichecker_PixelPerfect_Report.pdf")
-PAGE_WIDTH, PAGE_HEIGHT = A4
-MARGIN = 36
-usable_width = PAGE_WIDTH - 2*MARGIN
-styles = getSampleStyleSheet()
-styles.add(ParagraphStyle(name='ReportTitle', fontName=body_font, fontSize=18, alignment=TA_CENTER, leading=22))
-styles.add(ParagraphStyle(name='SmallRight', fontName=body_font, fontSize=9, alignment=TA_RIGHT, textColor=colors.HexColor("#555555")))
-styles.add(ParagraphStyle(name='TileBig', fontName=body_font, fontSize=30, alignment=TA_CENTER, leading=32))
-styles.add(ParagraphStyle(name='TileLabel', fontName=body_font, fontSize=10, alignment=TA_CENTER, textColor=colors.HexColor("#666666")))
-styles.add(ParagraphStyle(name='SectionHeading', fontName=body_font, fontSize=13, spaceBefore=8, spaceAfter=4, leading=15))
-styles.add(ParagraphStyle(name='Body', fontName=body_font, fontSize=11, leading=15, alignment=TA_JUSTIFY))
-styles.add(ParagraphStyle(name='HighlightYellow', fontName=body_font, fontSize=11, leading=15, backColor=colors.HexColor("#fff3b0"), alignment=TA_JUSTIFY))
-styles.add(ParagraphStyle(name='HighlightRed', fontName=body_font, fontSize=11, leading=15, backColor=colors.HexColor("#ffd6d6"), alignment=TA_JUSTIFY))
-styles.add(ParagraphStyle(name='Footer', fontName=body_font, fontSize=9, alignment=TA_RIGHT, textColor=colors.HexColor("#666666")))
-styles.add(ParagraphStyle(name='MatchedHeader', fontName=body_font, fontSize=12, leading=14, alignment=TA_LEFT, spaceBefore=6, spaceAfter=6))
-def header_footer(canvas, doc):
-    canvas.saveState()
-    date_str = datetime.now().strftime("%d %B %Y, %H:%M")
-    canvas.setFont(body_font, 9)
-    canvas.setFillColor(colors.HexColor("#555555"))
-    canvas.drawString(MARGIN, PAGE_HEIGHT - MARGIN + 8, f"Date: {date_str}")
-    canvas.setFont(body_font, 16)
-    canvas.setFillColor(colors.black)
-    canvas.drawCentredString(PAGE_WIDTH/2.0, PAGE_HEIGHT - MARGIN + 4, "Plagiarism Scan Report")
-    canvas.setFont(body_font, 9)
-    canvas.setFillColor(colors.HexColor("#666666"))
-    canvas.drawRightString(PAGE_WIDTH - MARGIN, MARGIN - 10, f"Page {doc.page}")
-    canvas.restoreState()
-doc = BaseDocTemplate(pdf_path, pagesize=A4,
-                      leftMargin=MARGIN, rightMargin=MARGIN,
-                      topMargin=MARGIN, bottomMargin=MARGIN)
-frame = Frame(MARGIN, MARGIN, usable_width, PAGE_HEIGHT - 2*MARGIN, id='normal')
-template = PageTemplate(id='report', frames=[frame], onPage=header_footer)
-doc.addPageTemplates([template])
-# Optionally set metadata
-doc.title = "TrueWrite Scan — Plagiarism Report"
-doc.author = "TrueWrite Scan"
-story = []
-# Tiles (colors matched to Duplichecker-like look)
-tiles_data = [
-    [Paragraph("<b>12%</b>", styles['TileBig']), Paragraph("<b>4%</b>", styles['TileBig']), Paragraph("<b>8%</b>", styles['TileBig']), Paragraph("<b>88%</b>", styles['TileBig'])],
-    [Paragraph("Plagiarism", styles['TileLabel']), Paragraph("Exact Match", styles['TileLabel']), Paragraph("Partial Match", styles['TileLabel']), Paragraph("Unique", styles['TileLabel'])]
-]
-tiles_table = Table(tiles_data, colWidths=[usable_width/4.0]*4, rowHeights=[46, 18])
-tiles_table.setStyle(TableStyle([
-    ('BACKGROUND', (0,0), (-1,0), colors.HexColor("#f7f7f9")),
-    ('ALIGN', (0,0), (-1,-1), 'CENTER'),
-    ('VALIGN', (0,0), (-1,-1), 'MIDDLE'),
-    ('BOX', (0,0), (-1,-1), 0.6, colors.HexColor("#dddddd")),
-]))
-story.append(tiles_table)
-story.append(Spacer(1, 12))
-# Counts row
-counts = [
-    ['Words', 'Characters', 'Sentences', 'Paragraphs', 'Read Time'],
-    ['950', '7138', '43', '16', '5 minute(s)']
-]
-counts_table = Table(counts, colWidths=[usable_width/5.0]*5)
-counts_table.setStyle(TableStyle([
-    ('BACKGROUND', (0,0), (-1,0), colors.HexColor("#f4f6f7")),
-    ('ALIGN', (0,0), (-1,-1), 'CENTER'),
-    ('BOX', (0,0), (-1,-1), 0.5, colors.HexColor("#e6e6e6")),
-]))
-story.append(counts_table)
-story.append(Spacer(1, 12))
-# Sample content with highlighted spans (we'll highlight whole sentences as blocks)
-story.append(Paragraph("Abstract—", styles['SectionHeading']))
-para1 = ("Using AI, machine learning, and advanced computing together opens up a lot of possibilities to effectively tackle "
-         "major issues such as climate resilience and health equity.")
-para2 = ("The research looks at the ways in which AI can be a social developmental tool by aiding first responders during calamities "
-         "as well as healthcare personalization.")
-para3 = ("In particular, it mentions the use of deep learning and NLP for better prediction, efficient resource management, and improved accessibility of services.")
-para4 = ("Moreover, the paper points to the issues that revolve around the ethics of the technology and the need for transparent models.")
-story.append(Paragraph(para1, styles['Body']))
-story.append(Spacer(1, 6))
-story.append(Paragraph(para2, styles['HighlightYellow']))
-story.append(Spacer(1, 6))
-story.append(Paragraph(para3, styles['HighlightRed']))
-story.append(Spacer(1, 6))
-story.append(Paragraph(para4, styles['Body']))
-story.append(Spacer(1, 10))
-story.append(Paragraph("I. Introduction", styles['SectionHeading']))
-for _ in range(3):
-    story.append(Paragraph(para1 + " " + para2, styles['Body']))
-    story.append(Spacer(1, 6))
-story.append(Paragraph("II. Literature Review", styles['SectionHeading']))
-for i in range(4):
-    # Insert occasional highlighted sentences
-    story.append(Paragraph(para2, styles['Body']))
-    story.append(Paragraph(para3, styles['HighlightYellow'] if i % 2 == 0 else styles['Body']))
-    story.append(Spacer(1, 6))
-story.append(Paragraph("III. Methodology", styles['SectionHeading']))
-for _ in range(4):
-    story.append(Paragraph(para3, styles['Body']))
-    story.append(Spacer(1, 6))
-story.append(Spacer(1, 12))
-# Matched sources header
-story.append(Paragraph("Matched Sources", styles['MatchedHeader']))
-matched_sources = [
-    {"title": "AI for Social Good", "url": "https://example.com/ai-social-good", "similarity": "42%"},
-    {"title": "Deep Learning Predictions", "url": "https://example.org/deep-learning", "similarity": "67%"},
-    {"title": "NLP and Resource Management", "url": "https://sample.org/nlp-resource-management", "similarity": "18%"}
-]
-ms_table_data = [["#", "Source Title", "URL", "Similarity"]]
-for i, ms in enumerate(matched_sources, start=1):
-    title_par = Paragraph(ms['title'], styles['Body'])
-    # Use <a href="..."> for clickable link
-    url_par = Paragraph(f'<a href="{ms["url"]}">{ms["url"]}</a>', styles['Body'])
-    ms_table_data.append([str(i), title_par, url_par, ms['similarity']])
-ms_table = Table(ms_table_data, colWidths=[30, usable_width*0.35, usable_width*0.45, usable_width*0.15])
-ms_table.setStyle(TableStyle([
-    ('BACKGROUND', (0,0), (-1,0), colors.HexColor("#f2f4f5")),
-    ('TEXTCOLOR', (0,0), (-1,0), colors.HexColor("#333333")),
-    ('ALIGN', (0,0), (-1,0), 'CENTER'),
-    ('VALIGN', (0,0), (-1,-1), 'MIDDLE'),
-    ('BOX', (0,0), (-1,-1), 0.6, colors.HexColor("#e0e0e0")),
-    ('INNERGRID', (0,0), (-1,-1), 0.4, colors.HexColor("#efefef")),
-    ('LEFTPADDING', (1,1), (1,-1), 6),
-    ('LEFTPADDING', (2,1), (2,-1), 6),
-    ('WORDWRAP', (2,1), (2,-1), 'CJK')  # helps long URLs wrap
-]))
-story.append(ms_table)
-story.append(Spacer(1, 14))
-# Matched Source final block
-matched_table = Table(
-    [[Paragraph("<b>Matched Source Overview</b>", styles['Body'])],
-     [Paragraph("Congratulations! Sections highlighted indicate similarity with external sources. Please review matched entries for exact references.", styles['Body'])]],
-    colWidths=[usable_width]
-)
-matched_table.setStyle(TableStyle([
-    ('BACKGROUND', (0,0), (-1,0), colors.HexColor("#f7fafb")),
-    ('BOX', (0,0), (-1,-1), 0.5, colors.HexColor("#e6e6e6")),
-    ('LEFTPADDING', (0,0), (-1,-1), 8),
-    ('RIGHTPADDING', (0,0), (-1,-1), 8),
-    ('TOPPADDING', (0,0), (-1,-1), 6),
-    ('BOTTOMPADDING', (0,0), (-1,-1), 6),
-]))
-story.append(matched_table)
-story.append(Spacer(1, 24))
-story.append(Paragraph("Generated by TrueWrite Scan • https://gopalkrushnamahapatra-truewrite-scan.static.hf.space", styles['Footer']))
-# Build PDF
-doc.build(story)
-print("PDF written to:", pdf_path)

+# pdf_reports.py
 import os
+import uuid
+from datetime import datetime
 from reportlab.lib.pagesizes import A4
 from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
+from reportlab.lib.enums import TA_CENTER, TA_RIGHT, TA_JUSTIFY, TA_LEFT
 from reportlab.lib import colors
+from reportlab.platypus import (
+    BaseDocTemplate, PageTemplate, Frame, Paragraph, Spacer, Table, TableStyle
+)
 from reportlab.pdfbase.ttfonts import TTFont
 from reportlab.pdfbase import pdfmetrics
+# Register the DejaVu Serif TrueType font from a fixed system path; if registration raises, fall back to Times-Roman. BODY_FONT is the font name used for every style and canvas string in _build_doc.
 try:
+    pdfmetrics.registerFont(TTFont('DejaVuSerif', '/usr/share/fonts/truetype/dejavu/DejaVuSerif.ttf'))
+    BODY_FONT = 'DejaVuSerif'
 except Exception:
+    BODY_FONT = 'Times-Roman'
+def _build_doc(filepath, title_text, tiles, counts, sections, matched_sources, footer_text):
+    PAGE_WIDTH, PAGE_HEIGHT = A4
+    MARGIN = 36
+    usable_width = PAGE_WIDTH - 2 * MARGIN
+    styles = getSampleStyleSheet()
+    styles.add(ParagraphStyle(name='ReportTitle', fontName=BODY_FONT, fontSize=18, alignment=TA_CENTER, leading=22))
+    styles.add(ParagraphStyle(name='SmallRight', fontName=BODY_FONT, fontSize=9, alignment=TA_RIGHT, textColor=colors.HexColor("#555555")))
+    styles.add(ParagraphStyle(name='TileBig', fontName=BODY_FONT, fontSize=30, alignment=TA_CENTER, leading=32))
+    styles.add(ParagraphStyle(name='TileLabel', fontName=BODY_FONT, fontSize=10, alignment=TA_CENTER, textColor=colors.HexColor("#666666")))
+    styles.add(ParagraphStyle(name='SectionHeading', fontName=BODY_FONT, fontSize=13, spaceBefore=8, spaceAfter=4, leading=15))
+    styles.add(ParagraphStyle(name='Body', fontName=BODY_FONT, fontSize=11, leading=15, alignment=TA_JUSTIFY))
+    styles.add(ParagraphStyle(name='HighlightYellow', fontName=BODY_FONT, fontSize=11, leading=15, backColor=colors.HexColor("#fff3b0"), alignment=TA_JUSTIFY))
+    styles.add(ParagraphStyle(name='HighlightRed', fontName=BODY_FONT, fontSize=11, leading=15, backColor=colors.HexColor("#ffd6d6"), alignment=TA_JUSTIFY))
+    styles.add(ParagraphStyle(name='Footer', fontName=BODY_FONT, fontSize=9, alignment=TA_RIGHT, textColor=colors.HexColor("#666666")))
+    styles.add(ParagraphStyle(name='MatchedHeader', fontName=BODY_FONT, fontSize=12, leading=14, alignment=TA_LEFT, spaceBefore=6, spaceAfter=6))
+    def header_footer(canvas, doc):
+        canvas.saveState()
+        date_str = datetime.now().strftime("%d %B %Y, %H:%M")
+        canvas.setFont(BODY_FONT, 9)
+        canvas.setFillColor(colors.HexColor("#555555"))
+        canvas.drawString(MARGIN, PAGE_HEIGHT - MARGIN + 8, f"Date: {date_str}")
+        canvas.setFont(BODY_FONT, 16)
+        canvas.setFillColor(colors.black)
+        canvas.drawCentredString(PAGE_WIDTH / 2.0, PAGE_HEIGHT - MARGIN + 4, title_text)
+        canvas.setFont(BODY_FONT, 9)
+        canvas.setFillColor(colors.HexColor("#666666"))
+        canvas.drawRightString(PAGE_WIDTH - MARGIN, MARGIN - 10, f"Page {doc.page}")
+        canvas.restoreState()
+    doc = BaseDocTemplate(filepath, pagesize=A4,
+                          leftMargin=MARGIN, rightMargin=MARGIN,
+                          topMargin=MARGIN, bottomMargin=MARGIN)
+    frame = Frame(MARGIN, MARGIN, usable_width, PAGE_HEIGHT - 2 * MARGIN, id='normal')
+    template = PageTemplate(id='report', frames=[frame], onPage=header_footer)
+    doc.addPageTemplates([template])
+    story = []
+    # Tiles (4 small summary tiles)
+    tile_values = tiles  # list of 4 dicts: {'value': '12%', 'label': 'Plagiarism'}
+    tiles_data = [
+        [Paragraph(f"<b>{tile_values[i]['value']}</b>", styles['TileBig']) for i in range(4)],
+        [Paragraph(tile_values[i]['label'], styles['TileLabel']) for i in range(4)]
+    ]
+    tiles_table = Table(tiles_data, colWidths=[usable_width / 4.0] * 4, rowHeights=[46, 18])
+    tiles_table.setStyle(TableStyle([
+        ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor("#f7f7f9")),
+        ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
+        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
+        ('BOX', (0, 0), (-1, -1), 0.6, colors.HexColor("#dddddd")),
+    ]))
+    story.append(tiles_table)
+    story.append(Spacer(1, 12))
+    # Counts row
+    if counts:
+        counts_table = Table([list(counts.keys()), list(counts.values())],
+                             colWidths=[usable_width / len(counts)] * len(counts))
+        counts_table.setStyle(TableStyle([
+            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor("#f4f6f7")),
+            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
+            ('BOX', (0, 0), (-1, -1), 0.5, colors.HexColor("#e6e6e6")),
+        ]))
+        story.append(counts_table)
+        story.append(Spacer(1, 12))
+    # Sections + highlighting: sections is list of dicts: {'heading': 'Abstract', 'paragraphs': [p1, p2...]}
+    for sec in sections or []:
+        if sec.get('heading'):
+            story.append(Paragraph(sec['heading'], styles['SectionHeading']))
+        for para in sec.get('paragraphs', []):
+            # para may be dict {'text': '...', 'highlight':'yellow'/'red'/None}
+            if isinstance(para, dict):
+                text = para.get('text', '')
+                hl = para.get('highlight')
+                if hl == 'yellow':
+                    story.append(Paragraph(text, styles['HighlightYellow']))
+                elif hl == 'red':
+                    story.append(Paragraph(text, styles['HighlightRed']))
+                else:
+                    story.append(Paragraph(text, styles['Body']))
+            else:
+                story.append(Paragraph(para, styles['Body']))
+            story.append(Spacer(1, 6))
+    story.append(Spacer(1, 10))
+    # Matched Sources table (if any)
+    if matched_sources:
+        story.append(Paragraph("Matched Sources", styles['MatchedHeader']))
+        ms_table_data = [["#", "Source Title", "URL", "Similarity"]]
+        for i, ms in enumerate(matched_sources, start=1):
+            title_par = Paragraph(ms.get('title', ''), styles['Body'])
+            url_par = Paragraph(f'<link href="{ms.get("url", "")}">{ms.get("url", "")}</link>', styles['Body'])
+            ms_table_data.append([str(i), title_par, url_par, ms.get('similarity', '')])
+        ms_table = Table(ms_table_data, colWidths=[30, usable_width * 0.35, usable_width * 0.45, usable_width * 0.15])
+        ms_table.setStyle(TableStyle([
+            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor("#f2f4f5")),
+            ('TEXTCOLOR', (0, 0), (-1, 0), colors.HexColor("#333333")),
+            ('ALIGN', (0, 0), (-1, 0), 'CENTER'),
+            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
+            ('BOX', (0, 0), (-1, -1), 0.6, colors.HexColor("#e0e0e0")),
+            ('INNERGRID', (0, 0), (-1, -1), 0.4, colors.HexColor("#efefef")),
+            ('LEFTPADDING', (1, 1), (1, -1), 6),
+            ('LEFTPADDING', (2, 1), (2, -1), 6),
+        ]))
+        story.append(ms_table)
+        story.append(Spacer(1, 14))
+    # Matched Source Overview block (footer_text)
+    if footer_text:
+        matched_table = Table(
+            [[Paragraph("<b>Matched Source Overview</b>", styles['Body'])],
+             [Paragraph(footer_text, styles['Body'])]],
+            colWidths=[usable_width]
+        )
+        matched_table.setStyle(TableStyle([
+            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor("#f7fafb")),
+            ('BOX', (0, 0), (-1, -1), 0.5, colors.HexColor("#e6e6e6")),
+            ('LEFTPADDING', (0, 0), (-1, -1), 8),
+            ('RIGHTPADDING', (0, 0), (-1, -1), 8),
+            ('TOPPADDING', (0, 0), (-1, -1), 6),
+            ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
+        ]))
+        story.append(matched_table)
+        story.append(Spacer(1, 24))
+    story.append(Paragraph("Generated by TrueWrite Scan • https://gopalkrushnamahapatra-truewrite-scan.static.hf.space", styles['Footer']))
+    doc.build(story)
+def generate_report(report_type: str, out_dir: str = "/tmp", **kwargs) -> str:
+    """
+    report_type: "ai" | "grammar" | "plagiarism"
+    kwargs expected:
+      - title_text: str
+      - tiles: list of 4 dicts [{'value': '12%', 'label': 'Plagiarism'}, ...]
+      - counts: dict {'Words': 950, ...}
+      - sections: list [{'heading':'','paragraphs':[...]}]
+      - matched_sources: list [{'title','url','similarity'}]
+      - footer_text: str
+    Returns: path to generated PDF
+    """
+    os.makedirs(out_dir, exist_ok=True)
+    filename = f"{report_type}_report_{uuid.uuid4().hex[:8]}.pdf"
+    filepath = os.path.join(out_dir, filename)
+    title_text = kwargs.get('title_text', "Report")
+    tiles = kwargs.get('tiles') or [
+        {'value': '0%', 'label': 'Plagiarism'},
+        {'value': '0%', 'label': 'Exact Match'},
+        {'value': '0%', 'label': 'Partial Match'},
+        {'value': '100%', 'label': 'Unique'},
+    ]
+    counts = kwargs.get('counts') or {}
+    sections = kwargs.get('sections') or []
+    matched_sources = kwargs.get('matched_sources') or []
+    footer_text = kwargs.get('footer_text') or ''
+    _build_doc(filepath, title_text, tiles, counts, sections, matched_sources, footer_text)
+    return filepath