GopalKrushnaMahapatra committed on
Commit
e1acb44
·
verified ·
1 Parent(s): d070828

Update pdf_reports.py

Browse files
Files changed (1) hide show
  1. pdf_reports.py +177 -187
pdf_reports.py CHANGED
@@ -1,196 +1,186 @@
 
1
  import os
 
 
2
  from reportlab.lib.pagesizes import A4
3
  from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
4
- from reportlab.lib.enums import TA_CENTER, TA_RIGHT, TA_LEFT, TA_JUSTIFY
5
  from reportlab.lib import colors
6
- from reportlab.platypus import BaseDocTemplate, PageTemplate, Frame, Paragraph, Spacer, Table, TableStyle, KeepTogether
 
 
7
  from reportlab.pdfbase.ttfonts import TTFont
8
  from reportlab.pdfbase import pdfmetrics
9
- from datetime import datetime
10
 
11
- # Ensure output directory exists
12
- out_dir = "/mnt/data"
13
- os.makedirs(out_dir, exist_ok=True)
14
-
15
- # Register a serif-like font if available (fallback to Times)
16
- body_font = 'Times-Roman'
17
  try:
18
- font_path = '/usr/share/fonts/truetype/dejavu/DejaVuSerif.ttf'
19
- if os.path.exists(font_path):
20
- pdfmetrics.registerFont(TTFont('DejaVuSerif', font_path))
21
- body_font = 'DejaVuSerif'
22
- else:
23
- # try common alternative (mac)
24
- font_path2 = '/Library/Fonts/DejaVuSerif.ttf'
25
- if os.path.exists(font_path2):
26
- pdfmetrics.registerFont(TTFont('DejaVuSerif', font_path2))
27
- body_font = 'DejaVuSerif'
28
  except Exception:
29
- body_font = 'Times-Roman'
30
-
31
- pdf_path = os.path.join(out_dir, "TrueWriteScan_Duplichecker_PixelPerfect_Report.pdf")
32
- PAGE_WIDTH, PAGE_HEIGHT = A4
33
- MARGIN = 36
34
- usable_width = PAGE_WIDTH - 2*MARGIN
35
-
36
- styles = getSampleStyleSheet()
37
- styles.add(ParagraphStyle(name='ReportTitle', fontName=body_font, fontSize=18, alignment=TA_CENTER, leading=22))
38
- styles.add(ParagraphStyle(name='SmallRight', fontName=body_font, fontSize=9, alignment=TA_RIGHT, textColor=colors.HexColor("#555555")))
39
- styles.add(ParagraphStyle(name='TileBig', fontName=body_font, fontSize=30, alignment=TA_CENTER, leading=32))
40
- styles.add(ParagraphStyle(name='TileLabel', fontName=body_font, fontSize=10, alignment=TA_CENTER, textColor=colors.HexColor("#666666")))
41
- styles.add(ParagraphStyle(name='SectionHeading', fontName=body_font, fontSize=13, spaceBefore=8, spaceAfter=4, leading=15))
42
- styles.add(ParagraphStyle(name='Body', fontName=body_font, fontSize=11, leading=15, alignment=TA_JUSTIFY))
43
- styles.add(ParagraphStyle(name='HighlightYellow', fontName=body_font, fontSize=11, leading=15, backColor=colors.HexColor("#fff3b0"), alignment=TA_JUSTIFY))
44
- styles.add(ParagraphStyle(name='HighlightRed', fontName=body_font, fontSize=11, leading=15, backColor=colors.HexColor("#ffd6d6"), alignment=TA_JUSTIFY))
45
- styles.add(ParagraphStyle(name='Footer', fontName=body_font, fontSize=9, alignment=TA_RIGHT, textColor=colors.HexColor("#666666")))
46
- styles.add(ParagraphStyle(name='MatchedHeader', fontName=body_font, fontSize=12, leading=14, alignment=TA_LEFT, spaceBefore=6, spaceAfter=6))
47
-
48
- def header_footer(canvas, doc):
49
- canvas.saveState()
50
- date_str = datetime.now().strftime("%d %B %Y, %H:%M")
51
- canvas.setFont(body_font, 9)
52
- canvas.setFillColor(colors.HexColor("#555555"))
53
- canvas.drawString(MARGIN, PAGE_HEIGHT - MARGIN + 8, f"Date: {date_str}")
54
- canvas.setFont(body_font, 16)
55
- canvas.setFillColor(colors.black)
56
- canvas.drawCentredString(PAGE_WIDTH/2.0, PAGE_HEIGHT - MARGIN + 4, "Plagiarism Scan Report")
57
- canvas.setFont(body_font, 9)
58
- canvas.setFillColor(colors.HexColor("#666666"))
59
- canvas.drawRightString(PAGE_WIDTH - MARGIN, MARGIN - 10, f"Page {doc.page}")
60
- canvas.restoreState()
61
-
62
- doc = BaseDocTemplate(pdf_path, pagesize=A4,
63
- leftMargin=MARGIN, rightMargin=MARGIN,
64
- topMargin=MARGIN, bottomMargin=MARGIN)
65
-
66
- frame = Frame(MARGIN, MARGIN, usable_width, PAGE_HEIGHT - 2*MARGIN, id='normal')
67
- template = PageTemplate(id='report', frames=[frame], onPage=header_footer)
68
- doc.addPageTemplates([template])
69
-
70
- # Optionally set metadata
71
- doc.title = "TrueWrite Scan — Plagiarism Report"
72
- doc.author = "TrueWrite Scan"
73
-
74
- story = []
75
-
76
- # Tiles (colors matched to Duplichecker-like look)
77
- tiles_data = [
78
- [Paragraph("<b>12%</b>", styles['TileBig']), Paragraph("<b>4%</b>", styles['TileBig']), Paragraph("<b>8%</b>", styles['TileBig']), Paragraph("<b>88%</b>", styles['TileBig'])],
79
- [Paragraph("Plagiarism", styles['TileLabel']), Paragraph("Exact Match", styles['TileLabel']), Paragraph("Partial Match", styles['TileLabel']), Paragraph("Unique", styles['TileLabel'])]
80
- ]
81
- tiles_table = Table(tiles_data, colWidths=[usable_width/4.0]*4, rowHeights=[46, 18])
82
- tiles_table.setStyle(TableStyle([
83
- ('BACKGROUND', (0,0), (-1,0), colors.HexColor("#f7f7f9")),
84
- ('ALIGN', (0,0), (-1,-1), 'CENTER'),
85
- ('VALIGN', (0,0), (-1,-1), 'MIDDLE'),
86
- ('BOX', (0,0), (-1,-1), 0.6, colors.HexColor("#dddddd")),
87
- ]))
88
- story.append(tiles_table)
89
- story.append(Spacer(1, 12))
90
-
91
- # Counts row
92
- counts = [
93
- ['Words', 'Characters', 'Sentences', 'Paragraphs', 'Read Time'],
94
- ['950', '7138', '43', '16', '5 minute(s)']
95
- ]
96
- counts_table = Table(counts, colWidths=[usable_width/5.0]*5)
97
- counts_table.setStyle(TableStyle([
98
- ('BACKGROUND', (0,0), (-1,0), colors.HexColor("#f4f6f7")),
99
- ('ALIGN', (0,0), (-1,-1), 'CENTER'),
100
- ('BOX', (0,0), (-1,-1), 0.5, colors.HexColor("#e6e6e6")),
101
- ]))
102
- story.append(counts_table)
103
- story.append(Spacer(1, 12))
104
-
105
- # Sample content with highlighted spans (we'll highlight whole sentences as blocks)
106
- story.append(Paragraph("Abstract—", styles['SectionHeading']))
107
-
108
- para1 = ("Using AI, machine learning, and advanced computing together opens up a lot of possibilities to effectively tackle "
109
- "major issues such as climate resilience and health equity.")
110
- para2 = ("The research looks at the ways in which AI can be a social developmental tool by aiding first responders during calamities "
111
- "as well as healthcare personalization.")
112
- para3 = ("In particular, it mentions the use of deep learning and NLP for better prediction, efficient resource management, and improved accessibility of services.")
113
- para4 = ("Moreover, the paper points to the issues that revolve around the ethics of the technology and the need for transparent models.")
114
-
115
- story.append(Paragraph(para1, styles['Body']))
116
- story.append(Spacer(1, 6))
117
- story.append(Paragraph(para2, styles['HighlightYellow']))
118
- story.append(Spacer(1, 6))
119
- story.append(Paragraph(para3, styles['HighlightRed']))
120
- story.append(Spacer(1, 6))
121
- story.append(Paragraph(para4, styles['Body']))
122
- story.append(Spacer(1, 10))
123
-
124
- story.append(Paragraph("I. Introduction", styles['SectionHeading']))
125
- for _ in range(3):
126
- story.append(Paragraph(para1 + " " + para2, styles['Body']))
127
- story.append(Spacer(1, 6))
128
-
129
- story.append(Paragraph("II. Literature Review", styles['SectionHeading']))
130
- for i in range(4):
131
- # Insert occasional highlighted sentences
132
- story.append(Paragraph(para2, styles['Body']))
133
- story.append(Paragraph(para3, styles['HighlightYellow'] if i % 2 == 0 else styles['Body']))
134
- story.append(Spacer(1, 6))
135
-
136
- story.append(Paragraph("III. Methodology", styles['SectionHeading']))
137
- for _ in range(4):
138
- story.append(Paragraph(para3, styles['Body']))
139
- story.append(Spacer(1, 6))
140
-
141
- story.append(Spacer(1, 12))
142
-
143
- # Matched sources header
144
- story.append(Paragraph("Matched Sources", styles['MatchedHeader']))
145
-
146
- matched_sources = [
147
- {"title": "AI for Social Good", "url": "https://example.com/ai-social-good", "similarity": "42%"},
148
- {"title": "Deep Learning Predictions", "url": "https://example.org/deep-learning", "similarity": "67%"},
149
- {"title": "NLP and Resource Management", "url": "https://sample.org/nlp-resource-management", "similarity": "18%"}
150
- ]
151
-
152
- ms_table_data = [["#", "Source Title", "URL", "Similarity"]]
153
- for i, ms in enumerate(matched_sources, start=1):
154
- title_par = Paragraph(ms['title'], styles['Body'])
155
- # Use <a href="..."> for clickable link
156
- url_par = Paragraph(f'<a href="{ms["url"]}">{ms["url"]}</a>', styles['Body'])
157
- ms_table_data.append([str(i), title_par, url_par, ms['similarity']])
158
-
159
- ms_table = Table(ms_table_data, colWidths=[30, usable_width*0.35, usable_width*0.45, usable_width*0.15])
160
- ms_table.setStyle(TableStyle([
161
- ('BACKGROUND', (0,0), (-1,0), colors.HexColor("#f2f4f5")),
162
- ('TEXTCOLOR', (0,0), (-1,0), colors.HexColor("#333333")),
163
- ('ALIGN', (0,0), (-1,0), 'CENTER'),
164
- ('VALIGN', (0,0), (-1,-1), 'MIDDLE'),
165
- ('BOX', (0,0), (-1,-1), 0.6, colors.HexColor("#e0e0e0")),
166
- ('INNERGRID', (0,0), (-1,-1), 0.4, colors.HexColor("#efefef")),
167
- ('LEFTPADDING', (1,1), (1,-1), 6),
168
- ('LEFTPADDING', (2,1), (2,-1), 6),
169
- ('WORDWRAP', (2,1), (2,-1), 'CJK') # helps long URLs wrap
170
- ]))
171
- story.append(ms_table)
172
- story.append(Spacer(1, 14))
173
-
174
- # Matched Source final block
175
- matched_table = Table(
176
- [[Paragraph("<b>Matched Source Overview</b>", styles['Body'])],
177
- [Paragraph("Congratulations! Sections highlighted indicate similarity with external sources. Please review matched entries for exact references.", styles['Body'])]],
178
- colWidths=[usable_width]
179
- )
180
- matched_table.setStyle(TableStyle([
181
- ('BACKGROUND', (0,0), (-1,0), colors.HexColor("#f7fafb")),
182
- ('BOX', (0,0), (-1,-1), 0.5, colors.HexColor("#e6e6e6")),
183
- ('LEFTPADDING', (0,0), (-1,-1), 8),
184
- ('RIGHTPADDING', (0,0), (-1,-1), 8),
185
- ('TOPPADDING', (0,0), (-1,-1), 6),
186
- ('BOTTOMPADDING', (0,0), (-1,-1), 6),
187
- ]))
188
- story.append(matched_table)
189
- story.append(Spacer(1, 24))
190
-
191
- story.append(Paragraph("Generated by TrueWrite Scan • https://gopalkrushnamahapatra-truewrite-scan.static.hf.space", styles['Footer']))
192
-
193
- # Build PDF
194
- doc.build(story)
195
-
196
- print("PDF written to:", pdf_path)
 
1
+ # pdf_reports.py
2
  import os
3
+ import uuid
4
+ from datetime import datetime
5
  from reportlab.lib.pagesizes import A4
6
  from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
7
+ from reportlab.lib.enums import TA_CENTER, TA_RIGHT, TA_JUSTIFY, TA_LEFT
8
  from reportlab.lib import colors
9
+ from reportlab.platypus import (
10
+ BaseDocTemplate, PageTemplate, Frame, Paragraph, Spacer, Table, TableStyle
11
+ )
12
  from reportlab.pdfbase.ttfonts import TTFont
13
  from reportlab.pdfbase import pdfmetrics
 
14
 
15
# Body font selection: start from the built-in Times face and switch to
# DejaVu Serif only if registering the TrueType file succeeds. Any failure
# during registration deliberately leaves Times in place.
BODY_FONT = 'Times-Roman'
try:
    pdfmetrics.registerFont(
        TTFont('DejaVuSerif', '/usr/share/fonts/truetype/dejavu/DejaVuSerif.ttf')
    )
except Exception:
    BODY_FONT = 'Times-Roman'
else:
    BODY_FONT = 'DejaVuSerif'
21
+
22
+
23
def _build_doc(filepath, title_text, tiles, counts, sections, matched_sources, footer_text):
    """Lay out one report and write it to ``filepath`` as an A4 PDF.

    Every page gets a header (generation date on the left, ``title_text``
    centred) and a "Page N" footer. The body is built in this order: summary
    tiles, counts table, text sections, matched-sources table, the
    "Matched Source Overview" box, and a fixed "Generated by" line.

    Args:
        filepath: Destination path passed to ``BaseDocTemplate``.
        title_text: Text drawn centred in the header of every page.
        tiles: Sequence of dicts with ``'value'`` and ``'label'`` keys, one
            per tile. Any number of tiles is laid out in equal-width columns;
            an empty sequence skips the tile row.
        counts: Mapping whose keys form the first table row and whose values
            form the second. Falsy skips the table.
        sections: Iterable of dicts with an optional ``'heading'`` and a
            ``'paragraphs'`` list. Each paragraph is either a string or a
            dict with ``'text'`` and an optional ``'highlight'`` of
            ``'yellow'`` or ``'red'``.
        matched_sources: Iterable of dicts with optional ``'title'``,
            ``'url'`` and ``'similarity'`` keys. Falsy skips the table.
        footer_text: Text for the overview box. Falsy skips the box.

    Note:
        Headings, paragraph text, titles and ``footer_text`` are handed to
        ``Paragraph`` unchanged. Source URLs are XML-escaped here because
        they are data rather than markup.
    """
    # Local import keeps this block self-contained; both helpers are stdlib.
    from xml.sax.saxutils import escape, quoteattr

    PAGE_WIDTH, PAGE_HEIGHT = A4
    MARGIN = 36
    usable_width = PAGE_WIDTH - 2 * MARGIN

    styles = getSampleStyleSheet()
    styles.add(ParagraphStyle(name='ReportTitle', fontName=BODY_FONT, fontSize=18, alignment=TA_CENTER, leading=22))
    styles.add(ParagraphStyle(name='SmallRight', fontName=BODY_FONT, fontSize=9, alignment=TA_RIGHT, textColor=colors.HexColor("#555555")))
    styles.add(ParagraphStyle(name='TileBig', fontName=BODY_FONT, fontSize=30, alignment=TA_CENTER, leading=32))
    styles.add(ParagraphStyle(name='TileLabel', fontName=BODY_FONT, fontSize=10, alignment=TA_CENTER, textColor=colors.HexColor("#666666")))
    styles.add(ParagraphStyle(name='SectionHeading', fontName=BODY_FONT, fontSize=13, spaceBefore=8, spaceAfter=4, leading=15))
    styles.add(ParagraphStyle(name='Body', fontName=BODY_FONT, fontSize=11, leading=15, alignment=TA_JUSTIFY))
    styles.add(ParagraphStyle(name='HighlightYellow', fontName=BODY_FONT, fontSize=11, leading=15, backColor=colors.HexColor("#fff3b0"), alignment=TA_JUSTIFY))
    styles.add(ParagraphStyle(name='HighlightRed', fontName=BODY_FONT, fontSize=11, leading=15, backColor=colors.HexColor("#ffd6d6"), alignment=TA_JUSTIFY))
    styles.add(ParagraphStyle(name='Footer', fontName=BODY_FONT, fontSize=9, alignment=TA_RIGHT, textColor=colors.HexColor("#666666")))
    styles.add(ParagraphStyle(name='MatchedHeader', fontName=BODY_FONT, fontSize=12, leading=14, alignment=TA_LEFT, spaceBefore=6, spaceAfter=6))

    # Computed once so every page of the document shows the same timestamp.
    date_str = datetime.now().strftime("%d %B %Y, %H:%M")

    def header_footer(canvas, doc):
        """Draw the date/title header and the page-number footer on a page."""
        canvas.saveState()
        canvas.setFont(BODY_FONT, 9)
        canvas.setFillColor(colors.HexColor("#555555"))
        canvas.drawString(MARGIN, PAGE_HEIGHT - MARGIN + 8, f"Date: {date_str}")
        canvas.setFont(BODY_FONT, 16)
        canvas.setFillColor(colors.black)
        canvas.drawCentredString(PAGE_WIDTH / 2.0, PAGE_HEIGHT - MARGIN + 4, title_text)
        canvas.setFont(BODY_FONT, 9)
        canvas.setFillColor(colors.HexColor("#666666"))
        canvas.drawRightString(PAGE_WIDTH - MARGIN, MARGIN - 10, f"Page {doc.page}")
        canvas.restoreState()

    doc = BaseDocTemplate(filepath, pagesize=A4,
                          leftMargin=MARGIN, rightMargin=MARGIN,
                          topMargin=MARGIN, bottomMargin=MARGIN)
    frame = Frame(MARGIN, MARGIN, usable_width, PAGE_HEIGHT - 2 * MARGIN, id='normal')
    template = PageTemplate(id='report', frames=[frame], onPage=header_footer)
    doc.addPageTemplates([template])

    story = []

    # Summary tiles: one equal-width column per supplied tile (normally four).
    tile_values = list(tiles or [])
    if tile_values:
        tiles_data = [
            [Paragraph(f"<b>{tile['value']}</b>", styles['TileBig']) for tile in tile_values],
            [Paragraph(tile['label'], styles['TileLabel']) for tile in tile_values],
        ]
        tiles_table = Table(
            tiles_data,
            colWidths=[usable_width / len(tile_values)] * len(tile_values),
            rowHeights=[46, 18],
        )
        tiles_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor("#f7f7f9")),
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
            ('BOX', (0, 0), (-1, -1), 0.6, colors.HexColor("#dddddd")),
        ]))
        story.append(tiles_table)
        story.append(Spacer(1, 12))

    # Counts row: keys on the first row, values on the second.
    if counts:
        counts_table = Table([list(counts.keys()), list(counts.values())],
                             colWidths=[usable_width / len(counts)] * len(counts))
        counts_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor("#f4f6f7")),
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
            ('BOX', (0, 0), (-1, -1), 0.5, colors.HexColor("#e6e6e6")),
        ]))
        story.append(counts_table)
        story.append(Spacer(1, 12))

    # Sections: anything other than 'yellow'/'red' falls back to the Body style.
    highlight_styles = {
        'yellow': styles['HighlightYellow'],
        'red': styles['HighlightRed'],
    }
    for sec in sections or []:
        if sec.get('heading'):
            story.append(Paragraph(sec['heading'], styles['SectionHeading']))
        # `or []` also covers a 'paragraphs' key that is present but None.
        for para in sec.get('paragraphs') or []:
            if isinstance(para, dict):
                text = para.get('text', '')
                style = highlight_styles.get(para.get('highlight'), styles['Body'])
            else:
                text = para
                style = styles['Body']
            story.append(Paragraph(text, style))
            story.append(Spacer(1, 6))

    story.append(Spacer(1, 10))

    # Matched Sources table (if any)
    if matched_sources:
        story.append(Paragraph("Matched Sources", styles['MatchedHeader']))
        ms_table_data = [["#", "Source Title", "URL", "Similarity"]]
        for i, ms in enumerate(matched_sources, start=1):
            title_par = Paragraph(ms.get('title', ''), styles['Body'])
            url = str(ms.get('url') or '')
            # Escape the URL for both the attribute and the visible text so
            # characters such as '&' or '"' cannot corrupt the markup.
            if url:
                url_markup = f'<link href={quoteattr(url)}>{escape(url)}</link>'
            else:
                url_markup = ''
            url_par = Paragraph(url_markup, styles['Body'])
            ms_table_data.append([str(i), title_par, url_par, ms.get('similarity', '')])
        ms_table = Table(ms_table_data, colWidths=[30, usable_width * 0.35, usable_width * 0.45, usable_width * 0.15])
        ms_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor("#f2f4f5")),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.HexColor("#333333")),
            ('ALIGN', (0, 0), (-1, 0), 'CENTER'),
            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
            ('BOX', (0, 0), (-1, -1), 0.6, colors.HexColor("#e0e0e0")),
            ('INNERGRID', (0, 0), (-1, -1), 0.4, colors.HexColor("#efefef")),
            ('LEFTPADDING', (1, 1), (1, -1), 6),
            ('LEFTPADDING', (2, 1), (2, -1), 6),
        ]))
        story.append(ms_table)
        story.append(Spacer(1, 14))

    # Matched Source Overview block (footer_text)
    if footer_text:
        matched_table = Table(
            [[Paragraph("<b>Matched Source Overview</b>", styles['Body'])],
             [Paragraph(footer_text, styles['Body'])]],
            colWidths=[usable_width]
        )
        matched_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor("#f7fafb")),
            ('BOX', (0, 0), (-1, -1), 0.5, colors.HexColor("#e6e6e6")),
            ('LEFTPADDING', (0, 0), (-1, -1), 8),
            ('RIGHTPADDING', (0, 0), (-1, -1), 8),
            ('TOPPADDING', (0, 0), (-1, -1), 6),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
        ]))
        story.append(matched_table)
        story.append(Spacer(1, 24))

    story.append(Paragraph("Generated by TrueWrite Scan • https://gopalkrushnamahapatra-truewrite-scan.static.hf.space", styles['Footer']))

    doc.build(story)
155
+
156
+
157
def generate_report(report_type: str, out_dir: str = "/tmp", **kwargs) -> str:
    """Build a PDF report in ``out_dir`` and return its path.

    Args:
        report_type: "ai" | "grammar" | "plagiarism". Used only as the
            filename prefix; characters other than letters, digits, ``-``
            and ``_`` are replaced with ``_`` so the file stays in ``out_dir``.
        out_dir: Directory for the PDF; created if it does not exist.
        **kwargs: Optional report content:
            - title_text: str, header title (defaults to "Report")
            - tiles: list of dicts [{'value': '12%', 'label': 'Plagiarism'}, ...]
            - counts: dict {'Words': 950, ...}
            - sections: list [{'heading': '', 'paragraphs': [...]}]
            - matched_sources: list [{'title', 'url', 'similarity'}]
            - footer_text: str

    Returns:
        Path to the generated PDF.
    """
    os.makedirs(out_dir, exist_ok=True)

    # Keep path separators and other unsafe characters out of the filename.
    safe_type = "".join(
        ch if (ch.isalnum() or ch in "-_") else "_" for ch in str(report_type)
    ) or "report"
    filename = f"{safe_type}_report_{uuid.uuid4().hex[:8]}.pdf"
    filepath = os.path.join(out_dir, filename)

    # An explicit None falls back to the default; an explicit "" is kept.
    title_text = kwargs.get('title_text')
    if title_text is None:
        title_text = "Report"

    tiles = kwargs.get('tiles') or [
        {'value': '0%', 'label': 'Plagiarism'},
        {'value': '0%', 'label': 'Exact Match'},
        {'value': '0%', 'label': 'Partial Match'},
        {'value': '100%', 'label': 'Unique'},
    ]
    counts = kwargs.get('counts') or {}
    sections = kwargs.get('sections') or []
    matched_sources = kwargs.get('matched_sources') or []
    footer_text = kwargs.get('footer_text') or ''

    _build_doc(filepath, title_text, tiles, counts, sections, matched_sources, footer_text)
    return filepath