PEBL 2.2
Psychology Experiment Building Language - Cross-platform psychological experiment development system
FormatParser.cpp
Go to the documentation of this file.
1#include "FormatParser.h"
2#include "../objects/PColor.h"
3#include <cctype>
4#include <algorithm>
5
6namespace FormatParser {
7
// Returns a copy of `str` with every byte passed through std::tolower.
// Conversion is per byte, so multi-byte (e.g. UTF-8) characters are left
// untouched in the default "C" locale.
static std::string toLower(const std::string& str) {
    std::string result = str;
    // std::tolower is only defined for values representable as unsigned char
    // (or EOF). Plain char may be signed, so bytes >= 0x80 must be converted
    // to unsigned char first to avoid undefined behaviour.
    std::transform(result.begin(), result.end(), result.begin(),
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
    return result;
}
14
18bool ParseColor(const std::string& colorStr, PColor& outColor) {
19 // Handle hex colors (#RRGGBB or #RGB)
20 if (colorStr.length() > 0 && colorStr[0] == '#') {
21 std::string hex = colorStr.substr(1);
22
23 // Convert 3-digit hex (#RGB) to 6-digit (#RRGGBB)
24 if (hex.length() == 3) {
25 hex = std::string(1, hex[0]) + std::string(1, hex[0]) +
26 std::string(1, hex[1]) + std::string(1, hex[1]) +
27 std::string(1, hex[2]) + std::string(1, hex[2]);
28 }
29
30 if (hex.length() == 6) {
31 try {
32 unsigned int r = std::stoi(hex.substr(0, 2), nullptr, 16);
33 unsigned int g = std::stoi(hex.substr(2, 2), nullptr, 16);
34 unsigned int b = std::stoi(hex.substr(4, 2), nullptr, 16);
35
36 outColor = PColor(r, g, b, 255);
37 return true;
38 } catch (...) {
39 return false;
40 }
41 }
42 return false;
43 }
44
45 // Handle named colors using PEBL's existing color system (752 colors)
46 try {
47 outColor = PColor(colorStr); // Uses PColor's SetColorByName() internally
48 return true;
49 } catch (...) {
50 // Color name not found
51 return false;
52 }
53}
54
56std::vector<FormatSegment> ParseFormattedText(const std::string& input, int charWidth) {
57 std::vector<FormatSegment> segments;
58
59 // Current formatting state
60 bool boldOn = false;
61 bool italicOn = false;
62 bool underlineOn = false;
63 bool colorOn = false;
64 PColor currentColor(0, 0, 0, 255); // Default black
65 bool sizeOn = false;
66 int currentSize = 0;
67 int currentIndent = 0; // Current indent in pixels
68 Justification currentJustification = JUSTIFY_NONE; // Current text justification
69
70 std::string currentText;
71 size_t pos = 0;
72
73 while (pos < input.length()) {
74 // Look for tag start
75 if (input[pos] == '<') {
76 // Save any accumulated text as a segment
77 if (!currentText.empty()) {
78 FormatSegment seg;
79 seg.text = currentText;
80 seg.style = (boldOn ? 1 : 0) + (italicOn ? 2 : 0) + (underlineOn ? 4 : 0);
81 seg.hasColorOverride = colorOn;
82 seg.colorOverride = currentColor;
83 seg.hasSizeOverride = sizeOn;
84 seg.sizeOverride = currentSize;
85 seg.indentPixels = currentIndent;
86 seg.justification = currentJustification;
87 segments.push_back(seg);
88 currentText.clear();
89 }
90
91 // Find tag end
92 size_t tagEnd = input.find('>', pos);
93 if (tagEnd == std::string::npos) {
94 // No closing >, treat as literal text
95 currentText += input[pos];
96 pos++;
97 continue;
98 }
99
100 // Extract tag content (between < and >)
101 std::string tag = input.substr(pos + 1, tagEnd - pos - 1);
102 pos = tagEnd + 1;
103
104 // Parse tag
105 bool isClosing = (tag.length() > 0 && tag[0] == '/');
106 std::string tagName = isClosing ? tag.substr(1) : tag;
107
108 // Extract just the tag name (first word before space or =)
109 // This handles both <tag=value> and <tag attr=value> formats
110 std::string param;
111 size_t spacePos = tagName.find(' ');
112 size_t eqPos = tagName.find('=');
113
114 // If there's a space before an =, the tag has attributes (e.g., <p size=80>)
115 // Otherwise it's a simple tag with parameter (e.g., <h1=center>)
116 if (spacePos != std::string::npos && (eqPos == std::string::npos || spacePos < eqPos)) {
117 // Tag with attributes: extract just the tag name, leave rest in tag
118 tagName = tagName.substr(0, spacePos);
119 } else if (eqPos != std::string::npos) {
120 // Simple tag with parameter: extract param
121 param = tagName.substr(eqPos + 1);
122 tagName = tagName.substr(0, eqPos);
123 }
124
125 tagName = toLower(tagName);
126
127 // Process tag
128 if (tagName == "br" || tagName == "br/") {
129 // Line break - add newline character to current text
130 currentText += '\n';
131
132 // Flush segment with newline
133 if (!currentText.empty()) {
134 FormatSegment seg;
135 seg.text = currentText;
136 seg.style = (boldOn ? 1 : 0) + (italicOn ? 2 : 0) + (underlineOn ? 4 : 0);
137 seg.hasColorOverride = colorOn;
138 seg.colorOverride = currentColor;
139 seg.hasSizeOverride = sizeOn;
140 seg.sizeOverride = currentSize;
141 seg.indentPixels = currentIndent;
142 seg.justification = currentJustification;
143 segments.push_back(seg);
144 currentText.clear();
145 }
146
147 // Reset indent and justification after line break
148 currentIndent = 0;
149 currentJustification = JUSTIFY_NONE;
150 } else if (tagName == "b") {
151 boldOn = !isClosing;
152 } else if (tagName == "i") {
153 italicOn = !isClosing;
154 } else if (tagName == "u") {
155 underlineOn = !isClosing;
156 } else if (tagName == "c") {
157 if (isClosing) {
158 colorOn = false;
159 } else if (!param.empty()) {
160 if (ParseColor(param, currentColor)) {
161 colorOn = true;
162 }
163 }
164 } else if (tagName == "size") {
165 // DEPRECATED: <size=N> tag (kept for backward compatibility)
166 // Now interpreted as proportional size (100 = base font)
167 // For absolute point sizes, the old behavior would break with adaptive textboxes
168 if (isClosing) {
169 sizeOn = false;
170 } else if (!param.empty()) {
171 try {
172 int size = std::stoi(param);
173 if (size > 0 && size < 1000) { // Sanity check (now percentage)
174 sizeOn = true;
175 currentSize = size; // Now proportional (100 = base)
176 }
177 } catch (...) {
178 // Invalid size, ignore
179 }
180 }
181 } else if (tagName == "h1" || tagName == "h2" || tagName == "h3" ||
182 tagName == "h4" || tagName == "h5" || tagName == "h6") {
183 // Header tags - shortcuts for bold + proportional size
184 // Can optionally include justification: <h1=center>, <h2=right>, etc.
185 if (!isClosing) {
186 boldOn = true;
187 sizeOn = true;
188 // Headers use proportional sizing (100 = base font)
189 // H1=230%, H2=200%, H3=170%, H4=140%, H5=130%, H6=115%
190 int level = tagName[1] - '0'; // Convert '1'-'6' to 1-6
191 if (level == 1) currentSize = 230;
192 else if (level == 2) currentSize = 200;
193 else if (level == 3) currentSize = 170;
194 else if (level == 4) currentSize = 140;
195 else if (level == 5) currentSize = 130;
196 else if (level == 6) currentSize = 115;
197
198 // Parse optional justification parameter
199 if (!param.empty()) {
200 std::string justifyParam = toLower(param);
201 if (justifyParam == "left") currentJustification = JUSTIFY_LEFT;
202 else if (justifyParam == "center") currentJustification = JUSTIFY_CENTER;
203 else if (justifyParam == "right") currentJustification = JUSTIFY_RIGHT;
204 }
205 } else {
206 // Closing header tag - reset formatting and add implicit newline
207 boldOn = false;
208 sizeOn = false;
209 currentJustification = JUSTIFY_NONE;
210
211 // Headers are block-level elements - add newline after closing tag
212 currentText += '\n';
213 }
214 } else if (tagName == "indent") {
215 // Indent tag - sets absolute horizontal position like a tab stop
216 // <indent> defaults to 4 characters, <indent=8> sets to 8 character widths from left
217 int indentChars = 4; // Default
218 if (!param.empty()) {
219 try {
220 indentChars = std::stoi(param);
221 } catch (...) {
222 // Invalid param, use default
223 }
224 }
225 currentIndent = indentChars * charWidth; // Absolute position, not cumulative
226 } else if (tagName == "p") {
227 // Paragraph tag - sets text justification and/or proportional size
228 // NEW syntax: <p align=center>, <p size=150>, <p align=center size=120>
229 // OLD syntax (deprecated): <p=center> (still supported for backward compatibility)
230
231 if (isClosing) {
232 // Closing </p> tag - reset paragraph-level formatting
233 sizeOn = false;
234 currentJustification = JUSTIFY_NONE;
235 } else {
236 // Opening <p> tag
237 // Check for old syntax first: <p=center>
238 if (!param.empty() && tag.find("align=") == std::string::npos && tag.find("size=") == std::string::npos) {
239 // Old syntax: <p=center> where param is just "center"
240 std::string justifyParam = toLower(param);
241 if (justifyParam == "left") currentJustification = JUSTIFY_LEFT;
242 else if (justifyParam == "center") currentJustification = JUSTIFY_CENTER;
243 else if (justifyParam == "right") currentJustification = JUSTIFY_RIGHT;
244 } else {
245 // New syntax: parse multiple key=value attributes
246 // Extract align= and size= from the tag
247 size_t alignPos = tag.find("align=");
248 if (alignPos != std::string::npos) {
249 size_t alignStart = alignPos + 6; // Skip "align="
250 size_t alignEnd = tag.find_first_of(" >", alignStart);
251 if (alignEnd == std::string::npos) alignEnd = tag.length();
252 std::string alignValue = toLower(tag.substr(alignStart, alignEnd - alignStart));
253
254 if (alignValue == "left") currentJustification = JUSTIFY_LEFT;
255 else if (alignValue == "center") currentJustification = JUSTIFY_CENTER;
256 else if (alignValue == "right") currentJustification = JUSTIFY_RIGHT;
257 }
258
259 size_t sizePos = tag.find("size=");
260 if (sizePos != std::string::npos) {
261 size_t sizeStart = sizePos + 5; // Skip "size="
262 size_t sizeEnd = tag.find_first_of(" >", sizeStart);
263 if (sizeEnd == std::string::npos) sizeEnd = tag.length();
264 std::string sizeValue = tag.substr(sizeStart, sizeEnd - sizeStart);
265
266 try {
267 int size = std::stoi(sizeValue);
268 if (size > 0 && size < 1000) { // Sanity check (percentage)
269 sizeOn = true;
270 currentSize = size; // Proportional (100 = base)
271 }
272 } catch (...) {
273 // Invalid size, ignore
274 }
275 }
276 }
277 }
278 } else if (tagName == "hr") {
279 // Horizontal rule - create special segment with newline so it has a position
280 FormatSegment hrSeg;
281 hrSeg.isHorizontalRule = true;
282 hrSeg.text = "\n"; // Give it a newline so it occupies a position in stripped text
283 hrSeg.indentPixels = currentIndent;
284 segments.push_back(hrSeg);
285 // Reset indent after horizontal rule
286 currentIndent = 0;
287 } else if (tagName == "li") {
288 // Bullet list item - automatically starts on a new line (like HTML <li>)
289
290 // First, flush any current text (without adding newline)
291 if (!currentText.empty()) {
292 FormatSegment seg;
293 seg.text = currentText; // No newline added here
294 seg.style = (boldOn ? 1 : 0) + (italicOn ? 2 : 0) + (underlineOn ? 4 : 0);
295 seg.hasColorOverride = colorOn;
296 seg.colorOverride = currentColor;
297 seg.hasSizeOverride = sizeOn;
298 seg.sizeOverride = currentSize;
299 seg.indentPixels = currentIndent;
300 seg.justification = currentJustification;
301 segments.push_back(seg);
302 currentText.clear();
303 }
304
305 // If there are existing segments, ensure the last one ends with a newline
306 if (!segments.empty()) {
307 FormatSegment& lastSeg = segments.back();
308 if (!lastSeg.text.empty() && lastSeg.text.back() != '\n') {
309 lastSeg.text += '\n';
310 }
311 currentIndent = 0; // Reset indent for new line
312 }
313
314 // Indent the bullet itself from the left margin
315 currentIndent += 2 * charWidth; // Indent bullet from left margin
316
317 FormatSegment liSeg;
318 liSeg.isBulletItem = true;
319 liSeg.text = "• "; // Unicode bullet character
320 liSeg.style = (boldOn ? 1 : 0) + (italicOn ? 2 : 0) + (underlineOn ? 4 : 0);
321 liSeg.hasColorOverride = colorOn;
322 liSeg.colorOverride = currentColor;
323 liSeg.hasSizeOverride = sizeOn;
324 liSeg.sizeOverride = currentSize;
325 liSeg.indentPixels = currentIndent;
326 liSeg.justification = currentJustification;
327 segments.push_back(liSeg);
328
329 // Indent the following text further
330 currentIndent += 2 * charWidth; // Total indent is now 4 * charWidth
331 } else {
332 // Unknown tag, treat as literal text
333 currentText += '<';
334 currentText += (isClosing ? "/" : "");
335 currentText += tagName;
336 if (!param.empty()) {
337 currentText += '=';
338 currentText += param;
339 }
340 currentText += '>';
341 }
342 } else {
343 // Regular character, add to current text
344 char ch = input[pos];
345 currentText += ch;
346
347 // If we hit a newline, flush the current segment and reset indent
348 if (ch == '\n') {
349 // Save segment with newline
350 if (!currentText.empty()) {
351 FormatSegment seg;
352 seg.text = currentText;
353 seg.style = (boldOn ? 1 : 0) + (italicOn ? 2 : 0) + (underlineOn ? 4 : 0);
354 seg.hasColorOverride = colorOn;
355 seg.colorOverride = currentColor;
356 seg.hasSizeOverride = sizeOn;
357 seg.sizeOverride = currentSize;
358 seg.indentPixels = currentIndent;
359 seg.justification = currentJustification;
360 segments.push_back(seg);
361 currentText.clear();
362 }
363 // Reset indent and justification for next line
364 currentIndent = 0;
365 currentJustification = JUSTIFY_NONE;
366 }
367
368 pos++;
369 }
370 }
371
372 // Add final segment if there's remaining text
373 if (!currentText.empty()) {
374 FormatSegment seg;
375 seg.text = currentText;
376 seg.style = (boldOn ? 1 : 0) + (italicOn ? 2 : 0) + (underlineOn ? 4 : 0);
377 seg.hasColorOverride = colorOn;
378 seg.colorOverride = currentColor;
379 seg.hasSizeOverride = sizeOn;
380 seg.sizeOverride = currentSize;
381 seg.indentPixels = currentIndent;
382 seg.justification = currentJustification;
383 segments.push_back(seg);
384 }
385
386 // If no segments were created, return a single empty segment
387 if (segments.empty()) {
388 segments.push_back(FormatSegment());
389 }
390
391 return segments;
392}
393
// Returns `input` with all recognised formatting tags removed.
//
// Substitutions made while stripping:
//   <br>, <br/>   -> "\n"
//   <li>          -> "• "
//   <hr>          -> "--------------------\n"
// Every other recognised tag (b, i, u, c, size, h1-h6, indent, p), opening or
// closing, with or without a parameter or attributes, is dropped.
// A '<' that has no matching '>' or that starts an unrecognised tag is kept
// as a literal character and scanning resumes right after it.
std::string StripFormatting(const std::string& input) {
    std::string result;
    size_t pos = 0;

    while (pos < input.length()) {
        if (input[pos] != '<') {
            // Regular character
            result += input[pos];
            ++pos;
            continue;
        }

        const size_t tagEnd = input.find('>', pos);
        if (tagEnd == std::string::npos) {
            // No closing >, treat as literal
            result += input[pos];
            ++pos;
            continue;
        }

        // Extract the lower-cased tag name: skip an optional leading '/',
        // then read up to the first space or '='. Stopping at a space as well
        // as '=' is what lets attribute-style tags (<p align=center>,
        // <br />) be recognised, matching the tag-name rule used when parsing.
        size_t nameIdx = pos + 1;
        if (nameIdx < tagEnd && input[nameIdx] == '/') {
            ++nameIdx;
        }
        std::string tagName;
        for (; nameIdx < tagEnd; ++nameIdx) {
            const unsigned char c = static_cast<unsigned char>(input[nameIdx]);
            if (c == ' ' || c == '=') {
                break;
            }
            tagName += static_cast<char>(std::tolower(c));
        }

        const bool isLineBreak = (tagName == "br" || tagName == "br/");
        const bool isRecognized =
            isLineBreak ||
            tagName == "b" || tagName == "i" || tagName == "u" || tagName == "c" ||
            tagName == "size" ||
            tagName == "h1" || tagName == "h2" || tagName == "h3" ||
            tagName == "h4" || tagName == "h5" || tagName == "h6" ||
            tagName == "indent" || tagName == "hr" || tagName == "li" || tagName == "p";

        if (!isRecognized) {
            // Unknown tag, treat the '<' as literal text
            result += input[pos];
            ++pos;
            continue;
        }

        // Recognised tag: drop it, substituting plain-text stand-ins
        if (isLineBreak) {
            result += '\n';
        } else if (tagName == "li") {
            result += "• ";
        } else if (tagName == "hr") {
            result += "--------------------\n";
        }
        pos = tagEnd + 1;
    }

    return result;
}
461
462} // namespace FormatParser
Justification
Justification types for paragraph-level alignment.
std::vector< FormatSegment > ParseFormattedText(const std::string &input, int charWidth)
Parse formatted text into segments.
std::string StripFormatting(const std::string &input)
Strip all formatting tags from text.
bool ParseColor(const std::string &colorStr, PColor &outColor)