No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.

MMParser.m 41KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321
  1. //
  2. // MMParser.m
  3. // MMMarkdown
  4. //
  5. // Copyright (c) 2012 Matt Diephouse.
  6. //
  7. // Permission is hereby granted, free of charge, to any person obtaining a copy
  8. // of this software and associated documentation files (the "Software"), to deal
  9. // in the Software without restriction, including without limitation the rights
  10. // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11. // copies of the Software, and to permit persons to whom the Software is
  12. // furnished to do so, subject to the following conditions:
  13. //
  14. // The above copyright notice and this permission notice shall be included in
  15. // all copies or substantial portions of the Software.
  16. //
  17. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23. // THE SOFTWARE.
  24. //
  25. #import "MMParser.h"
  26. #import "MMDocument.h"
  27. #import "MMDocument_Private.h"
  28. #import "MMElement.h"
  29. #import "MMHTMLParser.h"
  30. #import "MMScanner.h"
  31. #import "MMSpanParser.h"
  // The two kinds of list marker that the block parser distinguishes.
  typedef NS_ENUM(NSInteger, MMListType) {
      // Items introduced by `*`, `-` or `+`.
      MMListTypeBulleted = 0,
      // Items introduced by one or more decimal digits followed by `.`.
      MMListTypeNumbered = 1,
  };
  // Returns the HTML entity that replaces `character` when code text is emitted.
  //
  // Only the three characters that the code-line parser escapes (&, < and >) are
  // mapped. Any other character yields an empty string.
  static NSString * __HTMLEntityForCharacter(unichar character)
  {
      switch (character)
      {
          case '&':
              // Must be the escaped form: returning a bare "&" would leave the
              // ampersand unescaped in the generated HTML.
              return @"&amp;";
          case '<':
              return @"&lt;";
          case '>':
              return @"&gt;";
          default:
              return @"";
      }
  }
  // Private state of the block parser. All three values are assigned once in
  // -initWithExtensions: and are read-only afterwards.
  @interface MMParser ()
  // Bitmask of optional syntax (e.g. fenced code blocks, tables) to recognise.
  @property (nonatomic, assign, readonly) MMMarkdownExtensions extensions;
  // Parser used for HTML comments and block-level HTML tags.
  @property (nonatomic, strong, readonly) MMHTMLParser *htmlParser;
  // Parser used for the inline content of block elements.
  @property (nonatomic, strong, readonly) MMSpanParser *spanParser;
  @end
  55. @implementation MMParser
  56. #pragma mark - Public Methods
  // Creates a parser that recognises the given set of Markdown extensions.
  //
  // The same extension mask is forwarded to the span parser created here.
  - (id)initWithExtensions:(MMMarkdownExtensions)extensions
  {
      if (!(self = [super init]))
          return nil;

      _extensions = extensions;
      _htmlParser = [[MMHTMLParser alloc] init];
      _spanParser = [[MMSpanParser alloc] initWithExtensions:extensions];
      return self;
  }
  // Parses `markdown` into a document of block elements.
  //
  // Tabs are expanded to spaces first, and the document is built from the
  // expanded text. Link definitions are resolved once all elements are parsed.
  // Note: this method never writes to `error`.
  - (MMDocument *)parseMarkdown:(NSString *)markdown error:(__autoreleasing NSError **)error
  {
      // Replacing every tab is a simplification. It would be better to keep
      // them, but this will do for now.
      NSString *expandedMarkdown = [self _removeTabsFromString:markdown];

      MMScanner *scanner = [MMScanner scannerWithString:expandedMarkdown];
      MMDocument *document = [MMDocument documentWithMarkdown:expandedMarkdown];

      NSArray *blockElements = [self _parseElementsWithScanner:scanner];
      document.elements = blockElements;
      [self _updateLinksFromDefinitionsInDocument:document];

      return document;
  }
  78. #pragma mark - Private Methods
  // Appends the rest of the current line to `element` as an inner range and
  // moves the scanner to the next line.
  //
  // When the line opens an HTML comment ("<!--") that is still open at the end
  // of the line, every line the comment runs across is appended as well, so a
  // comment can span blank lines.
  - (void)_addTextLineToElement:(MMElement *)element withScanner:(MMScanner *)scanner
  {
      NSCharacterSet *notOpenAngle = [[NSCharacterSet characterSetWithCharactersInString:@"<"] invertedSet];
      NSCharacterSet *notDash = [[NSCharacterSet characterSetWithCharactersInString:@"-"] invertedSet];

      // Lines fully consumed by a comment so far, and the line currently being read.
      NSMutableArray *finishedLines = [NSMutableArray new];
      NSRange pendingLine = scanner.currentRange;

      [scanner beginTransaction];
      while (!scanner.atEndOfLine)
      {
          [scanner skipCharactersFromSet:notOpenAngle];
          if (![scanner matchString:@"<!--"])
          {
              [scanner advance];
              continue;
          }

          // Inside a comment: read on, across lines if needed, until "-->".
          while (!scanner.atEndOfString)
          {
              [scanner skipCharactersFromSet:notDash];
              if (scanner.atEndOfLine)
              {
                  [finishedLines addObject:[NSValue valueWithRange:pendingLine]];
                  [scanner advanceToNextLine];
                  pendingLine = scanner.currentRange;
                  continue;
              }
              if ([scanner matchString:@"-->"])
                  break;
              [scanner advance];
          }
      }
      // The scan is only kept when a comment actually crossed a line boundary.
      BOOL crossedLines = finishedLines.count > 0;
      [scanner commitTransaction:crossedLines];

      for (NSValue *finishedLine in finishedLines)
      {
          [element addInnerRange:finishedLine.rangeValue];
      }
      [element addInnerRange:pendingLine];
      [scanner advanceToNextLine];
  }
  // Returns a copy of `aString` in which every tab is replaced by the number of
  // spaces needed to reach the next tab stop (tab stops are 4 columns apart).
  //
  // Columns are counted in UTF-16 units from the character after the most
  // recent "\n". Returns nil when `aString` is nil.
  - (NSString *)_removeTabsFromString:(NSString *)aString
  {
      static const NSUInteger kTabWidth = 4;

      // Messaging nil would yield a zeroed range (location 0, never NSNotFound)
      // and the loop below would spin forever, so bail out early.
      if (aString == nil)
          return nil;

      NSMutableString *result = [aString mutableCopy];
      NSCharacterSet *tabAndNewline = [NSCharacterSet characterSetWithCharactersInString:@"\t\n"];

      NSUInteger lineLocation = 0;
      NSRange searchRange = NSMakeRange(0, result.length);
      NSRange resultRange = [result rangeOfCharacterFromSet:tabAndNewline options:0 range:searchRange];

      while (resultRange.location != NSNotFound)
      {
          NSUInteger resumeLocation;
          if ([result characterAtIndex:resultRange.location] == '\n')
          {
              // A new line starts right after the newline; columns restart there.
              lineLocation = resultRange.location + 1;
              resumeLocation = lineLocation;
          }
          else
          {
              // Pad with 1-4 spaces so the text after the tab lands on a tab stop.
              NSUInteger column = resultRange.location - lineLocation;
              NSUInteger numOfSpaces = kTabWidth - (column % kTabWidth);
              NSString *padding = [@"" stringByPaddingToLength:numOfSpaces withString:@" " startingAtIndex:0];
              [result replaceCharactersInRange:resultRange withString:padding];
              // The inserted spaces can never match, so continue after them.
              resumeLocation = resultRange.location + numOfSpaces;
          }

          searchRange = NSMakeRange(resumeLocation, result.length - resumeLocation);
          resultRange = [result rangeOfCharacterFromSet:tabAndNewline options:0 range:searchRange];
      }

      return result;
  }
  // Parses block elements until the scanner reaches the end of its string and
  // returns them in document order.
  //
  // When no block element can be parsed at the current position, leading
  // whitespace is skipped and, if that leaves the scanner at the end of the
  // line, the line is dropped.
  - (NSArray *)_parseElementsWithScanner:(MMScanner *)scanner
  {
      NSMutableArray *elements = [NSMutableArray new];

      while (!scanner.atEndOfString)
      {
          MMElement *block = [self _parseBlockElementWithScanner:scanner];
          if (block != nil)
          {
              [elements addObject:block];
              continue;
          }

          [scanner skipCharactersFromSet:[NSCharacterSet whitespaceCharacterSet]];
          if (scanner.atEndOfLine)
              [scanner advanceToNextLine];
      }

      return elements;
  }
  // Tries each block-level parser in a fixed order and returns the first
  // element produced, or nil when none of them matches.
  //
  // Every attempt runs inside its own scanner transaction, which is committed
  // only when the attempt produced an element.
  - (MMElement *)_parseBlockElementWithScanner:(MMScanner *)scanner
  {
      // Wraps a single parse attempt in a begin/commit transaction pair.
      MMElement *(^attempt)(MMElement *(^)(void)) = ^MMElement *(MMElement *(^parse)(void)) {
          [scanner beginTransaction];
          MMElement *parsed = parse();
          [scanner commitTransaction:parsed != nil];
          return parsed;
      };

      MMElement *found = nil;

      found = attempt(^MMElement *{ return [self.htmlParser parseCommentWithScanner:scanner]; });
      if (found)
          return found;

      found = attempt(^MMElement *{ return [self _parseHTMLWithScanner:scanner]; });
      if (found)
          return found;

      found = attempt(^MMElement *{ return [self _parsePrefixHeaderWithScanner:scanner]; });
      if (found)
          return found;

      found = attempt(^MMElement *{ return [self _parseUnderlinedHeaderWithScanner:scanner]; });
      if (found)
          return found;

      found = attempt(^MMElement *{ return [self _parseBlockquoteWithScanner:scanner]; });
      if (found)
          return found;

      // Indented code goes ahead of the remaining parsers (fenced code, tables,
      // rules, lists, link definitions, paragraphs): its four-space rule wins
      // over all of them.
      found = attempt(^MMElement *{ return [self _parseCodeBlockWithScanner:scanner]; });
      if (found)
          return found;

      if (self.extensions & MMMarkdownExtensionsFencedCodeBlocks)
      {
          found = attempt(^MMElement *{ return [self _parseFencedCodeBlockWithScanner:scanner]; });
          if (found)
              return found;
      }

      if (self.extensions & MMMarkdownExtensionsTables)
      {
          found = attempt(^MMElement *{ return [self _parseTableWithScanner:scanner]; });
          if (found)
              return found;
      }

      // Rules come before lists because both can start with * or -.
      found = attempt(^MMElement *{ return [self _parseHorizontalRuleWithScanner:scanner]; });
      if (found)
          return found;

      found = attempt(^MMElement *{ return [self _parseListWithScanner:scanner]; });
      if (found)
          return found;

      found = attempt(^MMElement *{ return [self _parseLinkDefinitionWithScanner:scanner]; });
      if (found)
          return found;

      // Last resort: a paragraph. This is nil when even that fails.
      return attempt(^MMElement *{ return [self _parseParagraphWithScanner:scanner]; });
  }
  // Parses a block-level HTML tag, which is only attempted when the scanner
  // sits at the beginning of a line. Returns nil otherwise.
  - (MMElement *)_parseHTMLWithScanner:(MMScanner *)scanner
  {
      return scanner.atBeginningOfLine
          ? [self.htmlParser parseBlockTagWithScanner:scanner]
          : nil;
  }
  // Parses a header written as 1-6 leading '#' characters followed by
  // whitespace and the header text. Returns nil when the line is not a header.
  //
  // Trailing '#' characters, and then trailing whitespace, are removed from the
  // header text before its spans are parsed.
  - (MMElement *)_parsePrefixHeaderWithScanner:(MMScanner *)scanner
  {
      static const NSUInteger kMaxHeaderLevel = 6;

      NSUInteger level = 0;
      while (scanner.nextCharacter == '#' && level < kMaxHeaderLevel)
      {
          level++;
          [scanner advance];
      }
      if (level == 0)
          return nil;

      // The #s must be separated from the text by whitespace.
      if ([scanner skipWhitespace] == 0)
          return nil;

      NSRange headerRange = scanner.currentRange;

      // Drop the optional closing #s.
      while (headerRange.length > 0
             && [scanner.string characterAtIndex:NSMaxRange(headerRange) - 1] == '#')
      {
          headerRange.length--;
      }

      // Drop the whitespace that preceded them (or that ended the line).
      NSCharacterSet *whitespaceSet = NSCharacterSet.whitespaceCharacterSet;
      while (headerRange.length > 0
             && [whitespaceSet characterIsMember:[scanner.string characterAtIndex:NSMaxRange(headerRange) - 1]])
      {
          headerRange.length--;
      }

      [scanner advanceToNextLine];

      MMElement *element = [MMElement new];
      element.type = MMElementTypeHeader;
      // Measure up to the scanner's position after the header line, exactly as
      // the other block parsers do. Using scanner.currentRange here would be
      // measured against the line that follows the header, because the scanner
      // has already advanced.
      element.range = NSMakeRange(scanner.startLocation, scanner.location - scanner.startLocation);
      element.level = level;
      [element addInnerRange:headerRange];

      if (element.innerRanges.count > 0)
      {
          MMScanner *innerScanner = [MMScanner scannerWithString:scanner.string lineRanges:element.innerRanges];
          element.children = [self.spanParser parseSpansInBlockElement:element withScanner:innerScanner];
      }

      return element;
  }
  // Parses a header written as a line of text followed by a line made of '='
  // (level 1) or '-' (level 2). Returns nil when the two lines do not match.
  //
  // The underline must consist of one repeated character, optionally followed
  // by whitespace.
  - (MMElement *)_parseUnderlinedHeaderWithScanner:(MMScanner *)scanner
  {
      NSCharacterSet *whitespaceSet = NSCharacterSet.whitespaceCharacterSet;
      unichar underline = 0;
      BOOL isHeader = NO;

      // Look ahead only: this transaction is never kept.
      [scanner beginTransaction];
      do
      {
          // The text line must not be blank.
          [scanner skipCharactersFromSet:whitespaceSet];
          if (scanner.atEndOfLine)
              break;

          // There must be a following line.
          [scanner advanceToNextLine];
          if (scanner.atEndOfString)
              break;

          // It has to start with - or =.
          underline = scanner.nextCharacter;
          if (underline != '-' && underline != '=')
              break;

          // The rest of it must repeat that character, apart from optional
          // whitespace before the end of the line.
          isHeader = YES;
          while (!scanner.atEndOfLine)
          {
              if (scanner.nextCharacter != underline)
              {
                  [scanner skipCharactersFromSet:whitespaceSet];
                  isHeader = scanner.atEndOfLine;
                  break;
              }
              [scanner advance];
          }
      } while (NO);
      [scanner commitTransaction:NO];

      if (!isHeader)
          return nil;

      MMElement *element = [MMElement new];
      element.type = MMElementTypeHeader;
      element.level = (underline == '=') ? 1 : 2;
      [element addInnerRange:scanner.currentRange];

      [scanner advanceToNextLine]; // past the header text
      [scanner advanceToNextLine]; // past the underline
      element.range = NSMakeRange(scanner.startLocation, scanner.location - scanner.startLocation);

      if (element.innerRanges.count > 0)
      {
          MMScanner *innerScanner = [MMScanner scannerWithString:scanner.string lineRanges:element.innerRanges];
          element.children = [self.spanParser parseSpansInBlockElement:element withScanner:innerScanner];
      }

      return element;
  }
  // Parses a blockquote: a line starting with '>' (after up to 3 spaces) plus
  // every following non-blank line. Returns nil when the first line has no '>'.
  //
  // A following line without its own '>' is still part of the quote unless it
  // begins with a list marker, in which case the quote ends before that line.
  // The collected lines are then parsed recursively as block elements.
  - (MMElement *)_parseBlockquoteWithScanner:(MMScanner *)scanner
  {
      // Skip up to 3 leading spaces
      NSCharacterSet *spaceCharacterSet = [NSCharacterSet characterSetWithCharactersInString:@" "];
      [scanner skipCharactersFromSet:spaceCharacterSet max:3];

      // Must have a >
      if (scanner.nextCharacter != '>')
          return nil;
      [scanner advance];

      // Can be followed by a space
      if (scanner.nextCharacter == ' ')
          [scanner advance];

      MMElement *element = [MMElement new];
      element.type = MMElementTypeBlockquote;
      [element addInnerRange:scanner.currentRange];
      [scanner advanceToNextLine];

      // Parse each remaining line
      NSCharacterSet *whitespaceSet = NSCharacterSet.whitespaceCharacterSet;
      while (!scanner.atEndOfString)
      {
          // Per-line transaction: every exit from this iteration must close it.
          [scanner beginTransaction];
          [scanner skipCharactersFromSet:whitespaceSet];

          // It's a continuation of the blockquote unless it's a blank line
          if (scanner.atEndOfLine)
          {
              [scanner commitTransaction:NO];
              break;
          }

          // If there's a >, then skip it and an optional space
          if (scanner.nextCharacter == '>')
          {
              [scanner advance];
              [scanner skipCharactersFromSet:whitespaceSet max:1];
          }
          else
          {
              // A line that starts a list item ends the blockquote.
              [scanner beginTransaction];
              [scanner skipIndentationUpTo:2];
              BOOL hasListMarker = [self _parseListMarkerWithScanner:scanner listType:MMListTypeBulleted]
                                || [self _parseListMarkerWithScanner:scanner listType:MMListTypeNumbered];
              [scanner commitTransaction:NO];
              if (hasListMarker)
              {
                  // Close the per-line transaction too, without keeping it.
                  // Leaving it open would unbalance the scanner's transactions
                  // and keep the whitespace skipped above consumed, so the list
                  // line would not be handed back intact.
                  [scanner commitTransaction:NO];
                  break;
              }
          }

          [element addInnerRange:scanner.currentRange];
          [scanner commitTransaction:YES];
          [scanner advanceToNextLine];
      }

      element.range = NSMakeRange(scanner.startLocation, scanner.location - scanner.startLocation);

      if (element.innerRanges.count > 0)
      {
          MMScanner *innerScanner = [MMScanner scannerWithString:scanner.string lineRanges:element.innerRanges];
          element.children = [self _parseElementsWithScanner:innerScanner];
      }

      return element;
  }
  // Splits the lines of a code block into child elements:
  //   - runs of ordinary text (type None, covering the run),
  //   - one Entity element for each &, < or >, carrying its escaped form,
  //   - a zero-length None element after each line, standing for the newline.
  - (NSArray *)_parseCodeLinesWithScanner:(MMScanner *)scanner
  {
      // Everything except &, < and > can be copied through unchanged.
      NSCharacterSet *escapable = [NSCharacterSet characterSetWithCharactersInString:@"&<>"];
      NSCharacterSet *plainText = [escapable invertedSet];

      NSMutableArray *pieces = [NSMutableArray new];

      while (!scanner.atEndOfString)
      {
          // A run of plain text, if there is one.
          NSUInteger runStart = scanner.location;
          [scanner skipCharactersFromSet:plainText];
          NSUInteger runEnd = scanner.location;
          if (runEnd != runStart)
          {
              MMElement *run = [MMElement new];
              run.type = MMElementTypeNone;
              run.range = NSMakeRange(runStart, runEnd - runStart);
              [pieces addObject:run];
          }

          // The run stopped before the end of the line, so the next character
          // is one of the escapable ones.
          if (!scanner.atEndOfLine)
          {
              NSUInteger entityLocation = scanner.location;
              MMElement *entity = [MMElement new];
              entity.type = MMElementTypeEntity;
              entity.range = NSMakeRange(entityLocation, 1);
              entity.stringValue = __HTMLEntityForCharacter([scanner.string characterAtIndex:entityLocation]);
              [pieces addObject:entity];
              [scanner advance];
          }

          if (scanner.atEndOfLine)
          {
              [scanner advanceToNextLine];

              // The newline marker
              MMElement *lineBreak = [MMElement new];
              lineBreak.type = MMElementTypeNone;
              lineBreak.range = NSMakeRange(scanner.location, 0);
              [pieces addObject:lineBreak];
          }
      }

      return pieces;
  }
  // Parses an indented code block: consecutive lines indented by 4 spaces,
  // possibly separated by empty lines. Returns nil when the first line is not
  // indented by exactly 4 or has nothing after the indentation.
  //
  // Trailing blank lines and the trailing whitespace of the last line are not
  // part of the block's content.
  - (MMElement *)_parseCodeBlockWithScanner:(MMScanner *)scanner
  {
      NSUInteger firstLineIndent = [scanner skipIndentationUpTo:4];
      if (firstLineIndent != 4 || scanner.atEndOfLine)
          return nil;

      MMElement *element = [MMElement new];
      element.type = MMElementTypeCodeBlock;
      [element addInnerRange:scanner.currentRange];
      [scanner advanceToNextLine];

      while (!scanner.atEndOfString)
      {
          // Each skipped empty line is recorded as a zero-length range.
          NSUInteger blankLines = [scanner skipEmptyLines];
          for (NSUInteger remaining = blankLines; remaining > 0; remaining--)
          {
              [element addInnerRange:NSMakeRange(scanner.location, 0)];
          }

          // The block continues only while lines keep their 4-space indent.
          [scanner beginTransaction];
          BOOL continuesBlock = [scanner skipIndentationUpTo:4] >= 4;
          [scanner commitTransaction:continuesBlock];
          if (!continuesBlock)
              break;

          [element addInnerRange:scanner.currentRange];
          [scanner advanceToNextLine];
      }

      // Discard blank lines at the end of the block
      while (element.innerRanges.count > 0 && [[element.innerRanges lastObject] rangeValue].length == 0)
      {
          [element removeLastInnerRange];
      }

      // Trim whitespace from the end of the final line
      NSValue *finalLine = [element.innerRanges lastObject];
      if (finalLine != nil)
      {
          NSRange trimmedRange = [finalLine rangeValue];
          [element removeLastInnerRange];

          NSCharacterSet *whitespaceSet = NSCharacterSet.whitespaceCharacterSet;
          while (trimmedRange.length > 0
                 && [whitespaceSet characterIsMember:[scanner.string characterAtIndex:NSMaxRange(trimmedRange) - 1]])
          {
              trimmedRange.length--;
          }

          [element addInnerRange:trimmedRange];
      }

      element.range = NSMakeRange(scanner.startLocation, scanner.location - scanner.startLocation);

      if (element.innerRanges.count > 0)
      {
          MMScanner *innerScanner = [MMScanner scannerWithString:scanner.string lineRanges:element.innerRanges];
          element.children = [self _parseCodeLinesWithScanner:innerScanner];
      }

      return element;
  }
  // Parses a fenced code block: an opening line of ``` with an optional
  // language name, the code lines, and a closing line of ```.
  //
  // Returns nil when the opening line has anything besides the fence, the
  // language name and whitespace. If no closing fence is found, the block runs
  // to the end of the string. This method does not set element.range.
  - (MMElement *)_parseFencedCodeBlockWithScanner:(MMScanner *)scanner
  {
      NSString *fence = @"```";
      if (![scanner matchString:fence])
          return nil;

      // Optional language name: letters, digits, '-' and '_'.
      [scanner skipWhitespace];
      NSMutableCharacterSet *languageNameSet = NSMutableCharacterSet.alphanumericCharacterSet;
      [languageNameSet addCharactersInString:@"-_"];
      NSString *language = [scanner nextWordWithCharactersFromSet:languageNameSet];
      scanner.location += language.length;

      // Nothing but whitespace may follow on the opening line.
      [scanner skipWhitespace];
      if (!scanner.atEndOfLine)
          return nil;
      [scanner advanceToNextLine];

      MMElement *element = [MMElement new];
      element.type = MMElementTypeCodeBlock;
      element.language = (language.length != 0) ? language : nil;

      // Collect lines until one consists of ``` plus optional whitespace, or
      // the string ends.
      while (!scanner.atEndOfString)
      {
          [scanner beginTransaction];
          BOOL isClosingFence = NO;
          if ([scanner matchString:fence])
          {
              [scanner skipWhitespace];
              isClosingFence = scanner.atEndOfLine;
          }
          [scanner commitTransaction:isClosingFence];
          if (isClosingFence)
              break;

          [element addInnerRange:scanner.currentRange];
          [scanner advanceToNextLine];
      }
      [scanner advanceToNextLine];

      if (element.innerRanges.count > 0)
      {
          MMScanner *innerScanner = [MMScanner scannerWithString:scanner.string lineRanges:element.innerRanges];
          element.children = [self _parseCodeLinesWithScanner:innerScanner];
      }

      return element;
  }
  // Parses a horizontal rule: at least three of the same character (*, - or _),
  // each optionally followed by a single space, with only whitespace before and
  // after. Returns nil for anything else.
  - (MMElement *)_parseHorizontalRuleWithScanner:(MMScanner *)scanner
  {
      NSCharacterSet *whitespaceSet = NSCharacterSet.whitespaceCharacterSet;

      // Leading whitespace is ignored
      [scanner skipCharactersFromSet:whitespaceSet];

      unichar ruleCharacter = scanner.nextCharacter;
      BOOL canStartRule = (ruleCharacter == '*' || ruleCharacter == '-' || ruleCharacter == '_');
      if (!canStartRule)
          return nil;

      NSUInteger ruleCount = 0;
      for (unichar upcoming = ruleCharacter; !scanner.atEndOfLine && upcoming == ruleCharacter; )
      {
          ruleCount++;

          // Consume the rule character itself
          [scanner advance];
          upcoming = scanner.nextCharacter;

          // and one space after it, if present
          if (upcoming == ' ')
          {
              [scanner advance];
              upcoming = scanner.nextCharacter;
          }
      }

      // Fewer than 3 rule characters is not a rule
      if (ruleCount < 3)
          return nil;

      // Only whitespace may remain on the line
      [scanner skipCharactersFromSet:whitespaceSet];
      if (!scanner.atEndOfLine)
          return nil;

      MMElement *element = [MMElement new];
      element.type = MMElementTypeHorizontalRule;
      element.range = NSMakeRange(scanner.startLocation, scanner.location - scanner.startLocation);

      return element;
  }
  // Consumes a list marker of the requested type and returns YES, or leaves the
  // attempt uncommitted and returns NO.
  //
  // A bulleted marker is one of *, - or + followed by a space. A numbered
  // marker is one or more decimal digits, a '.', and a space.
  - (BOOL)_parseListMarkerWithScanner:(MMScanner *)scanner listType:(MMListType)listType
  {
      BOOL wantsBullet = (listType == MMListTypeBulleted);
      if (!wantsBullet && listType != MMListTypeNumbered)
          return NO;

      [scanner beginTransaction];

      // First the marker proper...
      BOOL sawMarker = NO;
      if (wantsBullet)
      {
          unichar bullet = scanner.nextCharacter;
          if (bullet == '*' || bullet == '-' || bullet == '+')
          {
              [scanner advance];
              sawMarker = YES;
          }
      }
      else
      {
          NSUInteger digitCount = [scanner skipCharactersFromSet:[NSCharacterSet decimalDigitCharacterSet]];
          if (digitCount != 0 && scanner.nextCharacter == '.')
          {
              [scanner advance];
              sawMarker = YES;
          }
      }

      // ...then the space that must follow it.
      if (sawMarker && scanner.nextCharacter == ' ')
      {
          [scanner advance];
          [scanner commitTransaction:YES];
          return YES;
      }

      [scanner commitTransaction:NO];
      return NO;
  }
  // Parses one list item of the given type, including continuation lines and a
  // nested list. Returns nil when there is no marker of that type at the
  // current position.
  //
  // The item ends at a horizontal rule, at the next marker of the same list
  // type, at an insufficiently indented line after a blank line, or before a
  // line that starts with '>'. Its content is parsed as block elements when
  // blank lines are involved (see `parseAsBlocks`), otherwise as spans. Lines
  // from the start of a nested list onwards are always parsed as blocks.
  - (MMElement *)_parseListItemWithScanner:(MMScanner *)scanner listType:(MMListType)listType
  {
      // Blank lines ahead of the marker make this a block-level item.
      BOOL parseAsBlocks = ([scanner skipEmptyLines] != 0);

      [scanner skipIndentationUpTo:3]; // the marker may be slightly indented
      if (![self _parseListMarkerWithScanner:scanner listType:listType])
          return nil;

      [scanner skipCharactersFromSet:NSCharacterSet.whitespaceCharacterSet];

      MMElement *item = [MMElement new];
      item.type = MMElementTypeListItem;

      // Index of the first inner range that belongs to a nested list, and the
      // indentation at which that list was found.
      NSUInteger sublistStart = NSNotFound;
      NSUInteger sublistIndent = 0;

      while (!scanner.atEndOfString)
      {
          // Line transaction: stays open until the line is accepted or the
          // item ends.
          [scanner beginTransaction];
          BOOL followsBlankLine = ([scanner skipEmptyLines] != 0);

          // A horizontal rule ends the item.
          [scanner beginTransaction];
          BOOL startsRule = ([self _parseHorizontalRuleWithScanner:scanner] != nil);
          [scanner commitTransaction:NO];
          if (startsRule)
          {
              [scanner commitTransaction:NO];
              break;
          }

          // So does the marker of the next item in the same list.
          [scanner beginTransaction];
          [scanner skipIndentationUpTo:1];
          BOOL startsSibling = [self _parseListMarkerWithScanner:scanner listType:listType];
          [scanner commitTransaction:NO];
          if (startsSibling)
          {
              [scanner commitTransaction:NO];
              if (followsBlankLine)
                  parseAsBlocks = YES;
              break;
          }

          // An indented marker of either type starts a nested list (only the
          // first one is recorded).
          [scanner beginTransaction];
          NSUInteger leadingIndent = [scanner skipIndentationUpTo:4];
          [scanner beginTransaction];
          BOOL startsAnyList = [self _parseListMarkerWithScanner:scanner listType:MMListTypeBulleted]
                            || [self _parseListMarkerWithScanner:scanner listType:MMListTypeNumbered];
          [scanner commitTransaction:NO];
          if (leadingIndent >= 2 && startsAnyList && sublistStart == NSNotFound)
          {
              [item addInnerRange:NSMakeRange(scanner.location, 0)];
              sublistStart = item.innerRanges.count;
              [item addInnerRange:scanner.currentRange];

              [scanner commitTransaction:YES]; // indentation
              [scanner commitTransaction:YES]; // line
              [scanner advanceToNextLine];
              sublistIndent = leadingIndent;
              continue;
          }
          [scanner commitTransaction:NO];

          if (followsBlankLine)
          {
              // After a blank line, 4 columns of indentation are needed for
              // the line to belong to this item as a new paragraph.
              [scanner beginTransaction];
              NSUInteger paragraphIndent = [scanner skipIndentationUpTo:4];
              BOOL continuesItem = (paragraphIndent >= 4);
              [scanner commitTransaction:continuesItem]; // indentation
              [scanner commitTransaction:continuesItem]; // line
              if (!continuesItem)
                  break;

              [item addInnerRange:NSMakeRange(scanner.location, 0)];
              parseAsBlocks = YES;
          }
          else
          {
              [scanner commitTransaction:YES];

              // Skip no further than where the nested list began: that list
              // may contain a nested list itself and needs the remaining
              // whitespace.
              [scanner skipIndentationUpTo:sublistIndent];
          }

          if (sublistStart == NSNotFound)
          {
              [self _addTextLineToElement:item withScanner:scanner];
          }
          else
          {
              [item addInnerRange:scanner.currentRange];
              [scanner advanceToNextLine];
          }

          // Peek at the next line: if it starts with '>' (after up to 4
          // columns of indentation), the item ends here so that the line is
          // not absorbed into it.
          [scanner beginTransaction];
          [scanner skipIndentationUpTo:4];
          BOOL nextLineStartsQuote = (scanner.nextCharacter == '>');
          [scanner commitTransaction:NO];
          if (nextLineStartsQuote)
              break;
      }

      item.range = NSMakeRange(scanner.startLocation, scanner.location - scanner.startLocation);

      NSArray *allRanges = item.innerRanges;
      if (allRanges.count > 0)
      {
          BOOL hasSublist = (sublistStart != NSNotFound);

          // Lines ahead of the nested list (or all lines when there is none).
          NSArray *leadingRanges = hasSublist
              ? [allRanges subarrayWithRange:NSMakeRange(0, sublistStart)]
              : allRanges;
          MMScanner *leadingScanner = [MMScanner scannerWithString:scanner.string lineRanges:leadingRanges];

          NSArray *children = parseAsBlocks
              ? [self _parseElementsWithScanner:leadingScanner]
              : [self.spanParser parseSpansInBlockElement:item withScanner:leadingScanner];

          if (hasSublist)
          {
              // The nested list and everything after it.
              NSArray *trailingRanges = [allRanges subarrayWithRange:NSMakeRange(sublistStart, allRanges.count - sublistStart)];
              MMScanner *trailingScanner = [MMScanner scannerWithString:scanner.string lineRanges:trailingRanges];
              children = [children arrayByAddingObjectsFromArray:[self _parseElementsWithScanner:trailingScanner]];
          }

          item.children = children;
      }

      return item;
  }
  // Parses a bulleted or numbered list made of consecutive list items.
  //
  // The first marker is only peeked at (the probing transaction is always
  // rolled back); it decides whether the list is bulleted ('*', '-', '+') or
  // numbered. Returns nil when no list marker is found at the current
  // position, otherwise an element whose children are the parsed items.
  - (MMElement *)_parseListWithScanner:(MMScanner *)scanner
  {
      // Peek at the first marker without consuming any input.
      [scanner beginTransaction];
      [scanner skipIndentationUpTo:3]; // Leading indentation is optional

      BOOL bulleted;
      switch (scanner.nextCharacter)
      {
          case '*':
          case '-':
          case '+':
              bulleted = YES;
              break;
          default:
              bulleted = NO;
              break;
      }
      MMListType kind = bulleted ? MMListTypeBulleted : MMListTypeNumbered;
      BOOL startsWithMarker = [self _parseListMarkerWithScanner:scanner listType:kind];
      [scanner commitTransaction:NO];

      if (!startsWithMarker)
          return nil;

      MMElement *list = [MMElement new];
      list.type = bulleted ? MMElementTypeBulletedList : MMElementTypeNumberedList;

      while (!scanner.atEndOfString)
      {
          // A horizontal rule looks like a list marker, so rule it out first.
          // This is a pure look-ahead: the skipped blank lines are restored.
          [scanner beginTransaction];
          [scanner skipEmptyLines];
          BOOL ruleAhead = ([self _parseHorizontalRuleWithScanner:scanner] != nil);
          [scanner commitTransaction:NO];
          if (ruleAhead)
              break;

          // Keep the scanner's progress only when an item was actually parsed.
          [scanner beginTransaction];
          MMElement *listItem = [self _parseListItemWithScanner:scanner listType:kind];
          [scanner commitTransaction:(listItem != nil)];
          if (listItem == nil)
              break;

          [list addChild:listItem];
      }

      list.range = NSMakeRange(scanner.startLocation, scanner.location - scanner.startLocation);
      return list;
  }
  // Parses a link definition of the form
  //
  //     [identifier]: url "optional title"
  //
  // The URL may be wrapped in angle brackets, and the title may be delimited
  // by double quotes, single quotes, or parentheses. The title may also sit
  // on the line following the URL.
  //
  // Returns an MMElementTypeDefinition element carrying the identifier, href
  // and (if present) title, or nil if the text is not a link definition.
  - (MMElement *)_parseLinkDefinitionWithScanner:(MMScanner *)scanner
  {
      NSCharacterSet *blanks = NSCharacterSet.whitespaceCharacterSet;

      [scanner skipIndentationUpTo:3];

      // The bracketed identifier. The reported length is treated as including
      // both brackets, hence the +1 / -2 adjustment below.
      NSUInteger bracketStart  = scanner.location;
      NSUInteger bracketLength = [scanner skipNestedBracketsWithDelimiter:'['];
      if (bracketLength == 0)
          return nil;
      NSRange identifierRange = NSMakeRange(bracketStart + 1, bracketLength - 2);

      // The colon separating the identifier from the URL
      if (scanner.nextCharacter != ':')
          return nil;
      [scanner advance];
      [scanner skipCharactersFromSet:blanks];

      // The URL: everything up to the next whitespace character
      NSUInteger urlStart = scanner.location;
      [scanner skipCharactersFromSet:[blanks invertedSet]];
      NSRange   urlRange = NSMakeRange(urlStart, scanner.location - urlStart);
      NSString *href     = [scanner.string substringWithRange:urlRange];

      // Strip surrounding angle brackets, if any
      BOOL angleBracketed = [href hasPrefix:@"<"] && [href hasSuffix:@">"];
      if (angleBracketed)
      {
          href = [href substringWithRange:NSMakeRange(1, href.length - 2)];
      }

      [scanner skipCharactersFromSet:blanks];

      // The title is optional. Everything scanned while looking for it is
      // kept only if a complete title was found.
      [scanner beginTransaction];
      if (scanner.atEndOfLine)
      {
          // Nothing left on this line; the title may be on the next one
          [scanner advanceToNextLine];
          [scanner skipCharactersFromSet:blanks];
      }

      // Map the opening delimiter to its closing counterpart (0 == no title)
      unichar opener = scanner.nextCharacter;
      unichar closer = 0;
      switch (opener)
      {
          case '"':
          case '\'':
              closer = opener;
              break;
          case '(':
              closer = ')';
              break;
          default:
              break;
      }

      NSRange titleRange = NSMakeRange(NSNotFound, 0);
      if (closer != 0)
      {
          [scanner advance];
          NSUInteger titleStart  = scanner.location;
          NSUInteger titleLength = [scanner skipToLastCharacterOfLine];
          if (scanner.nextCharacter == closer)
          {
              [scanner advance];
              titleRange = NSMakeRange(titleStart, titleLength);
          }
      }
      BOOL hasTitle = (titleRange.location != NSNotFound);
      [scanner commitTransaction:hasTitle];

      // Nothing but whitespace may follow on the line
      [scanner skipCharactersFromSet:blanks];
      if (!scanner.atEndOfLine)
          return nil;

      MMElement *definition = [MMElement new];
      definition.type       = MMElementTypeDefinition;
      definition.range      = NSMakeRange(scanner.startLocation, scanner.location - scanner.startLocation);
      definition.identifier = [scanner.string substringWithRange:identifierRange];
      definition.href       = href;
      if (hasTitle)
      {
          definition.title = [scanner.string substringWithRange:titleRange];
      }
      return definition;
  }
  // Parses a paragraph: consecutive text lines up to a blank line or up to a
  // line that starts another block (blockquote, link definition, header,
  // fenced code block when that extension is enabled, or list item).
  //
  // Every block check is a look-ahead wrapped in its own transaction, so the
  // interrupting line is left for the caller to parse. The one exception is
  // the blockquote check, which keeps the skipped leading whitespace.
  //
  // Returns nil if no text line was collected; otherwise a paragraph element
  // whose children are the spans parsed from the collected lines.
  - (MMElement *)_parseParagraphWithScanner:(MMScanner *)scanner
  {
      MMElement *paragraph = [MMElement new];
      paragraph.type = MMElementTypeParagraph;

      NSCharacterSet *blanks = NSCharacterSet.whitespaceCharacterSet;

      while (!scanner.atEndOfString)
      {
          // A blank line ends the paragraph (and is consumed)
          [scanner skipWhitespace];
          if (scanner.atEndOfLine)
          {
              [scanner advanceToNextLine];
              break;
          }

          // Blockquote marker?
          [scanner beginTransaction];
          [scanner skipCharactersFromSet:blanks];
          BOOL quoteAhead = (scanner.nextCharacter == '>');
          [scanner commitTransaction:quoteAhead];
          if (quoteAhead)
              break;

          // Link definition?
          [scanner beginTransaction];
          BOOL definitionAhead = ([self _parseLinkDefinitionWithScanner:scanner] != nil);
          [scanner commitTransaction:NO];
          if (definitionAhead)
              break;

          // Underlined header?
          [scanner beginTransaction];
          BOOL underlinedHeaderAhead = ([self _parseUnderlinedHeaderWithScanner:scanner] != nil);
          [scanner commitTransaction:NO];
          if (underlinedHeaderAhead)
              break;

          // Prefixed header?
          [scanner beginTransaction];
          BOOL prefixHeaderAhead = ([self _parsePrefixHeaderWithScanner:scanner] != nil);
          [scanner commitTransaction:NO];
          if (prefixHeaderAhead)
              break;

          // Fenced code block? Only checked when the extension is enabled.
          if (self.extensions & MMMarkdownExtensionsFencedCodeBlocks)
          {
              [scanner beginTransaction];
              BOOL fenceAhead = ([self _parseFencedCodeBlockWithScanner:scanner] != nil);
              [scanner commitTransaction:NO];
              if (fenceAhead)
                  break;
          }

          // List item? Try a bullet marker first, then a numbered marker.
          [scanner beginTransaction];
          [scanner skipIndentationUpTo:2];
          BOOL listItemAhead = [self _parseListMarkerWithScanner:scanner listType:MMListTypeBulleted];
          if (!listItemAhead)
          {
              listItemAhead = [self _parseListMarkerWithScanner:scanner listType:MMListTypeNumbered];
          }
          [scanner commitTransaction:NO];
          if (listItemAhead)
              break;

          // Nothing interrupted the paragraph, so this line belongs to it
          [self _addTextLineToElement:paragraph withScanner:scanner];
      }

      paragraph.range = NSMakeRange(scanner.startLocation, scanner.location - scanner.startLocation);

      if (paragraph.innerRanges.count == 0)
          return nil;

      MMScanner *spanScanner = [MMScanner scannerWithString:scanner.string
                                                 lineRanges:paragraph.innerRanges];
      paragraph.children = [self.spanParser parseSpansInBlockElement:paragraph
                                                         withScanner:spanScanner];
      return paragraph;
  }
  // Parses a table separator line such as
  //
  //     | :--- | :---: | ---: |
  //
  // and returns one boxed MMTableCellAlignment per column. Each column needs
  // at least three dashes; a leading and/or trailing colon selects left,
  // right or centered alignment. The leading pipe is optional.
  //
  // Returns nil if a column has fewer than three dashes or if anything is
  // left on the line after the last column. Note that a line with no columns
  // at all produces an empty (non-nil) array.
  - (NSArray *)_parseTableHeaderWithScanner:(MMScanner *)scanner
  {
      NSCharacterSet *dashCharacters = [NSCharacterSet characterSetWithCharactersInString:@"-"];

      // Optional leading pipe, surrounded by optional whitespace
      [scanner skipWhitespace];
      if (scanner.nextCharacter == '|')
          [scanner advance];
      [scanner skipWhitespace];

      NSMutableArray *columnAlignments = [NSMutableArray new];
      while (!scanner.atEndOfLine)
      {
          BOOL leadingColon = (scanner.nextCharacter == ':');
          if (leadingColon)
              [scanner advance];

          if ([scanner skipCharactersFromSet:dashCharacters] < 3)
              return nil;

          BOOL trailingColon = (scanner.nextCharacter == ':');
          if (trailingColon)
              [scanner advance];

          MMTableCellAlignment alignment;
          if (leadingColon && trailingColon)
              alignment = MMTableCellAlignmentCenter;
          else if (leadingColon)
              alignment = MMTableCellAlignmentLeft;
          else if (trailingColon)
              alignment = MMTableCellAlignmentRight;
          else
              alignment = MMTableCellAlignmentNone;
          [columnAlignments addObject:@(alignment)];

          // Columns are separated by pipes; anything else ends the scan
          [scanner skipWhitespace];
          if (scanner.nextCharacter != '|')
              break;
          [scanner advance];
          [scanner skipWhitespace];
      }

      // Trailing garbage means this was not a separator line after all
      return scanner.atEndOfLine ? columnAlignments : nil;
  }
  // Parses the scanner's current line as a table row with the given columns.
  //
  // The cells are parsed by the span parser using a scanner restricted to the
  // current line; leading and trailing whitespace and pipes are skipped around
  // the cells. On success the main scanner is advanced to the next line and a
  // MMElementTypeTableRow element is returned. Returns nil (leaving the main
  // scanner where it was) if the cells could not be parsed or if the line was
  // not fully consumed.
  - (MMElement *)_parseTableRowWithScanner:(MMScanner *)scanner columns:(NSArray *)columns
  {
      // Whitespace and pipes may surround the cells
      NSMutableCharacterSet *edgeCharacters = NSMutableCharacterSet.whitespaceCharacterSet;
      [edgeCharacters addCharactersInString:@"|"];

      // Work on a scanner that only sees the current line
      NSArray   *currentLine = @[ [NSValue valueWithRange:scanner.currentRange] ];
      MMScanner *rowScanner  = [MMScanner scannerWithString:scanner.string lineRanges:currentLine];

      [rowScanner skipCharactersFromSet:edgeCharacters];
      NSArray *cells = [self.spanParser parseSpansInTableColumns:columns withScanner:rowScanner];
      [rowScanner skipCharactersFromSet:edgeCharacters];

      BOOL consumedWholeLine = (cells != nil) && rowScanner.atEndOfLine;
      if (!consumedWholeLine)
          return nil;

      [scanner advanceToNextLine];

      MMElement *tableRow = [MMElement new];
      tableRow.type     = MMElementTypeTableRow;
      tableRow.children = cells;
      tableRow.range    = NSMakeRange(scanner.startLocation, scanner.location - scanner.startLocation);
      return tableRow;
  }
  // Parses a table: a header row, a separator line describing the columns,
  // and at least one body row.
  //
  // NOTE: this method expects the caller to have opened a transaction before
  // calling it. After validating the separator on the second line, it rolls
  // that transaction back to return to the header line and opens a new one in
  // its place.
  //
  // Returns nil if the second line is not a valid separator, if the header
  // row cannot be parsed, or if no body row follows. Otherwise returns a
  // MMElementTypeTable element whose children are the header and body rows.
  - (MMElement *)_parseTableWithScanner:(MMScanner *)scanner
  {
      // The separator on the second line determines whether this is a table
      [scanner advanceToNextLine];
      NSArray *columnAlignments = [self _parseTableHeaderWithScanner:scanner];
      if (columnAlignments == nil)
          return nil;

      // Undo the outer transaction to begin at the header content again
      [scanner commitTransaction:NO];
      [scanner beginTransaction];

      MMElement *headerRow = [self _parseTableRowWithScanner:scanner columns:columnAlignments];
      if (headerRow == nil)
          return nil;

      headerRow.type = MMElementTypeTableHeader;
      for (MMElement *headerCell in headerRow.children)
      {
          headerCell.type = MMElementTypeTableHeaderCell;
      }

      // Step over the separator line that was validated above
      [scanner advanceToNextLine];

      // Collect body rows until a line fails to parse as a row
      NSMutableArray *tableRows = [NSMutableArray arrayWithObject:headerRow];
      while (!scanner.atEndOfString)
      {
          [scanner beginTransaction];
          MMElement *bodyRow = [self _parseTableRowWithScanner:scanner columns:columnAlignments];
          [scanner commitTransaction:(bodyRow != nil)];
          if (bodyRow == nil)
              break;

          [tableRows addObject:bodyRow];
      }

      // A header without any body row is not a table
      if (tableRows.count < 2)
          return nil;

      MMElement *table = [MMElement new];
      table.type     = MMElementTypeTable;
      table.children = tableRows;
      table.range    = NSMakeRange(scanner.startLocation, scanner.location - scanner.startLocation);
      return table;
  }
  // Resolves reference-style links and images against the link definitions
  // found anywhere in the document.
  //
  // The element tree is walked breadth-first. Definitions are indexed by
  // their lowercased identifier (so lookups are case-insensitive, and a later
  // definition with the same identifier replaces an earlier one). Links and
  // images that have an identifier but no href are collected as references.
  //
  // Each reference then either receives the href and title of its matching
  // definition, or -- when no definition exists -- is turned into a
  // MMElementTypeNone element with all of its children removed.
  - (void)_updateLinksFromDefinitionsInDocument:(MMDocument *)document
  {
      NSMutableArray      *references  = [NSMutableArray new];
      NSMutableDictionary *definitions = [NSMutableDictionary new];
      NSMutableArray      *queue       = [NSMutableArray new];

      [queue addObjectsFromArray:document.elements];

      // First, find the references and definitions
      while (queue.count > 0)
      {
          MMElement *element = [queue objectAtIndex:0];
          [queue removeObjectAtIndex:0];
          [queue addObjectsFromArray:element.children];

          switch (element.type)
          {
              case MMElementTypeDefinition:
                  definitions[element.identifier.lowercaseString] = element;
                  break;
              case MMElementTypeImage:
              case MMElementTypeLink:
                  // Only links without an inline href need resolving
                  if (element.identifier && !element.href)
                  {
                      [references addObject:element];
                  }
                  break;
              default:
                  break;
          }
      }

      // Set the hrefs for all the references
      for (MMElement *link in references)
      {
          MMElement *definition = definitions[link.identifier.lowercaseString];

          // If there's no definition, change the link to a text element and remove its children
          if (!definition)
          {
              link.type = MMElementTypeNone;
              while (link.children.count > 0)
              {
                  [link removeLastChild];
              }
          }
          // otherwise, set the href and title
          else
          {
              // Previously this block ran unconditionally (the `else` was
              // missing), which also overwrote href/title with nil on
              // unresolved references.
              link.href  = definition.href;
              link.title = definition.title;
          }
      }
  }
  1101. @end