Nie możesz wybrać więcej niż 25 tematów. Tematy muszą się zaczynać od litery lub cyfry, mogą zawierać myślniki ('-') i mogą mieć do 35 znaków.

1299 lines
40KB

  1. //
  2. // MMSpanParser.m
  3. // MMMarkdown
  4. //
  5. // Copyright (c) 2012 Matt Diephouse.
  6. //
  7. // Permission is hereby granted, free of charge, to any person obtaining a copy
  8. // of this software and associated documentation files (the "Software"), to deal
  9. // in the Software without restriction, including without limitation the rights
  10. // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11. // copies of the Software, and to permit persons to whom the Software is
  12. // furnished to do so, subject to the following conditions:
  13. //
  14. // The above copyright notice and this permission notice shall be included in
  15. // all copies or substantial portions of the Software.
  16. //
  17. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23. // THE SOFTWARE.
  24. //
  25. #import "MMSpanParser.h"
  26. #import "MMElement.h"
  27. #import "MMHTMLParser.h"
  28. #import "MMScanner.h"
  // Set of characters named as "escapable"; presumably consumed by the
  // backslash-escape handling further down this file (not visible here).
  static NSString *const ESCAPABLE_CHARS = @"\\`*_{}[]()#+-.!>";

  // Private state of MMSpanParser.
  @interface MMSpanParser ()

  // Extension flags handed to -initWithExtensions:.
  @property (nonatomic, assign, readonly) MMMarkdownExtensions extensions;
  // Parser used for inline HTML tags and HTML comments.
  @property (nonatomic, strong, readonly) MMHTMLParser *htmlParser;

  // NOTE(review): not referenced by the methods visible in this part of the file.
  @property (nonatomic, strong) NSMutableArray *elements;
  @property (nonatomic, strong) NSMutableArray *openElements;

  // Block whose spans are being parsed; consulted when emitting hard line breaks.
  @property (nonatomic, strong) MMElement *blockElement;

  // Switches that enable/disable individual span kinds while parsing.
  @property (nonatomic, assign) BOOL parseEm;
  @property (nonatomic, assign) BOOL parseImages;
  @property (nonatomic, assign) BOOL parseLinks;
  @property (nonatomic, assign) BOOL parseStrong;

  @end
  41. @implementation MMSpanParser
  42. #pragma mark - Public Methods
  // Creates a span parser configured with the given Markdown extensions.
  //
  // extensions: bitmask of MMMarkdownExtensions consulted by the span parsers.
  // Returns the initialized parser, or nil if the superclass initializer fails.
  //
  // All span kinds (em, strong, links, images) start out enabled.
  - (id)initWithExtensions:(MMMarkdownExtensions)extensions
  {
      self = [super init];
      if (self)
      {
          _extensions = extensions;
          _htmlParser = [MMHTMLParser new];

          // Assign the backing ivars directly: accessors should not be messaged
          // on a partially initialized object (a subclass override could run
          // before its own state is set up).
          _parseEm     = YES;
          _parseImages = YES;
          _parseLinks  = YES;
          _parseStrong = YES;
      }
      return self;
  }
  // Parses every span in `block`, reading from `scanner` until the scanner
  // reaches the end of its string. Leading whitespace is skipped first.
  // Returns the array produced by -_parseWithScanner:untilTestPasses:.
  - (NSArray *)parseSpansInBlockElement:(MMElement *)block withScanner:(MMScanner *)scanner
  {
      // Remembered so that newline handling can check the block's type.
      self.blockElement = block;

      [scanner skipWhitespace];

      BOOL (^reachedEndOfInput)(void) = ^BOOL {
          return scanner.atEndOfString;
      };
      return [self _parseWithScanner:scanner untilTestPasses:reachedEndOfInput];
  }
  // Parses one table row into cell elements, one per entry of `columns`.
  //
  // columns: array of NSNumber alignment values, one per column.
  // Returns an array of MMElementTypeTableRowCell elements, or nil when the
  // spans of any cell cannot be parsed.
  - (NSArray *)parseSpansInTableColumns:(NSArray *)columns withScanner:(MMScanner *)scanner
  {
      NSMutableArray *rowCells = [NSMutableArray new];

      // A cell ends at the next '|' or at the end of the line (after whitespace).
      BOOL (^atCellBoundary)(void) = ^BOOL {
          [scanner skipWhitespace];
          return scanner.nextCharacter == '|' || scanner.atEndOfLine;
      };

      for (NSNumber *alignment in columns)
      {
          [scanner skipWhitespace];
          NSUInteger cellStart = scanner.location;

          // An immediately following '|' means the cell is empty.
          NSArray *cellSpans;
          if (scanner.nextCharacter == '|')
              cellSpans = @[];
          else
              cellSpans = [self _parseWithScanner:scanner untilTestPasses:atCellBoundary];

          if (cellSpans == nil)
              return nil;

          MMElement *cell = [MMElement new];
          cell.type      = MMElementTypeTableRowCell;
          cell.children  = cellSpans;
          cell.range     = NSMakeRange(cellStart, scanner.location - cellStart);
          cell.alignment = alignment.integerValue;
          [rowCells addObject:cell];

          // Step over the column separator, if there is one.
          if (scanner.nextCharacter == '|')
              [scanner advance];
      }

      return rowCells;
  }
  87. #pragma mark - Private Methods
  // Core span loop: collects elements from `scanner` until `test` returns YES.
  //
  // Plain text between recognized elements is emitted as MMElementTypeNone
  // elements. Each line end produces a "\n" entity, preceded by a line break
  // element when hard newlines are enabled and the current block is a paragraph.
  //
  // Returns the collected elements once `test` passes, or nil if the end of the
  // string is reached first (the outer transaction is then committed with NO).
  - (NSArray *)_parseWithScanner:(MMScanner *)scanner untilTestPasses:(BOOL (^)(void))test
  {
      NSMutableArray *spans = [NSMutableArray array];

      NSCharacterSet *interestingChars = [NSCharacterSet characterSetWithCharactersInString:@"\\`*_<&[! ~w:@|"];
      NSCharacterSet *plainChars       = [interestingChars invertedSet];

      // Appends a plain-text element covering [from, to) unless it is empty.
      void (^appendText)(NSUInteger, NSUInteger) = ^(NSUInteger from, NSUInteger to) {
          if (from == to)
              return;
          MMElement *text = [MMElement new];
          text.type  = MMElementTypeNone;
          text.range = NSMakeRange(from, to - from);
          [spans addObject:text];
      };

      [scanner beginTransaction];
      while (!scanner.atEndOfString)
      {
          MMElement *element = [self _parseNextElementWithScanner:scanner];
          if (element != nil)
          {
              // Flush the text that preceded the element, then the element.
              appendText(scanner.startLocation, element.range.location);
              [spans addObject:element];

              [scanner commitTransaction:YES];
              [scanner beginTransaction];
          }
          else if (scanner.atEndOfLine)
          {
              // Handled here rather than in _parseNextElementWithScanner:
              // because a line end can yield two elements.
              appendText(scanner.startLocation, scanner.location);

              BOOL hardNewlines = (self.extensions & MMMarkdownExtensionsHardNewlines) != 0;
              if (hardNewlines && self.blockElement.type == MMElementTypeParagraph)
              {
                  MMElement *lineBreak = [MMElement new];
                  lineBreak.range = NSMakeRange(scanner.location, 1);
                  lineBreak.type  = MMElementTypeLineBreak;
                  [spans addObject:lineBreak];
              }

              // The newline itself
              MMElement *newline = [MMElement new];
              newline.range       = NSMakeRange(scanner.location, 1);
              newline.type        = MMElementTypeEntity;
              newline.stringValue = @"\n";
              [spans addObject:newline];

              [scanner advanceToNextLine];
              [scanner commitTransaction:YES];
              [scanner beginTransaction];
          }
          else if ([scanner skipCharactersFromSet:plainChars] == 0)
          {
              // A special character that started no element: step over it.
              [scanner advance];
          }

          // Check for the end condition inside its own transaction so that a
          // failing test leaves the scanner untouched.
          [scanner beginTransaction];
          NSUInteger stopLocation = scanner.location;
          if (!test())
          {
              [scanner commitTransaction:NO];
              continue;
          }

          [scanner commitTransaction:YES];
          appendText(scanner.startLocation, stopLocation);
          [scanner commitTransaction:YES];
          return spans;
      }

      [scanner commitTransaction:NO];
      return nil;
  }
  // Tries each span parser in turn at the scanner's current location and
  // returns the first element produced, or nil when none of them matches.
  //
  // Every attempt runs in its own scanner transaction, committed with YES only
  // when the parser returned an element.
  - (MMElement *)_parseNextElementWithScanner:(MMScanner *)scanner
  {
      // Runs one parser inside a transaction of its own.
      MMElement *(^attempt)(MMElement *(^)(void)) = ^MMElement *(MMElement *(^parse)(void)) {
          [scanner beginTransaction];
          MMElement *parsed = parse();
          [scanner commitTransaction:(parsed != nil)];
          return parsed;
      };

      MMElement *found = nil;

      if (self.extensions & MMMarkdownExtensionsStrikethroughs)
      {
          found = attempt(^MMElement *{ return [self _parseStrikethroughWithScanner:scanner]; });
          if (found) return found;
      }

      // URL autolinking
      if (self.parseLinks && (self.extensions & MMMarkdownExtensionsAutolinkedURLs))
      {
          found = attempt(^MMElement *{ return [self _parseAutolinkEmailAddressWithScanner:scanner]; });
          if (found) return found;

          found = attempt(^MMElement *{ return [self _parseAutolinkURLWithScanner:scanner]; });
          if (found) return found;

          found = attempt(^MMElement *{ return [self _parseAutolinkWWWURLWithScanner:scanner]; });
          if (found) return found;
      }

      found = attempt(^MMElement *{ return [self _parseBackslashWithScanner:scanner]; });
      if (found) return found;

      found = attempt(^MMElement *{ return [self _parseEmAndStrongWithScanner:scanner]; });
      if (found) return found;

      found = attempt(^MMElement *{ return [self _parseCodeSpanWithScanner:scanner]; });
      if (found) return found;

      found = attempt(^MMElement *{ return [self _parseLineBreakWithScanner:scanner]; });
      if (found) return found;

      if (self.parseLinks)
      {
          found = attempt(^MMElement *{ return [self _parseAutomaticLinkWithScanner:scanner]; });
          if (found) return found;

          found = attempt(^MMElement *{ return [self _parseAutomaticEmailLinkWithScanner:scanner]; });
          if (found) return found;

          found = attempt(^MMElement *{ return [self _parseLinkWithScanner:scanner]; });
          if (found) return found;
      }

      if (self.parseImages)
      {
          found = attempt(^MMElement *{ return [self _parseImageWithScanner:scanner]; });
          if (found) return found;
      }

      found = attempt(^MMElement *{ return [self.htmlParser parseInlineTagWithScanner:scanner]; });
      if (found) return found;

      found = attempt(^MMElement *{ return [self.htmlParser parseCommentWithScanner:scanner]; });
      if (found) return found;

      found = attempt(^MMElement *{ return [self _parseAmpersandWithScanner:scanner]; });
      if (found) return found;

      // Last parser: its result (possibly nil) is the final answer.
      return attempt(^MMElement *{ return [self _parseLeftAngleBracketWithScanner:scanner]; });
  }
  // Consumes a domain of the form <name>.<tld> for URL autolinking.
  //
  // The name must begin with an alphanumeric character; both parts may contain
  // alphanumerics, '-' and ':'. Returns YES when a name, a dot, and at least one
  // TLD character were consumed. On NO the scanner may already have advanced.
  - (BOOL)_parseAutolinkDomainWithScanner:(MMScanner *)scanner
  {
      NSCharacterSet *lettersAndDigits = NSCharacterSet.alphanumericCharacterSet;
      NSMutableCharacterSet *labelChars = [lettersAndDigits mutableCopy];
      [labelChars addCharactersInString:@"-:"];

      // The domain needs at least one leading alphanumeric.
      BOOL startsWithAlphanumeric = [lettersAndDigits characterIsMember:scanner.nextCharacter];
      if (!startsWithAlphanumeric)
          return NO;
      [scanner skipCharactersFromSet:labelChars];

      // A dot separates the domain from the TLD.
      if (scanner.nextCharacter != '.')
          return NO;
      [scanner advance];

      // The TLD needs at least one character.
      return [scanner skipCharactersFromSet:labelChars] > 0;
  }
  // Consumes the path portion of an autolinked URL.
  //
  // Accepts alphanumerics and common URL punctuation, backslash-escaped
  // parentheses, balanced parentheses, and dots that are followed by another
  // path character (so the path never ends on a '.'). Stops at the first
  // character that fits none of these.
  - (void)_parseAutolinkPathWithScanner:(MMScanner *)scanner
  {
      NSMutableCharacterSet *pathChars = [NSCharacterSet.alphanumericCharacterSet mutableCopy];
      [pathChars addCharactersInString:@",_-/:?&;%~!#+=@"];

      NSUInteger openParens = 0;
      BOOL scanning = YES;

      while (scanning)
      {
          if ([scanner skipCharactersFromSet:pathChars] > 0)
              continue;

          unichar next = scanner.nextCharacter;
          if (next == '\\')
          {
              // Step over the backslash and an escaped parenthesis, if any.
              [scanner advance];
              unichar escaped = scanner.nextCharacter;
              if (escaped == '(' || escaped == ')')
                  [scanner advance];
          }
          else if (next == '(')
          {
              openParens++;
              [scanner advance];
          }
          else if (next == ')' && openParens > 0)
          {
              openParens--;
              [scanner advance];
          }
          else if (next == '.')
          {
              // Only keep the dot when more path follows it.
              [scanner beginTransaction];
              [scanner advance];
              BOOL pathContinues = [pathChars characterIsMember:scanner.nextCharacter];
              [scanner commitTransaction:pathContinues];
              if (!pathContinues)
                  scanning = NO;
          }
          else
          {
              scanning = NO;
          }
      }
  }
  // Autolinks a bare e-mail address when the scanner is positioned on its '@'.
  //
  // The local part is the word immediately before the '@' (alphanumerics and
  // "._-+"); the domain is the word after it (alphanumerics and "._-"), trimmed
  // so that it ends on an alphanumeric, and it must contain at least one '.'.
  //
  // Returns an MMElementTypeMailTo element whose range and href cover
  // local@domain, or nil when the text does not look like an address.
  - (MMElement *)_parseAutolinkEmailAddressWithScanner:(MMScanner *)scanner
  {
      if (scanner.nextCharacter != '@')
          return nil;

      NSCharacterSet *alphanumerics = NSCharacterSet.alphanumericCharacterSet;
      NSMutableCharacterSet *localChars = [alphanumerics mutableCopy];
      [localChars addCharactersInString:@"._-+"];
      NSMutableCharacterSet *domainChars = [alphanumerics mutableCopy];
      [domainChars addCharactersInString:@"._-"];

      // Look for the previous word outside of the current transaction
      [scanner commitTransaction:NO];
      NSString *localPart = [scanner previousWordWithCharactersFromSet:localChars];
      [scanner beginTransaction];

      if (localPart.length == 0)
          return nil;

      // '@'
      [scanner advance];

      // Scan the domain with the domain character set: '+' is only valid in
      // the local part, so it must not be swallowed into the domain.
      NSString *domainPart = [scanner nextWordWithCharactersFromSet:domainChars];

      // Must end on a letter or number
      NSRange lastAlphanum = [domainPart rangeOfCharacterFromSet:alphanumerics options:NSBackwardsSearch];
      if (lastAlphanum.location == NSNotFound)
          return nil;
      domainPart = [domainPart substringToIndex:NSMaxRange(lastAlphanum)];

      // Must contain at least one .
      if ([domainPart rangeOfString:@"."].location == NSNotFound)
          return nil;

      // The domain word was only peeked at above; consume the trimmed part now.
      scanner.location += domainPart.length;

      // The local part lies before the transaction start, so extend the range back.
      NSUInteger startLocation = scanner.startLocation - localPart.length;
      NSRange range = NSMakeRange(startLocation, scanner.location-startLocation);

      MMElement *element = [MMElement new];
      element.type  = MMElementTypeMailTo;
      element.range = range;
      element.href  = [scanner.string substringWithRange:range];
      return element;
  }
  // Autolinks a bare URL when the scanner is positioned on the ':' of "://".
  //
  // The word before the ':' must be https, http or ftp (case-insensitive), and
  // "://" must be followed by a valid domain; a path is consumed when present.
  //
  // Returns an MMElementTypeLink element spanning scheme through path, with one
  // plain-text child over the same range, or nil when nothing matches.
  - (MMElement *)_parseAutolinkURLWithScanner:(MMScanner *)scanner
  {
      if (scanner.nextCharacter != ':')
          return nil;

      // The scheme precedes the current transaction, so read it outside of it.
      [scanner commitTransaction:NO];
      NSString *scheme = scanner.previousWord;
      [scanner beginTransaction];

      NSArray *knownSchemes = @[ @"https", @"http", @"ftp" ];
      if (![knownSchemes containsObject:scheme.lowercaseString])
          return nil;

      BOOL hasAuthority = [scanner matchString:@"://"]
                       && [self _parseAutolinkDomainWithScanner:scanner];
      if (!hasAuthority)
          return nil;

      [self _parseAutolinkPathWithScanner:scanner];

      // Extend the range backwards so that it includes the scheme.
      NSUInteger linkStart = scanner.startLocation - scheme.length;
      NSRange linkRange = NSMakeRange(linkStart, scanner.location - linkStart);

      MMElement *link = [MMElement new];
      link.type  = MMElementTypeLink;
      link.range = linkRange;
      link.href  = [scanner.string substringWithRange:linkRange];

      MMElement *label = [MMElement new];
      label.type  = MMElementTypeNone;
      label.range = linkRange;
      [link addChild:label];

      return link;
  }
  // Autolinks a bare "www." address.
  //
  // Requires "www." followed by a valid domain; a path is consumed when present.
  // Returns an MMElementTypeLink element whose href is the matched text prefixed
  // with "http://" and whose single text child covers the matched text, or nil.
  - (MMElement *)_parseAutolinkWWWURLWithScanner:(MMScanner *)scanner
  {
      BOOL matched = [scanner matchString:@"www."]
                  && [self _parseAutolinkDomainWithScanner:scanner];
      if (!matched)
          return nil;

      [self _parseAutolinkPathWithScanner:scanner];

      NSUInteger linkStart = scanner.startLocation;
      NSRange linkRange = NSMakeRange(linkStart, scanner.location - linkStart);
      NSString *address = [scanner.string substringWithRange:linkRange];

      MMElement *link = [MMElement new];
      link.type  = MMElementTypeLink;
      link.range = linkRange;
      link.href  = [@"http://" stringByAppendingString:address];

      MMElement *label = [MMElement new];
      label.type  = MMElementTypeNone;
      label.range = linkRange;
      [link addChild:label];

      return link;
  }
  // Parses a ~~strikethrough~~ span.
  //
  // The closing "~~" may not sit at the beginning of a line and must directly
  // follow a non-whitespace character. Returns an MMElementTypeStrikethrough
  // element containing the spans between the markers, or nil.
  - (MMElement *)_parseStrikethroughWithScanner:(MMScanner *)scanner
  {
      if (![scanner matchString:@"~~"])
          return nil;

      NSCharacterSet *blanks = NSCharacterSet.whitespaceCharacterSet;
      BOOL (^atClosingMarker)(void) = ^BOOL {
          // Not at the start of a line, and right after the end of a word...
          BOOL followsWord = !scanner.atBeginningOfLine
                          && ![blanks characterIsMember:scanner.previousCharacter];
          // ...and only then try to consume the closing marker.
          return followsWord && [scanner matchString:@"~~"];
      };

      NSArray *innerSpans = [self _parseWithScanner:scanner untilTestPasses:atClosingMarker];
      if (innerSpans == nil)
          return nil;

      NSUInteger spanStart = scanner.startLocation;
      MMElement *strikethrough = [MMElement new];
      strikethrough.type     = MMElementTypeStrikethrough;
      strikethrough.range    = NSMakeRange(spanStart, scanner.location - spanStart);
      strikethrough.children = innerSpans;
      return strikethrough;
  }
  // Parses emphasis introduced by one to three '*' or '_' characters.
  //
  // One marker opens <em>, two open <strong>, three open both. With three
  // opening markers the closers may arrive together or in two groups; in the
  // latter case the remaining markers close an outer element that wraps the
  // inner one. The parseEm/parseStrong flags are cleared while the matching
  // kind is open and restored afterwards.
  //
  // Returns the (possibly nested) element, or nil when no emphasis is found.
  - (MMElement *)_parseEmAndStrongWithScanner:(MMScanner *)scanner
  {
      // Must start with '*' or '_'
      unichar marker = scanner.nextCharacter;
      if (marker != '*' && marker != '_')
          return nil;

      NSCharacterSet *wordChars = NSCharacterSet.alphanumericCharacterSet;
      BOOL guardWordUnderscores = (self.extensions & MMMarkdownExtensionsUnderscoresInWords) && marker == '_';

      if (guardWordUnderscores)
      {
          // GFM doesn't italicize parts of words. The preceding character lies
          // outside the current transaction, so step out of it to read it.
          [scanner commitTransaction:NO];
          unichar before = scanner.previousCharacter;
          [scanner beginTransaction];

          if ([wordChars characterIsMember:before])
              return nil;
      }

      // Must not be preceded by another marker of the same kind
      if (scanner.previousCharacter == marker)
          return nil;

      // Count the opening markers; more than three is not emphasis.
      NSUInteger openCount = 0;
      for (; scanner.nextCharacter == marker; [scanner advance])
          openCount++;
      if (openCount > 3)
          return nil;

      BOOL opensEm     = (openCount == 1 || openCount == 3);
      BOOL opensStrong = (openCount == 2 || openCount == 3);
      if ((opensEm && !self.parseEm) || (opensStrong && !self.parseStrong))
          return nil;

      NSCharacterSet *blanks = NSCharacterSet.whitespaceCharacterSet;
      __block NSUInteger pendingMarkers = openCount;

      BOOL (^atCloser)(void) = ^BOOL {
          // Can't be at the beginning of the line
          if (scanner.atBeginningOfLine)
              return NO;
          // Must follow the end of a word
          if ([blanks characterIsMember:scanner.previousCharacter])
              return NO;

          // Consume up to as many markers as are still open
          NSUInteger closeCount = 0;
          while (scanner.nextCharacter == marker && closeCount < pendingMarkers)
          {
              closeCount++;
              [scanner advance];
          }
          if (closeCount == 0)
              return NO;
          // Only a triple opener may be closed piecewise
          if (closeCount != pendingMarkers && pendingMarkers != 3)
              return NO;

          // GFM doesn't italicize parts of words
          if (guardWordUnderscores && [wordChars characterIsMember:scanner.nextCharacter])
              return NO;

          pendingMarkers -= closeCount;
          return YES;
      };

      if (opensEm)
          self.parseEm = NO;
      if (opensStrong)
          self.parseStrong = NO;

      NSArray *innerSpans = [self _parseWithScanner:scanner untilTestPasses:atCloser];

      // Re-enable a kind unless it is the one still left open.
      if (opensEm && (!innerSpans || pendingMarkers != 1))
          self.parseEm = YES;
      if (opensStrong && (!innerSpans || pendingMarkers != 2))
          self.parseStrong = YES;

      if (!innerSpans)
          return nil;

      BOOL innerIsEm = (openCount == 1) || (openCount == 3 && pendingMarkers != 1);
      NSUInteger innerStart = scanner.startLocation + pendingMarkers;

      MMElement *element = [MMElement new];
      element.type     = innerIsEm ? MMElementTypeEm : MMElementTypeStrong;
      element.range    = NSMakeRange(innerStart, scanner.location - innerStart);
      element.children = innerSpans;

      if (openCount == 3 && pendingMarkers == 0)
      {
          // All three markers closed at once: wrap the inner element in <strong>.
          NSArray *wrapped = @[ element ];
          element = [MMElement new];
          element.type     = MMElementTypeStrong;
          element.range    = NSMakeRange(scanner.startLocation, scanner.location - scanner.startLocation);
          element.children = wrapped;
      }
      else if (pendingMarkers > 0)
      {
          // Some markers are still open: keep parsing until they close, too.
          NSMutableArray *outerSpans = [[self _parseWithScanner:scanner untilTestPasses:atCloser] mutableCopy];

          if (opensEm)
              self.parseEm = YES;
          if (opensStrong)
              self.parseStrong = YES;

          if (!outerSpans)
              return nil;

          [outerSpans insertObject:element atIndex:0];
          element = [MMElement new];
          element.type     = innerIsEm ? MMElementTypeStrong : MMElementTypeEm;
          element.range    = NSMakeRange(scanner.startLocation, scanner.location - scanner.startLocation);
          element.children = outerSpans;
      }

      return element;
  }
  // Parses a code span delimited by one or more backticks.
  //
  // The closing run must be as long as the opening run. Inside the span '&',
  // '<' and '>' become entity children; everything else becomes plain-text
  // children. Whitespace right after the opening run is skipped, and trailing
  // whitespace is trimmed from the last child.
  //
  // Returns an MMElementTypeCodeSpan element, or nil if there is no opening
  // backtick or the end of the string is hit before the closing run.
  - (MMElement *)_parseCodeSpanWithScanner:(MMScanner *)scanner
  {
      if (scanner.nextCharacter != '`')
          return nil;

      // Count the opening backticks (there is at least one).
      NSUInteger fenceLength = 0;
      do
      {
          fenceLength++;
          [scanner advance];
      } while (scanner.nextCharacter == '`');

      MMElement *codeSpan = [MMElement new];
      codeSpan.type = MMElementTypeCodeSpan;

      NSCharacterSet *blanks = NSCharacterSet.whitespaceCharacterSet;

      // skip leading whitespace
      [scanner skipCharactersFromSet:blanks];

      NSCharacterSet *plainChars = [[NSCharacterSet characterSetWithCharactersInString:@"`&<>"] invertedSet];
      NSUInteger pendingTextStart = scanner.location;

      for (;;)
      {
          if (scanner.atEndOfString)
              return nil;

          // Skip ordinary characters and emit them as text
          [scanner skipCharactersFromSet:plainChars];
          if (pendingTextStart != scanner.location)
          {
              MMElement *text = [MMElement new];
              text.type  = MMElementTypeNone;
              text.range = NSMakeRange(pendingTextStart, scanner.location - pendingTextStart);
              [codeSpan addChild:text];
          }

          unichar next = scanner.nextCharacter;

          // Check for the closing backticks
          if (next == '`')
          {
              // Remember where the run starts so that the backticks are kept
              // as text if this turns out not to be the closing run.
              pendingTextStart = scanner.location;

              NSUInteger matched = 0;
              while (matched < fenceLength && scanner.nextCharacter == '`')
              {
                  [scanner advance];
                  matched++;
              }
              if (matched == fenceLength)
                  break;
              continue;
          }

          // Characters that must be emitted as entities
          NSString *escaped = nil;
          if (next == '&')
              escaped = @"&amp;";
          else if (next == '<')
              escaped = @"&lt;";
          else if (next == '>')
              escaped = @"&gt;";

          if (escaped != nil)
          {
              MMElement *entity = [MMElement new];
              entity.type        = MMElementTypeEntity;
              entity.range       = NSMakeRange(scanner.location, 1);
              entity.stringValue = escaped;
              [codeSpan addChild:entity];
              [scanner advance];
          }
          else if (scanner.atEndOfLine)
          {
              // The span continues on the next line.
              pendingTextStart = scanner.location;
              [scanner advanceToNextLine];
              continue;
          }

          pendingTextStart = scanner.location;
      }

      // remove trailing whitespace from the last child
      MMElement *lastChild = codeSpan.children.lastObject;
      if (lastChild != nil)
      {
          while ([blanks characterIsMember:[scanner.string characterAtIndex:NSMaxRange(lastChild.range) - 1]])
          {
              NSRange trimmed = lastChild.range;
              trimmed.length -= 1;
              lastChild.range = trimmed;
          }
      }

      codeSpan.range = NSMakeRange(scanner.startLocation, scanner.location - scanner.startLocation);
      return codeSpan;
  }
  // Parses a hard line break: two or more spaces at the end of a line that is
  // not the last line of the string.
  //
  // Returns an MMElementTypeLineBreak element whose range starts one character
  // after the transaction start and runs to the scanner's location, or nil.
  - (MMElement *)_parseLineBreakWithScanner:(MMScanner *)scanner
  {
      NSCharacterSet *spaceOnly = [NSCharacterSet characterSetWithCharactersInString:@" "];
      NSUInteger spaceCount = [scanner skipCharactersFromSet:spaceOnly];

      // Needs 2+ spaces, at the end of a line, but never on the last line.
      if (spaceCount < 2 || !scanner.atEndOfLine || scanner.atEndOfString)
          return nil;

      NSUInteger breakStart = scanner.startLocation + 1;
      MMElement *lineBreak = [MMElement new];
      lineBreak.type  = MMElementTypeLineBreak;
      lineBreak.range = NSMakeRange(breakStart, scanner.location - breakStart);
      return lineBreak;
  }
  // Parses an automatic link of the form <scheme://...>.
  //
  // The text between the angle brackets must start with "<word>://" and be
  // accepted by +[NSURL URLWithString:]. The link's children are the link text
  // split at '&': plain-text runs interleaved with "&amp;" entities.
  //
  // Returns an MMElementTypeLink element whose href is the bracketed text, or
  // nil when the text is not an automatic link.
  - (MMElement *)_parseAutomaticLinkWithScanner:(MMScanner *)scanner
  {
      // Leading <
      if (scanner.nextCharacter != '<')
          return nil;
      [scanner advance];

      NSUInteger textLocation = scanner.location;

      // Find the trailing >
      [scanner skipCharactersFromSet:[[NSCharacterSet characterSetWithCharactersInString:@">"] invertedSet]];
      if (scanner.atEndOfLine)
          return nil;
      [scanner advance];

      // Everything between the brackets (the '>' just consumed is excluded)
      NSRange   linkRange = NSMakeRange(textLocation, (scanner.location-1)-textLocation);
      NSString *linkText  = [scanner.string substringWithRange:linkRange];

      // Make sure it looks like a link
      static NSRegularExpression *regex;
      static dispatch_once_t onceToken;
      dispatch_once(&onceToken, ^{
          regex = [NSRegularExpression regularExpressionWithPattern:@"^(\\w+)://" options:0 error:nil];
      });
      NSRange matchRange;
      matchRange = [regex rangeOfFirstMatchInString:linkText options:0 range:NSMakeRange(0, linkText.length)];
      if (matchRange.location == NSNotFound)
          return nil;

      NSURL *url = [NSURL URLWithString:linkText];
      if (!url)
          return nil;

      MMElement *element = [MMElement new];
      element.type  = MMElementTypeLink;
      element.range = NSMakeRange(scanner.startLocation, scanner.location-scanner.startLocation);
      element.href  = linkText;

      // Do the text the hard way to take care of ampersands
      NSRange textRange = linkRange;
      NSCharacterSet *ampersands = [NSCharacterSet characterSetWithCharactersInString:@"&"];
      while (textRange.length > 0)
      {
          NSRange result = [scanner.string rangeOfCharacterFromSet:ampersands
                                                           options:0
                                                             range:textRange];

          if (result.location == NSNotFound)
          {
              // No more ampersands: the rest is plain text.
              MMElement *text = [MMElement new];
              text.type  = MMElementTypeNone;
              text.range = textRange;
              [element addChild:text];
              break;
          }

          // Text before the ampersand, if any
          if (textRange.location != result.location)
          {
              MMElement *text = [MMElement new];
              text.type  = MMElementTypeNone;
              text.range = NSMakeRange(textRange.location, result.location-textRange.location);
              [element addChild:text];
          }

          // The entity covers the ampersand itself, which sits at
          // result.location rather than at the start of the remaining text.
          MMElement *ampersand = [MMElement new];
          ampersand.type        = MMElementTypeEntity;
          ampersand.range       = NSMakeRange(result.location, 1);
          ampersand.stringValue = @"&amp;";
          [element addChild:ampersand];

          // Continue with whatever follows the ampersand
          textRange = NSMakeRange(result.location+1, NSMaxRange(textRange)-(result.location+1));
      }

      return element;
  }
  // Parses an automatic e-mail link of the form <user@example.com>.
  //
  // The text between the angle brackets must match the address pattern below.
  // Returns an MMElementTypeMailTo element whose href is the bracketed text, or
  // nil when the text is not an address.
  - (MMElement *)_parseAutomaticEmailLinkWithScanner:(MMScanner *)scanner
  {
      // Opening '<'
      if (scanner.nextCharacter != '<')
          return nil;
      [scanner advance];

      NSUInteger addressStart = scanner.location;

      // Run up to the closing '>', which must be on the same line
      NSCharacterSet *closingBracket = [NSCharacterSet characterSetWithCharactersInString:@">"];
      [scanner skipCharactersFromSet:[closingBracket invertedSet]];
      if (scanner.atEndOfLine)
          return nil;
      [scanner advance];

      // Everything between the brackets (the '>' just consumed is excluded)
      NSUInteger addressLength = (scanner.location - 1) - addressStart;
      NSString *address = [scanner.string substringWithRange:NSMakeRange(addressStart, addressLength)];

      // Make sure it looks like an address
      static NSRegularExpression *addressPattern;
      static dispatch_once_t patternOnce;
      dispatch_once(&patternOnce, ^{
          addressPattern = [NSRegularExpression regularExpressionWithPattern:@"^[-._0-9\\p{L}]+@[-\\p{L}0-9][-.\\p{L}0-9]*\\.\\p{L}+$"
                                                                     options:NSRegularExpressionCaseInsensitive
                                                                       error:nil];
      });

      NSRange wholeAddress = NSMakeRange(0, address.length);
      NSRange match = [addressPattern rangeOfFirstMatchInString:address options:0 range:wholeAddress];
      if (match.location == NSNotFound)
          return nil;

      NSUInteger linkStart = scanner.startLocation;
      MMElement *mailLink = [MMElement new];
      mailLink.type  = MMElementTypeMailTo;
      mailLink.range = NSMakeRange(linkStart, scanner.location - linkStart);
      mailLink.href  = address;
      return mailLink;
  }
  // Consumes a bracketed link text, "[...]", honouring nested brackets and
  // backslash escapes, and possibly spanning several lines.
  //
  // Returns an array of NSValue-wrapped ranges, one per non-empty stretch of
  // text collected on each line (the outer brackets are excluded). Returns nil
  // if the scanner is not at '[' or the string ends before the closing ']'.
  - (NSArray *)_parseLinkTextBodyWithScanner:(MMScanner *)scanner
  {
      if (scanner.nextCharacter != '[')
          return nil;
      [scanner advance];

      NSMutableArray *collected = [NSMutableArray new];
      NSCharacterSet *plainChars = [[NSCharacterSet characterSetWithCharactersInString:@"[]\\"] invertedSet];
      NSRange pending = scanner.currentRange;

      // Each pass handles one special character and then steps over it.
      for (NSUInteger depth = 1; depth > 0; [scanner advance])
      {
          if (scanner.atEndOfString)
              return nil;

          if (scanner.atEndOfLine)
          {
              // Store what was gathered on this line and start over on the next.
              if (pending.length > 0)
                  [collected addObject:[NSValue valueWithRange:pending]];
              [scanner advanceToNextLine];
              pending = scanner.currentRange;
          }

          [scanner skipCharactersFromSet:plainChars];

          switch (scanner.nextCharacter)
          {
              case '[':
                  depth += 1;
                  break;
              case ']':
                  depth -= 1;
                  break;
              case '\\':
                  // Step over the backslash; the escaped character is skipped
                  // by the loop's own advance.
                  [scanner advance];
                  break;
              default:
                  break;
          }

          pending.length = scanner.location - pending.location;
      }

      if (pending.length > 0)
          [collected addObject:[NSValue valueWithRange:pending]];

      return collected;
  }
  // Parses an inline link: [text](url) or [text](url "title").
  //
  // The URL may contain balanced parentheses and backslash escapes, and may be
  // wrapped in angle brackets (which are stripped). Whitespace ends the URL and
  // must be followed by either ')' or a double-quoted title and ')'.
  //
  // Returns an MMElementTypeLink element with innerRanges, href and (when
  // present) title set, or nil when the text is not an inline link.
  - (MMElement *)_parseInlineLinkWithScanner:(MMScanner *)scanner
  {
      MMElement *link = [MMElement new];
      link.type = MMElementTypeLink;

      // The [] part
      link.innerRanges = [self _parseLinkTextBodyWithScanner:scanner];
      if (!link.innerRanges)
          return nil;

      // The () part
      if (scanner.nextCharacter != '(')
          return nil;
      [scanner advance];
      [scanner skipWhitespace];

      NSUInteger hrefStart = scanner.location;
      NSUInteger hrefEnd   = hrefStart;

      NSCharacterSet *blanks = NSCharacterSet.whitespaceCharacterSet;
      NSCharacterSet *urlPlainChars = [[NSCharacterSet characterSetWithCharactersInString:@"()\\ \t"] invertedSet];
      NSUInteger depth = 1;

      while (depth > 0)
      {
          [scanner skipCharactersFromSet:urlPlainChars];
          if (scanner.atEndOfLine)
              return nil;
          hrefEnd = scanner.location;

          unichar next = scanner.nextCharacter;
          if (next == '(')
          {
              depth += 1;
          }
          else if (next == ')')
          {
              depth -= 1;
          }
          else if (next == '\\')
          {
              // Skip the backslash here; the escaped character is skipped by
              // the advance at the bottom of the loop.
              [scanner advance];
          }
          else if ([blanks characterIsMember:next])
          {
              // Whitespace ends the URL and is only allowed at the top level.
              if (depth != 1)
                  return nil;

              [scanner skipWhitespace];
              if (scanner.nextCharacter == ')')
              {
                  [scanner advance];
                  depth -= 1;
              }
              break;
          }

          hrefEnd = scanner.location;
          [scanner advance];
      }

      NSUInteger titleStart = NSNotFound;
      NSUInteger titleEnd   = NSNotFound;

      // A depth of 1 at this point means whitespace was hit: a title follows.
      if (depth == 1)
      {
          // The title must open with a "
          if (scanner.nextCharacter != '"')
              return nil;
          [scanner advance];
          titleStart = scanner.location;

          // The title ends at the first '"' that is directly followed by ')'.
          NSCharacterSet *nonQuoteChars = [[NSCharacterSet characterSetWithCharactersInString:@"\""] invertedSet];
          for (;;)
          {
              [scanner skipCharactersFromSet:nonQuoteChars];
              if (scanner.atEndOfLine)
                  return nil;
              [scanner advance];

              if (scanner.nextCharacter == ')')
              {
                  titleEnd = scanner.location - 1;
                  [scanner advance];
                  break;
              }
          }
      }

      NSString *href = [scanner.string substringWithRange:NSMakeRange(hrefStart, hrefEnd - hrefStart)];

      // Drop surrounding angle brackets, if any
      BOOL bracketed = [href hasPrefix:@"<"] && [href hasSuffix:@">"];
      if (bracketed)
          href = [href substringWithRange:NSMakeRange(1, href.length - 2)];

      link.range = NSMakeRange(scanner.startLocation, scanner.location - scanner.startLocation);
      link.href  = [self _stringWithBackslashEscapesRemoved:href];

      if (titleStart != NSNotFound)
      {
          NSRange titleRange = NSMakeRange(titleStart, titleEnd - titleStart);
          link.title = [scanner.string substringWithRange:titleRange];
      }

      return link;
  }
  // Parses a reference-style link -- [text][id], [text] [id] or [text][] --
  // starting at the scanner's current position.
  //
  // Returns a link element with innerRanges, range and identifier filled in,
  // or nil if the text is empty or the second pair of brackets can't be
  // parsed. An empty id falls back to the link text. When the id spans
  // several ranges, they are joined with single spaces.
  - (MMElement *)_parseReferenceLinkWithScanner:(MMScanner *)scanner
  {
      MMElement *link = [MMElement new];
      link.type = MMElementTypeLink;

      // First pair of brackets: the link text, which must not be empty
      link.innerRanges = [self _parseLinkTextBodyWithScanner:scanner];
      if (link.innerRanges.count == 0)
          return nil;

      // A single space or a line break may separate the two pairs of brackets
      if (scanner.nextCharacter == ' ')
      {
          [scanner advance];
      }
      else if (scanner.atEndOfLine)
      {
          [scanner advanceToNextLine];
      }

      // Second pair of brackets: the id
      NSArray *idRanges = [self _parseLinkTextBodyWithScanner:scanner];
      if (idRanges == nil)
          return nil;
      if (idRanges.count == 0)
      {
          idRanges = link.innerRanges;
      }

      // idRanges is never empty here, so joining with spaces gives one space
      // between consecutive ranges and none at the end
      NSMutableArray *pieces = [NSMutableArray arrayWithCapacity:idRanges.count];
      for (NSValue *boxedRange in idRanges)
      {
          [pieces addObject:[scanner.string substringWithRange:[boxedRange rangeValue]]];
      }
      NSMutableString *identifier = [[pieces componentsJoinedByString:@" "] mutableCopy];

      link.range      = NSMakeRange(scanner.startLocation, scanner.location - scanner.startLocation);
      link.identifier = identifier;

      return link;
  }
  // Parses a link at the scanner's current position, trying the inline form
  // first and the reference form second, then parses the link text into child
  // elements.
  //
  // Returns the link element, or nil if neither form matches. Expects the
  // caller to have opened a scanner transaction before calling.
  - (MMElement *)_parseLinkWithScanner:(MMScanner *)scanner
  {
      MMElement *link = [self _parseInlineLinkWithScanner:scanner];
      if (!link)
      {
          // Close the caller's transaction without committing and open a fresh
          // one before the second attempt (presumably this rewinds the scanner
          // to where the first attempt started -- confirm against MMScanner)
          [scanner commitTransaction:NO];
          [scanner beginTransaction];
          link = [self _parseReferenceLinkWithScanner:scanner];
      }

      // Nothing to parse if there's no link or the link text is empty
      if (link.innerRanges.count == 0)
          return link;

      // Link parsing is switched off while the link text itself is parsed
      self.parseLinks = NO;
      MMScanner *textScanner = [MMScanner scannerWithString:scanner.string lineRanges:link.innerRanges];
      link.children = [self _parseWithScanner:textScanner untilTestPasses:^{
          return [textScanner atEndOfString];
      }];
      self.parseLinks = YES;

      return link;
  }
  // Parses an image -- a '!' followed by an inline or reference link -- at the
  // scanner's current position.
  //
  // Returns an image element whose range includes the '!' and whose
  // stringValue is the concatenated text between the brackets. Returns nil if
  // the next character isn't '!' or no link follows it.
  - (MMElement *)_parseImageWithScanner:(MMScanner *)scanner
  {
      if (scanner.nextCharacter != '!')
          return nil;
      [scanner advance];

      // The link is parsed in its own transaction so that the '!' scanned
      // above is protected
      [scanner beginTransaction];

      self.parseImages = NO;
      MMElement *image = [self _parseInlineLinkWithScanner:scanner];
      self.parseImages = YES;

      if (!image)
      {
          // Close the transaction without committing and open a fresh one
          // before trying the reference form
          [scanner commitTransaction:NO];
          [scanner beginTransaction];
          image = [self _parseReferenceLinkWithScanner:scanner];
      }

      [scanner commitTransaction:YES];

      if (!image)
          return nil;

      image.type = MMElementTypeImage;

      // Widen the link's range by one character to take in the '!'
      NSRange linkRange = image.range;
      image.range = NSMakeRange(linkRange.location - 1, linkRange.length + 1);

      // The alt text is the bracketed text, with its ranges run together
      NSMutableString *altText = [NSMutableString new];
      for (NSValue *boxedRange in image.innerRanges)
      {
          [altText appendString:[scanner.string substringWithRange:[boxedRange rangeValue]]];
      }
      image.stringValue = altText;

      return image;
  }
  // Parses a bare '&' at the scanner's current position and turns it into an
  // entity element whose stringValue is "&amp;".
  //
  // Returns nil if the next character isn't '&', or if the '&' begins what
  // looks like an HTML entity: an optional '#', at least one alphanumeric
  // character and a ';'. In the second case the '&' has already been scanned.
  //
  // The name must not be empty: "&;" and "&#;" aren't entities, so their '&'
  // is escaped like any other.
  - (MMElement *)_parseAmpersandWithScanner:(MMScanner *)scanner
  {
      if (scanner.nextCharacter != '&')
          return nil;
      [scanner advance];

      // Look ahead for an entity inside a transaction that is never committed.
      // The range computed below relies on the scanner being back just past
      // the '&' afterwards.
      [scanner beginTransaction];

      if (scanner.nextCharacter == '#')
          [scanner advance];

      NSUInteger nameLocation = scanner.location;
      [scanner skipCharactersFromSet:NSCharacterSet.alphanumericCharacterSet];
      BOOL hasName  = (scanner.location > nameLocation);
      BOOL isEntity = hasName && (scanner.nextCharacter == ';');

      [scanner commitTransaction:NO];

      // Entities are left alone
      if (isEntity)
          return nil;

      MMElement *element = [MMElement new];
      element.type        = MMElementTypeEntity;
      element.range       = NSMakeRange(scanner.location - 1, 1);
      element.stringValue = @"&amp;";

      return element;
  }
  // Parses a backslash escape at the scanner's current position.
  //
  // Returns an entity element that covers the backslash and the character
  // after it, with that character as its stringValue. Returns nil if the next
  // character isn't a backslash or the character after it isn't one of
  // ESCAPABLE_CHARS; in the latter case the backslash has already been
  // scanned.
  - (MMElement *)_parseBackslashWithScanner:(MMScanner *)scanner
  {
      if (scanner.nextCharacter != '\\')
          return nil;
      [scanner advance];

      NSCharacterSet *escapableChars = [NSCharacterSet characterSetWithCharactersInString:ESCAPABLE_CHARS];
      unichar escaped = scanner.nextCharacter;
      if (![escapableChars characterIsMember:escaped])
          return nil;

      // The scanner now sits on the escaped character, one past the backslash
      NSUInteger escapedLocation = scanner.location;

      MMElement *literal = [MMElement new];
      literal.type        = MMElementTypeEntity;
      literal.range       = NSMakeRange(escapedLocation - 1, 2);
      literal.stringValue = [scanner.string substringWithRange:NSMakeRange(escapedLocation, 1)];

      [scanner advance];

      return literal;
  }
  // Parses a '<' at the scanner's current position and turns it into an entity
  // element whose stringValue is "&lt;".
  //
  // Returns nil, without advancing, if the next character isn't '<'.
  - (MMElement *)_parseLeftAngleBracketWithScanner:(MMScanner *)scanner
  {
      BOOL atBracket = (scanner.nextCharacter == '<');
      if (!atBracket)
          return nil;

      [scanner advance];

      // The bracket is the one character just behind the scanner
      NSRange bracketRange = NSMakeRange(scanner.location - 1, 1);

      MMElement *entity = [MMElement new];
      entity.type        = MMElementTypeEntity;
      entity.range       = bracketRange;
      entity.stringValue = @"&lt;";

      return entity;
  }
  // Returns a mutable copy of the string in which every backslash that comes
  // directly before one of ESCAPABLE_CHARS has been dropped.
  //
  // The escaped character itself is kept and is never treated as the start of
  // another escape. Backslashes before any other character, and a backslash
  // at the very end of the string, are left in place.
  - (NSString *)_stringWithBackslashEscapesRemoved:(NSString *)string
  {
      NSCharacterSet  *escapableChars = [NSCharacterSet characterSetWithCharactersInString:ESCAPABLE_CHARS];
      NSMutableString *unescaped      = [string mutableCopy];

      // Stop one short of the end: a trailing backslash has nothing to escape
      NSUInteger index = 0;
      while (index + 1 < unescaped.length)
      {
          BOOL isBackslash = ([unescaped characterAtIndex:index] == '\\');
          if (isBackslash && [escapableChars characterIsMember:[unescaped characterAtIndex:index + 1]])
          {
              // Once the backslash is gone, the escaped character sits at
              // index, and the increment below steps over it
              [unescaped deleteCharactersInRange:NSMakeRange(index, 1)];
          }
          index += 1;
      }

      return unescaped;
  }
  1067. @end