// // MMSpanParser.m // MMMarkdown // // Copyright (c) 2012 Matt Diephouse. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. // #import "MMSpanParser.h" #import "MMElement.h" #import "MMHTMLParser.h" #import "MMScanner.h" static NSString * const ESCAPABLE_CHARS = @"\\`*_{}[]()#+-.!>"; @interface MMSpanParser () @property (assign, nonatomic, readonly) MMMarkdownExtensions extensions; @property (strong, nonatomic, readonly) MMHTMLParser *htmlParser; @property (strong, nonatomic) NSMutableArray *elements; @property (strong, nonatomic) NSMutableArray *openElements; @property (strong, nonatomic) MMElement *blockElement; @property (assign, nonatomic) BOOL parseEm; @property (assign, nonatomic) BOOL parseImages; @property (assign, nonatomic) BOOL parseLinks; @property (assign, nonatomic) BOOL parseStrong; @end @implementation MMSpanParser #pragma mark - Public Methods - (id)initWithExtensions:(MMMarkdownExtensions)extensions { self = [super init]; if (self) { _extensions = extensions; _htmlParser = [MMHTMLParser new]; self.parseEm = YES; self.parseImages = YES; self.parseLinks = YES; self.parseStrong = YES; } return self; } - (NSArray *)parseSpansInBlockElement:(MMElement *)block withScanner:(MMScanner *)scanner { self.blockElement = block; [scanner skipWhitespace]; return [self _parseWithScanner:scanner untilTestPasses:^{ return scanner.atEndOfString; }]; } - (NSArray *)parseSpansInTableColumns:(NSArray *)columns withScanner:(MMScanner *)scanner { NSMutableArray *cells = [NSMutableArray new]; for (NSNumber *alignment in columns) { [scanner skipWhitespace]; NSUInteger startLocation = scanner.location; NSArray *spans = scanner.nextCharacter == '|' ? @[] : [self _parseWithScanner:scanner untilTestPasses:^ BOOL { [scanner skipWhitespace]; return scanner.nextCharacter == '|' || scanner.atEndOfLine; }]; if (!spans) return nil; MMElement *cell = [MMElement new]; cell.type = MMElementTypeTableRowCell; cell.children = spans; cell.range = NSMakeRange(startLocation, scanner.location-startLocation); cell.alignment = alignment.integerValue; [cells addObject:cell]; if (scanner.nextCharacter == '|') [scanner advance]; } return cells; } #pragma mark - Private Methods - (NSArray *)_parseWithScanner:(MMScanner *)scanner untilTestPasses:(BOOL (^)(void))test { NSMutableArray *result = [NSMutableArray array]; NSCharacterSet *specialChars = [NSCharacterSet characterSetWithCharactersInString:@"\\`*_<&[! ~w:@|"]; NSCharacterSet *boringChars = [specialChars invertedSet]; [scanner beginTransaction]; while (!scanner.atEndOfString) { MMElement *element = [self _parseNextElementWithScanner:scanner]; if (element) { if (scanner.startLocation != element.range.location) { MMElement *text = [MMElement new]; text.type = MMElementTypeNone; text.range = NSMakeRange(scanner.startLocation, element.range.location-scanner.startLocation); [result addObject:text]; } [result addObject:element]; [scanner commitTransaction:YES]; [scanner beginTransaction]; } else if (scanner.atEndOfLine) { // This is done here (and not in _parseNextElementWithScanner:) // because it can result in 2 elements. if (scanner.startLocation != scanner.location) { MMElement *text = [MMElement new]; text.type = MMElementTypeNone; text.range = NSMakeRange(scanner.startLocation, scanner.location-scanner.startLocation); [result addObject:text]; } if (self.extensions & MMMarkdownExtensionsHardNewlines && self.blockElement.type == MMElementTypeParagraph) { MMElement *lineBreak = [MMElement new]; lineBreak.range = NSMakeRange(scanner.location, 1); lineBreak.type = MMElementTypeLineBreak; [result addObject:lineBreak]; } // Add a newline MMElement *newline = [MMElement new]; newline.range = NSMakeRange(scanner.location, 1); newline.type = MMElementTypeEntity; newline.stringValue = @"\n"; [result addObject:newline]; [scanner advanceToNextLine]; [scanner commitTransaction:YES]; [scanner beginTransaction]; } else if ([scanner skipCharactersFromSet:boringChars]) { } else { [scanner advance]; } // Check for the end character [scanner beginTransaction]; NSUInteger location = scanner.location; if (test()) { [scanner commitTransaction:YES]; if (scanner.startLocation != location) { MMElement *text = [MMElement new]; text.type = MMElementTypeNone; text.range = NSMakeRange(scanner.startLocation, location-scanner.startLocation); [result addObject:text]; } [scanner commitTransaction:YES]; return result; } [scanner commitTransaction:NO]; } [scanner commitTransaction:NO]; return nil; } - (MMElement *)_parseNextElementWithScanner:(MMScanner *)scanner { MMElement *element; if (self.extensions & MMMarkdownExtensionsStrikethroughs) { [scanner beginTransaction]; element = [self _parseStrikethroughWithScanner:scanner]; [scanner commitTransaction:element != nil]; if (element) return element; } // URL Autolinking if (self.parseLinks && self.extensions & MMMarkdownExtensionsAutolinkedURLs) { [scanner beginTransaction]; element = [self _parseAutolinkEmailAddressWithScanner:scanner]; [scanner commitTransaction:element != nil]; if (element) return element; [scanner beginTransaction]; element = [self _parseAutolinkURLWithScanner:scanner]; [scanner commitTransaction:element != nil]; if (element) return element; [scanner beginTransaction]; element = [self _parseAutolinkWWWURLWithScanner:scanner]; [scanner commitTransaction:element != nil]; if (element) return element; } [scanner beginTransaction]; element = [self _parseBackslashWithScanner:scanner]; [scanner commitTransaction:element != nil]; if (element) return element; [scanner beginTransaction]; element = [self _parseEmAndStrongWithScanner:scanner]; [scanner commitTransaction:element != nil]; if (element) return element; [scanner beginTransaction]; element = [self _parseCodeSpanWithScanner:scanner]; [scanner commitTransaction:element != nil]; if (element) return element; [scanner beginTransaction]; element = [self _parseLineBreakWithScanner:scanner]; [scanner commitTransaction:element != nil]; if (element) return element; if (self.parseLinks) { [scanner beginTransaction]; element = [self _parseAutomaticLinkWithScanner:scanner]; [scanner commitTransaction:element != nil]; if (element) return element; [scanner beginTransaction]; element = [self _parseAutomaticEmailLinkWithScanner:scanner]; [scanner commitTransaction:element != nil]; if (element) return element; [scanner beginTransaction]; element = [self _parseLinkWithScanner:scanner]; [scanner commitTransaction:element != nil]; if (element) return element; } if (self.parseImages) { [scanner beginTransaction]; element = [self _parseImageWithScanner:scanner]; [scanner commitTransaction:element != nil]; if (element) return element; } [scanner beginTransaction]; element = [self.htmlParser parseInlineTagWithScanner:scanner]; [scanner commitTransaction:element != nil]; if (element) return element; [scanner beginTransaction]; element = [self.htmlParser parseCommentWithScanner:scanner]; [scanner commitTransaction:element != nil]; if (element) return element; [scanner beginTransaction]; element = [self _parseAmpersandWithScanner:scanner]; [scanner commitTransaction:element != nil]; if (element) return element; [scanner beginTransaction]; element = [self _parseLeftAngleBracketWithScanner:scanner]; [scanner commitTransaction:element != nil]; if (element) return element; return nil; } - (BOOL)_parseAutolinkDomainWithScanner:(MMScanner *)scanner { NSCharacterSet *alphanumerics = NSCharacterSet.alphanumericCharacterSet; NSMutableCharacterSet *domainChars = [alphanumerics mutableCopy]; [domainChars addCharactersInString:@"-:"]; // Domain should be at least one alphanumeric if (![alphanumerics characterIsMember:scanner.nextCharacter]) return NO; [scanner skipCharactersFromSet:domainChars]; // Dot between domain and TLD if (scanner.nextCharacter != '.') return NO; [scanner advance]; // TLD must be at least 1 character if ([scanner skipCharactersFromSet:domainChars] == 0) return NO; return YES; } - (void)_parseAutolinkPathWithScanner:(MMScanner *)scanner { NSCharacterSet *alphanumerics = NSCharacterSet.alphanumericCharacterSet; NSMutableCharacterSet *boringChars = [alphanumerics mutableCopy]; [boringChars addCharactersInString:@",_-/:?&;%~!#+=@"]; NSUInteger parenLevel = 0; while (1) { if ([scanner skipCharactersFromSet:boringChars] > 0) { continue; } else if (scanner.nextCharacter == '\\') { [scanner advance]; if (scanner.nextCharacter == '(' || scanner.nextCharacter == ')') [scanner advance]; } else if (scanner.nextCharacter == '(') { parenLevel++; [scanner advance]; } else if (scanner.nextCharacter == ')' && parenLevel > 0) { parenLevel--; [scanner advance]; } else if (scanner.nextCharacter == '.') { // Can't end on a '.' [scanner beginTransaction]; [scanner advance]; if ([boringChars characterIsMember:scanner.nextCharacter]) { [scanner commitTransaction:YES]; } else { [scanner commitTransaction:NO]; break; } } else { break; } } } - (MMElement *)_parseAutolinkEmailAddressWithScanner:(MMScanner *)scanner { if (scanner.nextCharacter != '@') return nil; NSCharacterSet *alphanumerics = NSCharacterSet.alphanumericCharacterSet; NSMutableCharacterSet *localChars = [alphanumerics mutableCopy]; [localChars addCharactersInString:@"._-+"]; NSMutableCharacterSet *domainChars = [alphanumerics mutableCopy]; [domainChars addCharactersInString:@"._-"]; // Look for the previous word outside of the current transaction [scanner commitTransaction:NO]; NSString *localPart = [scanner previousWordWithCharactersFromSet:localChars]; [scanner beginTransaction]; if (localPart.length == 0) return nil; // '@' [scanner advance]; NSString *domainPart = [scanner nextWordWithCharactersFromSet:localChars]; // Must end on a letter or number NSRange lastAlphanum = [domainPart rangeOfCharacterFromSet:alphanumerics options:NSBackwardsSearch]; if (lastAlphanum.location == NSNotFound) return nil; domainPart = [domainPart substringToIndex:NSMaxRange(lastAlphanum)]; // Must contain at least one . if ([domainPart rangeOfString:@"."].location == NSNotFound) return nil; scanner.location += domainPart.length; NSUInteger startLocation = scanner.startLocation - localPart.length; NSRange range = NSMakeRange(startLocation, scanner.location-startLocation); MMElement *element = [MMElement new]; element.type = MMElementTypeMailTo; element.range = range; element.href = [scanner.string substringWithRange:range]; return element; } - (MMElement *)_parseAutolinkURLWithScanner:(MMScanner *)scanner { if (scanner.nextCharacter != ':') return nil; NSArray *protocols = @[ @"https", @"http", @"ftp" ]; // Look for the previous word outside of the current transaction [scanner commitTransaction:NO]; NSString *previousWord = scanner.previousWord; [scanner beginTransaction]; if (![protocols containsObject:previousWord.lowercaseString]) return nil; if (![scanner matchString:@"://"]) return nil; if (![self _parseAutolinkDomainWithScanner:scanner]) return nil; [self _parseAutolinkPathWithScanner:scanner]; NSUInteger startLocation = scanner.startLocation - previousWord.length; NSRange range = NSMakeRange(startLocation, scanner.location-startLocation); MMElement *element = [MMElement new]; element.type = MMElementTypeLink; element.range = range; element.href = [scanner.string substringWithRange:range]; MMElement *text = [MMElement new]; text.type = MMElementTypeNone; text.range = range; [element addChild:text]; return element; } - (MMElement *)_parseAutolinkWWWURLWithScanner:(MMScanner *)scanner { if (![scanner matchString:@"www."]) return nil; if (![self _parseAutolinkDomainWithScanner:scanner]) return nil; [self _parseAutolinkPathWithScanner:scanner]; NSRange range = NSMakeRange(scanner.startLocation, scanner.location-scanner.startLocation); NSString *link = [scanner.string substringWithRange:range]; MMElement *element = [MMElement new]; element.type = MMElementTypeLink; element.range = range; element.href = [@"http://" stringByAppendingString:link]; MMElement *text = [MMElement new]; text.type = MMElementTypeNone; text.range = range; [element addChild:text]; return element; } - (MMElement *)_parseStrikethroughWithScanner:(MMScanner *)scanner { if (![scanner matchString:@"~~"]) return nil; NSCharacterSet *whitespaceSet = NSCharacterSet.whitespaceCharacterSet; NSArray *children = [self _parseWithScanner:scanner untilTestPasses:^{ // Can't be at the beginning of the line if (scanner.atBeginningOfLine) return NO; // Must follow the end of a word if ([whitespaceSet characterIsMember:scanner.previousCharacter]) return NO; if (![scanner matchString:@"~~"]) return NO; return YES; }]; if (!children) return nil; MMElement *element = [MMElement new]; element.type = MMElementTypeStrikethrough; element.range = NSMakeRange(scanner.startLocation, scanner.location-scanner.startLocation); element.children = children; return element; } - (MMElement *)_parseEmAndStrongWithScanner:(MMScanner *)scanner { // Must have 1-3 *s or _s unichar character = scanner.nextCharacter; if (!(character == '*' || character == '_')) return nil; NSCharacterSet *alphanumericSet = NSCharacterSet.alphanumericCharacterSet; if (self.extensions & MMMarkdownExtensionsUnderscoresInWords && character == '_') { // GFM doesn't italicize parts of words [scanner commitTransaction:NO]; // Look for the previous char outside of the current transaction unichar prevChar = scanner.previousCharacter; [scanner beginTransaction]; BOOL isWordChar = [alphanumericSet characterIsMember:prevChar]; if (isWordChar) return nil; } // Must not be preceded by one of the same if (scanner.previousCharacter == character) return nil; NSUInteger numberOfChars = 0; while (scanner.nextCharacter == character) { numberOfChars++; [scanner advance]; } if (numberOfChars > 3) return nil; BOOL parseEm = numberOfChars == 1 || numberOfChars == 3; BOOL parseStrong = numberOfChars == 2 || numberOfChars == 3; if ((parseEm && !self.parseEm) || (parseStrong && !self.parseStrong)) return nil; NSCharacterSet *whitespaceSet = NSCharacterSet.whitespaceCharacterSet; __block NSUInteger remainingChars = numberOfChars; BOOL (^atEnd)(void) = ^{ // Can't be at the beginning of the line if (scanner.atBeginningOfLine) return NO; // Must follow the end of a word if ([whitespaceSet characterIsMember:scanner.previousCharacter]) return NO; // Must have 1-3 *s or _s NSUInteger numberOfEndChars = 0; while (scanner.nextCharacter == character && numberOfEndChars < remainingChars) { numberOfEndChars++; [scanner advance]; } if (numberOfEndChars == 0 || (numberOfEndChars != remainingChars && remainingChars != 3)) return NO; if (self.extensions & MMMarkdownExtensionsUnderscoresInWords && character == '_') { // GFM doesn't italicize parts of words unichar nextChar = scanner.nextCharacter; BOOL isWordChar = [alphanumericSet characterIsMember:nextChar]; if (isWordChar) return NO; } remainingChars -= numberOfEndChars; return YES; }; if (parseEm) self.parseEm = NO; if (parseStrong) self.parseStrong = NO; NSArray *children = [self _parseWithScanner:scanner untilTestPasses:atEnd]; if (parseEm && (!children || remainingChars != 1)) self.parseEm = YES; if (parseStrong && (!children || remainingChars != 2)) self.parseStrong = YES; if (!children) return nil; BOOL isEm = (numberOfChars == 1) || (numberOfChars == 3 && remainingChars != 1); NSUInteger startLocation = scanner.startLocation + remainingChars; MMElement *element = [MMElement new]; element.type = isEm ? MMElementTypeEm : MMElementTypeStrong; element.range = NSMakeRange(startLocation, scanner.location-startLocation); element.children = children; if (numberOfChars == 3 && remainingChars == 0) { NSArray *outerChildren = @[ element ]; element = [MMElement new]; element.type = MMElementTypeStrong; element.range = NSMakeRange(scanner.startLocation, scanner.location-scanner.startLocation); element.children = outerChildren; } else if (remainingChars > 0) { NSMutableArray *outerChildren = [[self _parseWithScanner:scanner untilTestPasses:atEnd] mutableCopy]; if (parseEm) self.parseEm = YES; if (parseStrong) self.parseStrong = YES; if (!outerChildren) return nil; [outerChildren insertObject:element atIndex:0]; element = [MMElement new]; element.type = !isEm ? MMElementTypeEm : MMElementTypeStrong; element.range = NSMakeRange(scanner.startLocation, scanner.location-scanner.startLocation); element.children = outerChildren; } return element; } - (MMElement *)_parseCodeSpanWithScanner:(MMScanner *)scanner { if (scanner.nextCharacter != '`') return nil; [scanner advance]; MMElement *element = [MMElement new]; element.type = MMElementTypeCodeSpan; // Check for more `s NSUInteger level = 1; while (scanner.nextCharacter == '`') { level++; [scanner advance]; } // skip leading whitespace [scanner skipCharactersFromSet:NSCharacterSet.whitespaceCharacterSet]; // Skip to the next '`' NSCharacterSet *boringChars = [[NSCharacterSet characterSetWithCharactersInString:@"`&<>"] invertedSet]; NSUInteger textLocation = scanner.location; while (1) { if (scanner.atEndOfString) return nil; // Skip other characters [scanner skipCharactersFromSet:boringChars]; // Add the code as text if (textLocation != scanner.location) { MMElement *text = [MMElement new]; text.type = MMElementTypeNone; text.range = NSMakeRange(textLocation, scanner.location-textLocation); [element addChild:text]; } // Check for closing `s if (scanner.nextCharacter == '`') { // Set the text location to catch the ` in case it isn't the closing `s textLocation = scanner.location; NSUInteger idx; for (idx=0; idx= level) break; else continue; } unichar nextChar = scanner.nextCharacter; // Check for entities if (nextChar == '&') { MMElement *entity = [MMElement new]; entity.type = MMElementTypeEntity; entity.range = NSMakeRange(scanner.location, 1); entity.stringValue = @"&"; [element addChild:entity]; [scanner advance]; } else if (nextChar == '<') { MMElement *entity = [MMElement new]; entity.type = MMElementTypeEntity; entity.range = NSMakeRange(scanner.location, 1); entity.stringValue = @"<"; [element addChild:entity]; [scanner advance]; } else if (nextChar == '>') { MMElement *entity = [MMElement new]; entity.type = MMElementTypeEntity; entity.range = NSMakeRange(scanner.location, 1); entity.stringValue = @">"; [element addChild:entity]; [scanner advance]; } // Or did we hit the end of the line? else if (scanner.atEndOfLine) { textLocation = scanner.location; [scanner advanceToNextLine]; continue; } textLocation = scanner.location; } // remove trailing whitespace if (element.children.count > 0) { MMElement *lastText = element.children.lastObject; unichar lastCharacter = [scanner.string characterAtIndex:NSMaxRange(lastText.range)-1]; while ([NSCharacterSet.whitespaceCharacterSet characterIsMember:lastCharacter]) { NSRange range = lastText.range; range.length -= 1; lastText.range = range; lastCharacter = [scanner.string characterAtIndex:NSMaxRange(lastText.range)-1]; } } element.range = NSMakeRange(scanner.startLocation, scanner.location-scanner.startLocation); return element; } - (MMElement *)_parseLineBreakWithScanner:(MMScanner *)scanner { NSCharacterSet *spaces = [NSCharacterSet characterSetWithCharactersInString:@" "]; if ([scanner skipCharactersFromSet:spaces] < 2) return nil; if (!scanner.atEndOfLine) return nil; // Don't ever add a line break to the last line if (scanner.atEndOfString) return nil; NSUInteger startLocation = scanner.startLocation + 1; MMElement *element = [MMElement new]; element.type = MMElementTypeLineBreak; element.range = NSMakeRange(startLocation, scanner.location-startLocation); return element; } - (MMElement *)_parseAutomaticLinkWithScanner:(MMScanner *)scanner { // Leading < if (scanner.nextCharacter != '<') return nil; [scanner advance]; NSUInteger textLocation = scanner.location; // Find the trailing > [scanner skipCharactersFromSet:[[NSCharacterSet characterSetWithCharactersInString:@">"] invertedSet]]; if (scanner.atEndOfLine) return nil; [scanner advance]; NSRange linkRange = NSMakeRange(textLocation, (scanner.location-1)-textLocation); NSString *linkText = [scanner.string substringWithRange:linkRange]; // Make sure it looks like a link static NSRegularExpression *regex; static dispatch_once_t onceToken; dispatch_once(&onceToken, ^{ regex = [NSRegularExpression regularExpressionWithPattern:@"^(\\w+)://" options:0 error:nil]; }); NSRange matchRange; matchRange = [regex rangeOfFirstMatchInString:linkText options:0 range:NSMakeRange(0, linkText.length)]; if (matchRange.location == NSNotFound) return nil; NSURL *url = [NSURL URLWithString:linkText]; if (!url) return nil; MMElement *element = [MMElement new]; element.type = MMElementTypeLink; element.range = NSMakeRange(scanner.startLocation, scanner.location-scanner.startLocation); element.href = linkText; // Do the text the hard way to take care of ampersands NSRange textRange = NSMakeRange(textLocation, NSMaxRange(linkRange)-textLocation); NSCharacterSet *ampersands = [NSCharacterSet characterSetWithCharactersInString:@"&"]; while (textRange.length > 0) { NSRange result = [scanner.string rangeOfCharacterFromSet:ampersands options:0 range:textRange]; if (result.location != NSNotFound) { if (textRange.location != result.location) { MMElement *text = [MMElement new]; text.type = MMElementTypeNone; text.range = NSMakeRange(textRange.location, result.location-textRange.location); [element addChild:text]; } MMElement *ampersand = [MMElement new]; ampersand.type = MMElementTypeEntity; ampersand.range = NSMakeRange(textRange.location, 1); ampersand.stringValue = @"&"; [element addChild:ampersand]; textRange = NSMakeRange(result.location+1, NSMaxRange(textRange)-(result.location+1)); } else { if (textRange.length > 0) { MMElement *text = [MMElement new]; text.type = MMElementTypeNone; text.range = textRange; [element addChild:text]; } break; } } return element; } - (MMElement *)_parseAutomaticEmailLinkWithScanner:(MMScanner *)scanner { // Leading < if (scanner.nextCharacter != '<') return nil; [scanner advance]; NSUInteger textLocation = scanner.location; // Find the trailing > [scanner skipCharactersFromSet:[[NSCharacterSet characterSetWithCharactersInString:@">"] invertedSet]]; if (scanner.atEndOfLine) return nil; [scanner advance]; NSRange linkRange = NSMakeRange(textLocation, (scanner.location-1)-textLocation); NSString *linkText = [scanner.string substringWithRange:linkRange]; // Make sure it looks like a link static NSRegularExpression *regex; static dispatch_once_t onceToken; dispatch_once(&onceToken, ^{ regex = [NSRegularExpression regularExpressionWithPattern:@"^[-._0-9\\p{L}]+@[-\\p{L}0-9][-.\\p{L}0-9]*\\.\\p{L}+$" options:NSRegularExpressionCaseInsensitive error:nil]; }); NSRange matchRange; matchRange = [regex rangeOfFirstMatchInString:linkText options:0 range:NSMakeRange(0, linkText.length)]; if (matchRange.location == NSNotFound) return nil; MMElement *element = [MMElement new]; element.type = MMElementTypeMailTo; element.range = NSMakeRange(scanner.startLocation, scanner.location-scanner.startLocation); element.href = linkText; return element; } - (NSArray *)_parseLinkTextBodyWithScanner:(MMScanner *)scanner { NSMutableArray *ranges = [NSMutableArray new]; NSCharacterSet *boringChars; NSUInteger level; if (scanner.nextCharacter != '[') return nil; [scanner advance]; boringChars = [[NSCharacterSet characterSetWithCharactersInString:@"[]\\"] invertedSet]; level = 1; NSRange textRange = scanner.currentRange; while (level > 0) { if (scanner.atEndOfString) return nil; if (scanner.atEndOfLine) { if (textRange.length > 0) { [ranges addObject:[NSValue valueWithRange:textRange]]; } [scanner advanceToNextLine]; textRange = scanner.currentRange; } [scanner skipCharactersFromSet:boringChars]; unichar character = scanner.nextCharacter; if (character == '[') { level += 1; } else if (character == ']') { level -= 1; } else if (character == '\\') { [scanner advance]; } textRange.length = scanner.location - textRange.location; [scanner advance]; } if (textRange.length > 0) { [ranges addObject:[NSValue valueWithRange:textRange]]; } return ranges; } - (MMElement *)_parseInlineLinkWithScanner:(MMScanner *)scanner { NSCharacterSet *boringChars; NSUInteger level; MMElement *element = [MMElement new]; element.type = MMElementTypeLink; // Find the [] element.innerRanges = [self _parseLinkTextBodyWithScanner:scanner]; if (!element.innerRanges) return nil; // Find the () if (scanner.nextCharacter != '(') return nil; [scanner advance]; [scanner skipWhitespace]; NSUInteger urlLocation = scanner.location; NSUInteger urlEnd = urlLocation; boringChars = [[NSCharacterSet characterSetWithCharactersInString:@"()\\ \t"] invertedSet]; level = 1; while (level > 0) { [scanner skipCharactersFromSet:boringChars]; if (scanner.atEndOfLine) return nil; urlEnd = scanner.location; unichar character = scanner.nextCharacter; if (character == '(') { level += 1; } else if (character == ')') { level -= 1; } else if (character == '\\') { [scanner advance]; // skip over the backslash // skip over the next character below } else if ([NSCharacterSet.whitespaceCharacterSet characterIsMember:character]) { if (level != 1) return nil; [scanner skipWhitespace]; if (scanner.nextCharacter == ')') { [scanner advance]; level -= 1; } break; } urlEnd = scanner.location; [scanner advance]; } NSUInteger titleLocation = NSNotFound; NSUInteger titleEnd = NSNotFound; // If the level is still 1, then we hit a space. if (level == 1) { // make sure there's a " if (scanner.nextCharacter != '"') return nil; [scanner advance]; titleLocation = scanner.location; boringChars = [[NSCharacterSet characterSetWithCharactersInString:@"\""] invertedSet]; while (1) { [scanner skipCharactersFromSet:boringChars]; if (scanner.atEndOfLine) return nil; [scanner advance]; if (scanner.nextCharacter == ')') { titleEnd = scanner.location - 1; [scanner advance]; break; } } } NSRange urlRange = NSMakeRange(urlLocation, urlEnd-urlLocation); NSString *href = [scanner.string substringWithRange:urlRange]; // If the URL is surrounded by angle brackets, ditch them if ([href hasPrefix:@"<"] && [href hasSuffix:@">"]) { href = [href substringWithRange:NSMakeRange(1, href.length-2)]; } element.range = NSMakeRange(scanner.startLocation, scanner.location-scanner.startLocation); element.href = [self _stringWithBackslashEscapesRemoved:href]; if (titleLocation != NSNotFound) { NSRange titleRange = NSMakeRange(titleLocation, titleEnd-titleLocation); element.title = [scanner.string substringWithRange:titleRange]; } return element; } - (MMElement *)_parseReferenceLinkWithScanner:(MMScanner *)scanner { MMElement *element = [MMElement new]; element.type = MMElementTypeLink; // Find the [] element.innerRanges = [self _parseLinkTextBodyWithScanner:scanner]; if (!element.innerRanges.count) return nil; // Skip optional whitespace if (scanner.nextCharacter == ' ') [scanner advance]; // or possible newline else if (scanner.atEndOfLine) [scanner advanceToNextLine]; // Look for the second [] NSArray *idRanges = [self _parseLinkTextBodyWithScanner:scanner]; if (!idRanges) return nil; if (!idRanges.count) { idRanges = element.innerRanges; } NSMutableString *idString = [NSMutableString new]; for (NSValue *value in idRanges) { NSRange range = [value rangeValue]; [idString appendString:[scanner.string substringWithRange:range]]; [idString appendString:@" "]; // newlines are replaced by spaces for the id } // Delete the last space [idString deleteCharactersInRange:NSMakeRange(idString.length-1, 1)]; element.range = NSMakeRange(scanner.startLocation, scanner.location-scanner.startLocation); element.identifier = idString; return element; } - (MMElement *)_parseLinkWithScanner:(MMScanner *)scanner { MMElement *element; element = [self _parseInlineLinkWithScanner:scanner]; if (element == nil) { // Assume that this method will already be wrapped in a transaction [scanner commitTransaction:NO]; [scanner beginTransaction]; element = [self _parseReferenceLinkWithScanner:scanner]; } if (element != nil && element.innerRanges.count > 0) { self.parseLinks = NO; MMScanner *innerScanner = [MMScanner scannerWithString:scanner.string lineRanges:element.innerRanges]; element.children = [self _parseWithScanner:innerScanner untilTestPasses:^{ return [innerScanner atEndOfString]; }]; self.parseLinks = YES; } return element; } - (MMElement *)_parseImageWithScanner:(MMScanner *)scanner { MMElement *element; // An image starts with a !, but then is a link if (scanner.nextCharacter != '!') return nil; [scanner advance]; // Add a transaction to protect the ! that was scanned [scanner beginTransaction]; self.parseImages = NO; element = [self _parseInlineLinkWithScanner:scanner]; self.parseImages = YES; if (element == nil) { // Assume that this method will already be wrapped in a transaction [scanner commitTransaction:NO]; [scanner beginTransaction]; element = [self _parseReferenceLinkWithScanner:scanner]; } [scanner commitTransaction:YES]; if (element != nil) { element.type = MMElementTypeImage; // Adjust the range to include the ! NSRange range = element.range; range.location -= 1; range.length += 1; element.range = range; NSMutableString *altText = [NSMutableString new]; for (NSValue *value in element.innerRanges) { NSRange range = [value rangeValue]; [altText appendString:[scanner.string substringWithRange:range]]; } element.stringValue = altText; } return element; } - (MMElement *)_parseAmpersandWithScanner:(MMScanner *)scanner { if (scanner.nextCharacter != '&') return nil; [scanner advance]; // check if this is an html entity [scanner beginTransaction]; if (scanner.nextCharacter == '#') [scanner advance]; [scanner skipCharactersFromSet:NSCharacterSet.alphanumericCharacterSet]; if (scanner.nextCharacter == ';') { [scanner commitTransaction:NO]; return nil; } [scanner commitTransaction:NO]; MMElement *element = [MMElement new]; element.type = MMElementTypeEntity; element.range = NSMakeRange(scanner.location-1, 1); element.stringValue = @"&"; return element; } - (MMElement *)_parseBackslashWithScanner:(MMScanner *)scanner { if (scanner.nextCharacter != '\\') return nil; [scanner advance]; NSCharacterSet *escapable = [NSCharacterSet characterSetWithCharactersInString:ESCAPABLE_CHARS]; if (![escapable characterIsMember:scanner.nextCharacter]) return nil; // Return the character MMElement *character = [MMElement new]; character.type = MMElementTypeEntity; character.range = NSMakeRange(scanner.location-1, 2); character.stringValue = [scanner.string substringWithRange:NSMakeRange(scanner.location, 1)]; [scanner advance]; return character; } - (MMElement *)_parseLeftAngleBracketWithScanner:(MMScanner *)scanner { if (scanner.nextCharacter != '<') return nil; [scanner advance]; MMElement *element = [MMElement new]; element.type = MMElementTypeEntity; element.range = NSMakeRange(scanner.location-1, 1); element.stringValue = @"<"; return element; } - (NSString *)_stringWithBackslashEscapesRemoved:(NSString *)string { NSMutableString *result = [string mutableCopy]; NSCharacterSet *escapableChars = [NSCharacterSet characterSetWithCharactersInString:ESCAPABLE_CHARS]; NSRange searchRange = NSMakeRange(0, result.length); while (searchRange.length > 0) { NSRange range = [result rangeOfString:@"\\" options:0 range:searchRange]; if (range.location == NSNotFound || NSMaxRange(range) == NSMaxRange(searchRange)) break; // If it is escapable, than remove the backslash unichar nextChar = [result characterAtIndex:range.location + 1]; if ([escapableChars characterIsMember:nextChar]) { [result replaceCharactersInRange:range withString:@""]; } searchRange.location = range.location + 1; searchRange.length = result.length - searchRange.location; } return result; } @end