MediaWiki master
ParserOutputAccess.php
Go to the documentation of this file.
1<?php
20namespace MediaWiki\Page;
21
22use InvalidArgumentException;
23use MapCacheLRU;
24use MediaWiki\Logger\Spi as LoggerSpi;
44use Wikimedia\Assert\Assert;
45use Wikimedia\Parsoid\Parsoid;
51
62
/** Name of the primary (per-page) parser cache used when Parsoid output is requested. */
public const PARSOID_PCACHE_NAME = 'parsoid-' . ParserCacheFactory::DEFAULT_NAME;

/** Name of the secondary (per-revision) output cache used when Parsoid output is requested. */
public const PARSOID_RCACHE_NAME = 'parsoid-' . ParserCacheFactory::DEFAULT_RCACHE_NAME;

/**
 * Do not look in the caches before rendering.
 * Also prevents reuse of a stale ("dirty") cache entry as previous output.
 */
public const OPT_NO_CHECK_CACHE = 1;

/** Alias of OPT_NO_CHECK_CACHE. */
public const OPT_FORCE_PARSE = self::OPT_NO_CHECK_CACHE;

/** Do not save freshly rendered output to the parser cache / revision output cache. */
public const OPT_NO_UPDATE_CACHE = 2;

/**
 * Skip the public-audience check on the requested revision.
 * Callers that need per-user checks should perform them themselves and then set this flag.
 */
public const OPT_NO_AUDIENCE_CHECK = 4;

/** Neither read from nor write to the caches. */
public const OPT_NO_CACHE = self::OPT_NO_UPDATE_CACHE | self::OPT_NO_CHECK_CACHE;

/** Trigger an opportunistic links update with the rendered output. */
public const OPT_LINKS_UPDATE = 8;

/**
 * Apply page view semantics: rendering is wrapped in a PoolCounterWork
 * instead of being performed directly.
 */
public const OPT_FOR_ARTICLE_VIEW = 16;

/**
 * Accept cached Parsoid output even if its recorded HTML version differs
 * from Parsoid's default HTML version.
 */
public const OPT_IGNORE_PROFILE_VERSION = 128;

/** shouldUseCache() result: the output must not be cached. */
private const CACHE_NONE = 'none';

/** shouldUseCache() result: use the primary (per-page) parser cache. */
private const CACHE_PRIMARY = 'primary';

/** shouldUseCache() result: use the secondary (per-revision) output cache. */
private const CACHE_SECONDARY = 'secondary';
129
/**
 * In-process cache of outputs for latest revisions, addressed by parser cache key
 * and latest revision ID. Sized to 10 entries in the constructor.
 */
private MapCacheLRU $localCache;

private ParserCacheFactory $parserCacheFactory;
private RevisionLookup $revisionLookup;
private RevisionRenderer $revisionRenderer;
private StatsFactory $statsFactory;
private ILBFactory $lbFactory;
private ChronologyProtector $chronologyProtector;
private LoggerSpi $loggerSpi;
private WikiPageFactory $wikiPageFactory;
private TitleFormatter $titleFormatter;
private TracerInterface $tracer;
147
/**
 * Store the injected services and set up the in-process output cache.
 *
 * @param ParserCacheFactory $parserCacheFactory Source of the primary and secondary caches
 * @param RevisionLookup $revisionLookup Used to load the latest revision when none is given
 * @param RevisionRenderer $revisionRenderer Performs the actual rendering
 * @param StatsFactory $statsFactory Receives cache/case/status/poolwork counters
 * @param ILBFactory $lbFactory Passed on to the pool work for current revisions
 * @param ChronologyProtector $chronologyProtector Passed on to the pool work for current revisions
 * @param LoggerSpi $loggerSpi Passed on to the pool work objects
 * @param WikiPageFactory $wikiPageFactory Used for content handler lookup and links updates
 * @param TitleFormatter $titleFormatter Used to format the page title in error messages
 * @param TracerInterface $tracer Used to create one span per operation
 */
public function __construct(
	ParserCacheFactory $parserCacheFactory,
	RevisionLookup $revisionLookup,
	RevisionRenderer $revisionRenderer,
	StatsFactory $statsFactory,
	ILBFactory $lbFactory,
	ChronologyProtector $chronologyProtector,
	LoggerSpi $loggerSpi,
	WikiPageFactory $wikiPageFactory,
	TitleFormatter $titleFormatter,
	TracerInterface $tracer
) {
	$this->parserCacheFactory = $parserCacheFactory;
	$this->revisionLookup = $revisionLookup;
	$this->revisionRenderer = $revisionRenderer;
	$this->statsFactory = $statsFactory;
	$this->lbFactory = $lbFactory;
	$this->chronologyProtector = $chronologyProtector;
	$this->loggerSpi = $loggerSpi;
	$this->wikiPageFactory = $wikiPageFactory;
	$this->titleFormatter = $titleFormatter;
	$this->tracer = $tracer;

	// Small per-instance LRU so repeated requests for the same latest revision are cheap.
	$this->localCache = new MapCacheLRU( 10 );
}
173
/**
 * Decide which cache, if any, applies to the given page and revision.
 *
 * @param PageRecord $page
 * @param RevisionRecord|null $rev Revision to render, or null for the page's latest revision
 * @return string One of the self::CACHE_* constants
 */
private function shouldUseCache(
	PageRecord $page,
	?RevisionRecord $rev
): string {
	if ( $rev && !$rev->getId() ) {
		// The revision isn't from the database, so the output can't safely be cached.
		return self::CACHE_NONE;
	}

	// NOTE: Keep in sync with ParserWikiPage::shouldCheckParserCache().
	// NOTE: when we allow caching of old revisions in the future,
	//       we must not allow caching of deleted revisions.

	// Check existence first so no WikiPage is constructed for a missing page.
	if ( !$page->exists() ) {
		return self::CACHE_NONE;
	}

	$contentHandler = $this->wikiPageFactory->newFromTitle( $page )->getContentHandler();
	if ( !$contentHandler->isParserCacheSupported() ) {
		return self::CACHE_NONE;
	}

	$isOld = $rev && $rev->getId() !== $page->getLatest();
	if ( !$isOld ) {
		return self::CACHE_PRIMARY;
	}

	if ( !$rev->audienceCan( RevisionRecord::DELETED_TEXT, RevisionRecord::FOR_PUBLIC ) ) {
		// deleted/suppressed revision
		return self::CACHE_NONE;
	}

	return self::CACHE_SECONDARY;
}
212
/**
 * Returns the rendered output for the given page if it is present in the cache.
 *
 * The in-process cache is consulted first for latest revisions, then the primary
 * parser cache (latest revision) or the secondary revision output cache (old revision).
 * A hit/miss counter is incremented unless the in-process cache answered the request.
 *
 * @param PageRecord $page
 * @param ParserOptions $parserOptions
 * @param RevisionRecord|null $revision Revision to look up, or null for the latest revision
 * @param int $options Bitfield of self::OPT_* flags; only OPT_IGNORE_PROFILE_VERSION is read here
 * @return ParserOutput|null The cached output, or null if there is no usable cache entry
 */
public function getCachedParserOutput(
	PageRecord $page,
	ParserOptions $parserOptions,
	?RevisionRecord $revision = null,
	int $options = 0
): ?ParserOutput {
	// Held in a local so the span object stays referenced for the whole call.
	$span = $this->startOperationSpan( __FUNCTION__, $page, $revision );
	$isOld = $revision && $revision->getId() !== $page->getLatest();
	$useCache = $this->shouldUseCache( $page, $revision );
	$primaryCache = $this->getPrimaryCache( $parserOptions );
	$classCacheKey = $primaryCache->makeParserOutputKey( $page, $parserOptions );

	if ( $useCache === self::CACHE_PRIMARY ) {
		if ( $this->localCache->hasField( $classCacheKey, $page->getLatest() ) && !$isOld ) {
			return $this->localCache->getField( $classCacheKey, $page->getLatest() );
		}
		$output = $primaryCache->get( $page, $parserOptions );
	} elseif ( $useCache === self::CACHE_SECONDARY && $revision ) {
		$secondaryCache = $this->getSecondaryCache( $parserOptions );
		$output = $secondaryCache->get( $revision, $parserOptions );
	} else {
		$output = null;
	}

	$notHitReason = 'miss';
	if (
		$output && !( $options & self::OPT_IGNORE_PROFILE_VERSION ) &&
		$parserOptions->getUseParsoid()
	) {
		$pageBundleData = $output->getExtensionData(
			PageBundleParserOutputConverter::PARSOID_PAGE_BUNDLE_KEY
		);
		// T333606: Force a reparse if the version coming from cache is not the default
		$cachedVersion = $pageBundleData['version'] ?? null;
		if (
			$cachedVersion !== null && // T325137: BadContentModel, no sense in reparsing
			$cachedVersion !== Parsoid::defaultHTMLVersion()
		) {
			$notHitReason = 'obsolete';
			$output = null;
		}
	}

	if ( $output && !$isOld ) {
		// Remember the output for later requests for the same latest revision.
		$this->localCache->setField( $classCacheKey, $page->getLatest(), $output );
	}

	if ( $output ) {
		$this->statsFactory
			->getCounter( 'parseroutputaccess_cache' )
			->setLabel( 'cache', $useCache )
			->setLabel( 'reason', 'hit' )
			->setLabel( 'type', 'hit' )
			->copyToStatsdAt( "ParserOutputAccess.Cache.$useCache.hit" )
			->increment();
	} else {
		$this->statsFactory
			->getCounter( 'parseroutputaccess_cache' )
			->setLabel( 'reason', $notHitReason )
			->setLabel( 'cache', $useCache )
			->setLabel( 'type', 'miss' )
			->copyToStatsdAt( "ParserOutputAccess.Cache.$useCache.$notHitReason" )
			->increment();
	}

	return $output ?: null; // convert false to null
}
290
/**
 * Returns the rendered output for the given page.
 *
 * The caches are consulted first unless OPT_NO_CHECK_CACHE is set; otherwise the
 * revision is rendered, either directly or, with OPT_FOR_ARTICLE_VIEW, through a
 * PoolCounterWork. Counters for the request case and the resulting status are
 * incremented along the way.
 *
 * @param PageRecord $page
 * @param ParserOptions $parserOptions
 * @param RevisionRecord|null $revision Revision to render, or null for the latest revision
 * @param int $options Bitfield of self::OPT_* flags
 * @return Status Holds the ParserOutput as its value whenever it is OK. Fatal with
 *   'nopagetext', 'missing-revision' or 'missing-revision-permission' if the
 *   page or revision cannot be rendered.
 * @throws InvalidArgumentException If the preconditions on $revision are violated
 */
public function getParserOutput(
	PageRecord $page,
	ParserOptions $parserOptions,
	?RevisionRecord $revision = null,
	int $options = 0
): Status {
	// Held in a local so the span object stays referenced for the whole call.
	$span = $this->startOperationSpan( __FUNCTION__, $page, $revision );
	$error = $this->checkPreconditions( $page, $revision, $options );
	if ( $error ) {
		$this->statsFactory
			->getCounter( 'parseroutputaccess_case' )
			->setLabel( 'case', 'error' )
			->copyToStatsdAt( 'ParserOutputAccess.Case.error' )
			->increment();
		return $error;
	}

	$isOld = $revision && $revision->getId() !== $page->getLatest();
	$case = $isOld ? 'old' : 'current';
	$this->statsFactory
		->getCounter( 'parseroutputaccess_case' )
		->setLabel( 'case', $case )
		->copyToStatsdAt( "ParserOutputAccess.Case.$case" )
		->increment();

	if ( !( $options & self::OPT_NO_CHECK_CACHE ) ) {
		// Forward $options so that OPT_IGNORE_PROFILE_VERSION is honoured by the lookup;
		// it is the only flag getCachedParserOutput() inspects.
		$output = $this->getCachedParserOutput( $page, $parserOptions, $revision, $options );
		if ( $output ) {
			return Status::newGood( $output );
		}
	}

	if ( !$revision ) {
		$revId = $page->getLatest();
		$revision = $revId ? $this->revisionLookup->getRevisionById( $revId ) : null;

		if ( !$revision ) {
			$this->statsFactory
				->getCounter( 'parseroutputaccess_status' )
				->setLabel( 'status', 'norev' )
				->copyToStatsdAt( "ParserOutputAccess.Status.norev" )
				->increment();
			return Status::newFatal( 'missing-revision', $revId );
		}
	}

	if ( $options & self::OPT_FOR_ARTICLE_VIEW ) {
		$work = $this->newPoolWorkArticleView( $page, $parserOptions, $revision, $options );
		$status = $work->execute();
	} else {
		// XXX: we could try harder to reuse a cache lookup above to
		// provide the $previous argument here
		$status = $this->renderRevision( $page, $parserOptions, $revision, $options, null );
	}

	$output = $status->getValue();
	Assert::postcondition( $output || !$status->isOK(), 'Inconsistent status' );

	if ( $output && !$isOld ) {
		// Remember the output for later requests for the same latest revision.
		$primaryCache = $this->getPrimaryCache( $parserOptions );
		$classCacheKey = $primaryCache->makeParserOutputKey( $page, $parserOptions );
		$this->localCache->setField( $classCacheKey, $page->getLatest(), $output );
	}

	if ( $status->isGood() ) {
		$statusLabel = 'good';
	} elseif ( $status->isOK() ) {
		$statusLabel = 'ok';
	} else {
		$statusLabel = 'error';
	}
	$this->statsFactory->getCounter( 'parseroutputaccess_status' )
		->setLabel( 'status', $statusLabel )
		->copyToStatsdAt( "ParserOutputAccess.Status.$statusLabel" )
		->increment();

	return $status;
}
404
/**
 * Render the given revision directly (without a PoolCounterWork) and, where
 * permitted, save the result to the applicable cache.
 *
 * @param PageRecord $page
 * @param ParserOptions $parserOptions
 * @param RevisionRecord $revision Revision to render
 * @param int $options Bitfield of self::OPT_* flags; OPT_NO_CHECK_CACHE,
 *   OPT_NO_UPDATE_CACHE and OPT_LINKS_UPDATE are read here
 * @param ParserOutput|null $previousOutput Earlier output handed to the renderer as
 *   'previous-output'. If null, a stale primary cache entry may be used instead.
 * @return Status Always a good Status holding the rendered ParserOutput
 */
private function renderRevision(
	PageRecord $page,
	ParserOptions $parserOptions,
	RevisionRecord $revision,
	int $options,
	?ParserOutput $previousOutput = null
): Status {
	// Held in a local so the span object stays referenced for the whole call.
	$span = $this->startOperationSpan( __FUNCTION__, $page, $revision );
	$this->statsFactory->getCounter( 'parseroutputaccess_poolwork' )
		->copyToStatsdAt( 'ParserOutputAccess.PoolWork.None' )
		->setLabel( 'cache', self::CACHE_NONE )
		->increment();

	$useCache = $this->shouldUseCache( $page, $revision );

	// T371713: Temporary statistics collection code to determine
	// feasibility of Parsoid selective update
	$sampleRate = MediaWikiServices::getInstance()->getMainConfig()->get(
		MainConfigNames::ParsoidSelectiveUpdateSampleRate
	);
	// Samples roughly one in $sampleRate calls; a falsy rate disables sampling.
	$doSample = ( $sampleRate && mt_rand( 1, $sampleRate ) === 1 );

	if ( $previousOutput === null && ( $doSample || $parserOptions->getUseParsoid() ) ) {
		// If $useCache === self::CACHE_SECONDARY we could potentially
		// try to reuse the parse of $revision-1 from the secondary cache,
		// but it is likely those template transclusions are out of date.
		// Try to reuse the template transclusions from the most recent
		// parse, which are more likely to reflect the current template.
		if ( !( $options & self::OPT_NO_CHECK_CACHE ) ) {
			$previousOutput = $this->getPrimaryCache( $parserOptions )->getDirty( $page, $parserOptions ) ?: null;
		}
	}

	$renderedRev = $this->revisionRenderer->getRenderedRevision(
		$revision,
		$parserOptions,
		null,
		[
			'audience' => RevisionRecord::RAW,
			'previous-output' => $previousOutput,
		]
	);

	$output = $renderedRev->getRevisionParserOutput();

	if ( $doSample ) {
		$content = $revision->getContent( SlotRecord::MAIN );
		$labels = [
			'source' => 'ParserOutputAccess',
			'type' => $previousOutput === null ? 'full' : 'selective',
			'reason' => $parserOptions->getRenderReason(),
			'parser' => $parserOptions->getUseParsoid() ? 'parsoid' : 'legacy',
			'opportunistic' => 'false',
			'wiki' => WikiMap::getCurrentWikiId(),
			'model' => $content ? $content->getModel() : 'unknown',
		];
		$this->statsFactory
			->getCounter( 'ParserCache_selective_total' )
			->setLabels( $labels )
			->increment();
		$this->statsFactory
			->getCounter( 'ParserCache_selective_cpu_seconds' )
			->setLabels( $labels )
			->incrementBy( $output->getTimeProfile( 'cpu' ) );
	}

	if ( !( $options & self::OPT_NO_UPDATE_CACHE ) && $output->isCacheable() ) {
		if ( $useCache === self::CACHE_PRIMARY ) {
			$primaryCache = $this->getPrimaryCache( $parserOptions );
			$primaryCache->save( $output, $page, $parserOptions );
		} elseif ( $useCache === self::CACHE_SECONDARY ) {
			$secondaryCache = $this->getSecondaryCache( $parserOptions );
			$secondaryCache->save( $output, $revision, $parserOptions );
		}
	}

	if ( $options & self::OPT_LINKS_UPDATE ) {
		$this->wikiPageFactory->newFromTitle( $page )
			->triggerOpportunisticLinksUpdate( $output );
	}

	return Status::newGood( $output );
}
502
/**
 * Validate the page/revision combination before any lookup or rendering.
 *
 * @param PageRecord $page
 * @param RevisionRecord|null $revision
 * @param int $options Bitfield of self::OPT_* flags; OPT_NO_UPDATE_CACHE and
 *   OPT_NO_AUDIENCE_CHECK are read here
 * @return Status|null Null if all checks pass. A fatal Status with 'nopagetext' if the
 *   page does not exist, or with 'missing-revision-permission' if the revision
 *   text is not visible to the public.
 * @throws InvalidArgumentException If the revision has no ID while cache updates are
 *   allowed, or if the revision belongs to a different page
 */
private function checkPreconditions(
	PageRecord $page,
	?RevisionRecord $revision = null,
	int $options = 0
): ?Status {
	if ( !$page->exists() ) {
		return Status::newFatal( 'nopagetext' );
	}

	if ( !( $options & self::OPT_NO_UPDATE_CACHE ) && $revision && !$revision->getId() ) {
		throw new InvalidArgumentException(
			'The revision does not have a known ID. Use OPT_NO_CACHE.'
		);
	}

	if ( $revision && $revision->getPageId() !== $page->getId() ) {
		throw new InvalidArgumentException(
			'The revision does not belong to the given page.'
		);
	}

	if ( $revision && !( $options & self::OPT_NO_AUDIENCE_CHECK ) ) {
		// NOTE: If per-user checks are desired, the caller should perform them and
		//       then set OPT_NO_AUDIENCE_CHECK if they passed.
		if ( !$revision->audienceCan( RevisionRecord::DELETED_TEXT, RevisionRecord::FOR_PUBLIC ) ) {
			return Status::newFatal(
				'missing-revision-permission',
				$revision->getId(),
				$revision->getTimestamp(),
				$this->titleFormatter->getPrefixedDBkey( $page )
			);
		}
	}

	return null;
}
546
/**
 * Create the PoolCounterWork that renders the given revision, picking the
 * implementation according to which cache applies (see shouldUseCache()).
 * A 'parseroutputaccess_poolwork' counter is incremented for the chosen variant.
 *
 * @param PageRecord $page
 * @param ParserOptions $parserOptions
 * @param RevisionRecord $revision Revision to render
 * @param int $options Bitfield of self::OPT_* flags; OPT_NO_UPDATE_CACHE and
 *   OPT_LINKS_UPDATE are passed on to the work object for current revisions
 * @return PoolCounterWork
 */
protected function newPoolWorkArticleView(
	PageRecord $page,
	ParserOptions $parserOptions,
	RevisionRecord $revision,
	int $options
): PoolCounterWork {
	$useCache = $this->shouldUseCache( $page, $revision );

	switch ( $useCache ) {
		case self::CACHE_PRIMARY:
			$this->statsFactory->getCounter( 'parseroutputaccess_poolwork' )
				->setLabel( 'cache', self::CACHE_PRIMARY )
				->copyToStatsdAt( 'ParserOutputAccess.PoolWork.Current' )
				->increment();
			$primaryCache = $this->getPrimaryCache( $parserOptions );
			$parserCacheMetadata = $primaryCache->getMetadata( $page );
			$cacheKey = $primaryCache->makeParserOutputKey( $page, $parserOptions,
				$parserCacheMetadata ? $parserCacheMetadata->getUsedOptions() : null
			);

			// The work key combines the cache key with the revision ID.
			$workKey = $cacheKey . ':revid:' . $revision->getId();

			return new PoolWorkArticleViewCurrent(
				$workKey,
				$page,
				$revision,
				$parserOptions,
				$this->revisionRenderer,
				$primaryCache,
				$this->lbFactory,
				$this->chronologyProtector,
				$this->loggerSpi,
				$this->wikiPageFactory,
				!( $options & self::OPT_NO_UPDATE_CACHE ),
				(bool)( $options & self::OPT_LINKS_UPDATE )
			);

		case self::CACHE_SECONDARY:
			$this->statsFactory->getCounter( 'parseroutputaccess_poolwork' )
				->setLabel( 'cache', self::CACHE_SECONDARY )
				->copyToStatsdAt( 'ParserOutputAccess.PoolWork.Old' )
				->increment();
			$secondaryCache = $this->getSecondaryCache( $parserOptions );
			$workKey = $secondaryCache->makeParserOutputKey( $revision, $parserOptions );
			return new PoolWorkArticleViewOld(
				$workKey,
				$secondaryCache,
				$revision,
				$parserOptions,
				$this->revisionRenderer,
				$this->loggerSpi
			);

		default:
			$this->statsFactory->getCounter( 'parseroutputaccess_poolwork' )
				->setLabel( 'cache', self::CACHE_NONE )
				->copyToStatsdAt( 'ParserOutputAccess.PoolWork.Uncached' )
				->increment();
			// No cache is used; the secondary cache only supplies the work key here.
			$secondaryCache = $this->getSecondaryCache( $parserOptions );
			$workKey = $secondaryCache->makeParserOutputKeyOptionalRevId( $revision, $parserOptions );
			return new PoolWorkArticleView(
				$workKey,
				$revision,
				$parserOptions,
				$this->revisionRenderer,
				$this->loggerSpi
			);
	}

	// unreachable
}
626
/**
 * Get the primary (per-page) parser cache matching the parser selected in $pOpts:
 * the Parsoid cache when Parsoid is in use, the default parser cache otherwise.
 *
 * @param ParserOptions $pOpts
 * @return ParserCache
 */
private function getPrimaryCache( ParserOptions $pOpts ): ParserCache {
	$cacheName = $pOpts->getUseParsoid()
		? self::PARSOID_PCACHE_NAME
		: ParserCacheFactory::DEFAULT_NAME;

	return $this->parserCacheFactory->getParserCache( $cacheName );
}
638
/**
 * Get the secondary (per-revision) output cache matching the parser selected in $pOpts:
 * the Parsoid cache when Parsoid is in use, the default revision output cache otherwise.
 *
 * @param ParserOptions $pOpts
 * @return RevisionOutputCache
 */
private function getSecondaryCache( ParserOptions $pOpts ): RevisionOutputCache {
	$cacheName = $pOpts->getUseParsoid()
		? self::PARSOID_RCACHE_NAME
		: ParserCacheFactory::DEFAULT_RCACHE_NAME;

	return $this->parserCacheFactory->getRevisionOutputCache( $cacheName );
}
650
/**
 * Create, start and activate a tracing span named "ParserOutputAccess::$opName".
 * Page and revision attributes are attached only if the span is sampled.
 *
 * @param string $opName Operation name; callers pass __FUNCTION__
 * @param PageRecord $page
 * @param RevisionRecord|null $revision
 * @return SpanInterface The started, activated span
 */
private function startOperationSpan(
	string $opName,
	PageRecord $page,
	?RevisionRecord $revision = null
): SpanInterface {
	$span = $this->tracer->createSpan( "ParserOutputAccess::$opName" );
	if ( $span->getContext()->isSampled() ) {
		$span->setAttributes( [
			'org.wikimedia.parser.page' => $page->__toString(),
			'org.wikimedia.parser.page.id' => $page->getId(),
			'org.wikimedia.parser.page.wiki' => $page->getWikiId(),
		] );
		if ( $revision ) {
			$span->setAttributes( [
				'org.wikimedia.parser.revision.id' => $revision->getId(),
				'org.wikimedia.parser.revision.parent_id' => $revision->getParentId(),
			] );
		}
	}
	return $span->start()->activate();
}
672}
const CACHE_NONE
Definition Defines.php:87
if(!defined('MW_SETUP_CALLBACK'))
Definition WebStart.php:81
Store key-value entries in a size-limited in-memory LRU cache.
A class containing constants representing the names of configuration variables.
Service locator for MediaWiki core services.
Service for getting rendered output of a given page.
const OPT_FOR_ARTICLE_VIEW
Apply page view semantics.
getCachedParserOutput(PageRecord $page, ParserOptions $parserOptions, ?RevisionRecord $revision=null, int $options=0)
Returns the rendered output for the given page if it is present in the cache.
newPoolWorkArticleView(PageRecord $page, ParserOptions $parserOptions, RevisionRecord $revision, int $options)
__construct(ParserCacheFactory $parserCacheFactory, RevisionLookup $revisionLookup, RevisionRenderer $revisionRenderer, StatsFactory $statsFactory, ILBFactory $lbFactory, ChronologyProtector $chronologyProtector, LoggerSpi $loggerSpi, WikiPageFactory $wikiPageFactory, TitleFormatter $titleFormatter, TracerInterface $tracer)
getParserOutput(PageRecord $page, ParserOptions $parserOptions, ?RevisionRecord $revision=null, int $options=0)
Returns the rendered output for the given page.
Service for creating WikiPage objects.
Cache for ParserOutput objects corresponding to the latest page revisions.
Set options of the Parser.
getUseParsoid()
Parsoid-format HTML output, or legacy wikitext parser HTML?
ParserOutput is a rendering of a Content object or a message.
Provides methods for conversion between PageBundle and ParserOutput TODO: Convert to a trait once we ...
Cache for ParserOutput objects.
Class for dealing with PoolCounters using class members.
PoolWorkArticleView for the current revision of a page, using ParserCache.
PoolWorkArticleView for an old revision of a page, using a simple cache.
PoolCounter protected work wrapping RenderedRevision->getRevisionParserOutput.
Page revision base class.
getParentId( $wikiId=self::LOCAL)
Get parent revision ID (the original previous page revision).
audienceCan( $field, $audience, ?Authority $performer=null)
Check that the given audience has access to the given field.
getId( $wikiId=self::LOCAL)
Get revision ID.
The RevisionRenderer service provides access to rendered output for revisions.
Value object representing a content slot associated with a page revision.
Generic operation result class Has warning/error list, boolean status and arbitrary value.
Definition Status.php:54
A title formatter service for MediaWiki.
Tools for dealing with other locally-hosted wikis.
Definition WikiMap.php:31
Provide a given client with protection against visible database lag.
This is the primary interface for validating metrics definitions, caching defined metrics,...
Service provider interface to create \Psr\Log\LoggerInterface objects.
Definition Spi.php:64
exists()
Checks if the page currently exists.
Data record representing a page that is (or used to be, or could be) an editable page on a wiki.
getLatest( $wikiId=self::LOCAL)
The ID of the page's latest revision.
Service for looking up page revisions.
Manager of ILoadBalancer objects and, indirectly, IDatabase connections.
Represents an OpenTelemetry span, i.e. a single operation within a trace.
Base interface for an OpenTelemetry tracer responsible for creating spans.

Follow Lee on X/Twitter - Father, Husband, Serial builder creating AI, crypto, games & web tools. We are friends :) AI Will Come To Life!

Check out: eBank.nz (Art Generator) | Netwrck.com (AI Tools) | Text-Generator.io (AI API) | BitBank.nz (Crypto AI) | ReadingTime (Kids Reading) | RewordGame | BigMultiplayerChess | WebFiddle | How.nz | Helix AI Assistant