12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
762786279628062816282628362846285628662876288628962906291629262936294629562966297629862996300630163026303630463056306630763086309631063116312631363146315631663176318631963206321632263236324632563266327632863296330633163326333633463356336633763386339634063416342634363446345634663476348634963506351635263536354635563566357635863596360636163626363636463656366636763686369637063716372637363746375637663776378637963806381638263836384638563866387638863896390639163926393639463956396639763986399640064016402640364046405640664076408640964106411641264136414641564166417641864196420642164226423642464256426642764286429643064316432643364346435643664376438643964406441644264436444 |
- cfg-common.h | 4 +
- cfg-mencoder.h | 4 +
- cfg-mplayer.h | 4 +
- configure | 13 +-
- libaf/af_format.c | 7 +
- libavcodec/Makefile | 7 +
- libavcodec/avr32/dsputil_avr32.c | 2678 ++++++++++++++++++++++++++++++++++++++
- libavcodec/avr32/fdct.S | 541 ++++++++
- libavcodec/avr32/h264idct.S | 451 +++++++
- libavcodec/avr32/idct.S | 829 ++++++++++++
- libavcodec/avr32/mc.S | 434 ++++++
- libavcodec/avr32/pico.h | 260 ++++
- libavcodec/bitstream.h | 77 +-
- libavcodec/dsputil.c | 3 +
- libavcodec/h264.c | 15 +
- libavutil/common.h | 16 +
- libavutil/internal.h | 9 +
- libfaad2/common.h | 2 +-
- libmpcodecs/ad_libmad.c | 5 +
- libswscale/pico-avr32.h | 137 ++
- libswscale/swscale_internal.h | 2 +-
- libswscale/yuv2rgb.c | 14 +
- libswscale/yuv2rgb_avr32.c | 416 ++++++
- libvo/vo_fbdev2.c | 101 ++-
- version.sh | 2 +-
- 25 files changed, 6011 insertions(+), 20 deletions(-)
- create mode 100644 libavcodec/avr32/dsputil_avr32.c
- create mode 100644 libavcodec/avr32/fdct.S
- create mode 100644 libavcodec/avr32/h264idct.S
- create mode 100644 libavcodec/avr32/idct.S
- create mode 100644 libavcodec/avr32/mc.S
- create mode 100644 libavcodec/avr32/pico.h
- create mode 100644 libswscale/pico-avr32.h
- create mode 100644 libswscale/yuv2rgb_avr32.c
- diff --git a/cfg-common.h b/cfg-common.h
- index 780df38..7d878a8 100644
- --- a/cfg-common.h
- +++ b/cfg-common.h
- @@ -235,6 +235,10 @@
- {"tsprobe", &ts_probe, CONF_TYPE_POSITION, 0, 0, TS_MAX_PROBE_SIZE, NULL},
- {"tskeepbroken", &ts_keep_broken, CONF_TYPE_FLAG, 0, 0, 1, NULL},
-
- +#ifdef ARCH_AVR32
- + {"use-pico", &avr32_use_pico, CONF_TYPE_FLAG, 0, 0, 1, NULL},
- + {"nouse-pico", &avr32_use_pico, CONF_TYPE_FLAG, 0, 1, 0, NULL},
- +#endif
- // draw by slices or whole frame (useful with libmpeg2/libavcodec)
- {"slices", &vd_use_slices, CONF_TYPE_FLAG, 0, 0, 1, NULL},
- {"noslices", &vd_use_slices, CONF_TYPE_FLAG, 0, 1, 0, NULL},
- diff --git a/cfg-mencoder.h b/cfg-mencoder.h
- index 411b748..addf791 100644
- --- a/cfg-mencoder.h
- +++ b/cfg-mencoder.h
- @@ -5,6 +5,10 @@
-
- #include "cfg-common.h"
-
- +#ifdef ARCH_AVR32
- +extern int avr32_use_pico;
- +#endif
- +
- #ifdef USE_FAKE_MONO
- extern int fakemono; // defined in dec_audio.c
- #endif
- diff --git a/cfg-mplayer.h b/cfg-mplayer.h
- index 62b6eac..31499c2 100644
- --- a/cfg-mplayer.h
- +++ b/cfg-mplayer.h
- @@ -4,6 +4,10 @@
-
- #include "cfg-common.h"
-
- +#ifdef ARCH_AVR32
- +extern int avr32_use_pico;
- +#endif
- +
- extern int noconsolecontrols;
-
- #if defined(HAVE_FBDEV)||defined(HAVE_VESA)
- diff --git a/configure b/configure
- index 29002c8..56c6fe4 100755
- --- a/configure
- +++ b/configure
- @@ -1203,6 +1203,15 @@ EOF
- _optimizing="$proc"
- ;;
-
- + avr32)
- + _def_arch='#define ARCH_AVR32'
- + _target_arch='TARGET_ARCH_AVR32 = yes'
- + iproc='avr32'
- + proc=''
- + _march=''
- + _mcpu=''
- + _optimizing=''
- + ;;
- arm|armv4l|armv5tel)
- _def_arch='#define ARCH_ARMV4L 1'
- _target_arch='TARGET_ARCH_ARMV4L = yes'
- @@ -1533,7 +1542,7 @@ echores $_named_asm_args
- # Checking for CFLAGS
- _stripbinaries=yes
- if test "$_profile" != "" || test "$_debug" != "" ; then
- - CFLAGS="-W -Wall -O2 $_march $_mcpu $_debug $_profile"
- + CFLAGS="-W -Wall -O4 $_march $_mcpu $_debug $_profile"
- if test "$_cc_major" -ge "3" ; then
- CFLAGS=`echo "$CFLAGS" | sed -e 's/\(-Wall\)/\1 -Wno-unused-parameter/'`
- fi
- @@ -3794,7 +3803,7 @@ fi
-
-
- echocheck "X11 headers presence"
- - for I in `echo $_inc_extra | sed s/-I//g` /usr/X11/include /usr/X11R6/include /usr/include/X11R6 /usr/include /usr/openwin/include ; do
- + for I in `echo $_inc_extra | sed s/-I//g`; do
- if test -f "$I/X11/Xlib.h" ; then
- _inc_x11="-I$I"
- _x11_headers="yes"
- diff --git a/libaf/af_format.c b/libaf/af_format.c
- index e5b7cc9..5d7ea6d 100644
- --- a/libaf/af_format.c
- +++ b/libaf/af_format.c
- @@ -20,7 +20,14 @@
- // Integer to float conversion through lrintf()
- #ifdef HAVE_LRINTF
- #include <math.h>
- +
- +#ifdef ARCH_AVR32
- +#define lrintf(x) rint(x)
- +#define llrint(x) (long long)rint(x)
- +#else
- long int lrintf(float);
- +#endif
- +
- #else
- #define lrintf(x) ((int)(x))
- #endif
- diff --git a/libavcodec/Makefile b/libavcodec/Makefile
- index 17b6c45..8e1dc96 100644
- --- a/libavcodec/Makefile
- +++ b/libavcodec/Makefile
- @@ -360,6 +360,12 @@ OBJS-$(TARGET_ARCH_SPARC) += sparc/dsputil_vis.o \
-
- sparc/dsputil_vis.o: CFLAGS += -mcpu=ultrasparc -mtune=ultrasparc
-
- +# avr32 specific stuff
- +ifeq ($(TARGET_ARCH_AVR32),yes)
- +ASM_OBJS += avr32/idct.o avr32/fdct.o avr32/mc.o avr32/h264idct.o
- +OBJS += avr32/dsputil_avr32.o
- +endif
- +
- # sun mediaLib specific stuff
- OBJS-$(HAVE_MLIB) += mlib/dsputil_mlib.o \
-
- @@ -419,6 +425,7 @@ tests: apiexample $(TESTS)
- clean::
- rm -f \
- i386/*.o i386/*~ \
- + avr32/*.o avr32/*~ \
- armv4l/*.o armv4l/*~ \
- mlib/*.o mlib/*~ \
- alpha/*.o alpha/*~ \
- diff --git a/libavcodec/avr32/dsputil_avr32.c b/libavcodec/avr32/dsputil_avr32.c
- new file mode 100644
- index 0000000..200284d
- --- /dev/null
- +++ b/libavcodec/avr32/dsputil_avr32.c
- @@ -0,0 +1,2678 @@
- +/*
- + * Copyright (c) 2007 Atmel Corporation. All rights reserved.
- + *
- + * Redistribution and use in source and binary forms, with or without
- + * modification, are permitted provided that the following conditions
- + * are met:
- + *
- + * 1. Redistributions of source code must retain the above copyright
- + * notice, this list of conditions and the following disclaimer.
- + *
- + * 2. Redistributions in binary form must reproduce the above
- + * copyright notice, this list of conditions and the following
- + * disclaimer in the documentation and/or other materials provided
- + * with the distribution.
- + *
- + * 3. The name of ATMEL may not be used to endorse or promote products
- + * derived from this software without specific prior written
- + * permission.
- + *
- + * THIS SOFTWARE IS PROVIDED BY ATMEL ``AS IS'' AND ANY EXPRESS OR
- + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- + * ARE EXPRESSLY AND SPECIFICALLY DISCLAIMED. IN NO EVENT SHALL ATMEL
- + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
- + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
- + * DAMAGE.
- + */
- +
- +#include "../dsputil.h"
- +#include "pico.h"
- +
- +int avr32_use_pico = 1;
- +
- +//#define CHECK_DSP_FUNCS_AGAINST_C
- +
- +#ifdef CHECK_DSP_FUNCS_AGAINST_C
- +#define DSP_FUNC_NAME(name) test_ ## name
- +#else
- +#define DSP_FUNC_NAME(name) name
- +#endif
- +
- +union doubleword {
- + int64_t doubleword;
- + struct {
- + int32_t top;
- + int32_t bottom;
- + } words;
- +};
- + /* Drop any LD16/LD32/LD64 definitions inherited from the included headers
- + * and replace them with plain pointer-cast loads of 16, 32 and 64 bits. */
- +#undef LD16
- +#undef LD32
- +#undef LD64
- + /* LD64_UNALIGNED(a) builds a 64-bit value from two 32-bit loads at a and
- + * a + 4, using a GCC statement expression.
- + * NOTE(review): a is evaluated twice and is used unparenthesised in
- + * "a + 4", so it should be a side-effect-free byte pointer expression
- + * with no operator of lower precedence than '+'. */
- +#define LD16(a) (*((uint16_t*)(a)))
- +#define LD32(a) (*((uint32_t*)(a)))
- +#define LD64(a) (*((uint64_t*)(a)))
- +#define LD64_UNALIGNED(a) \
- + ({ union doubleword __tmp__; \
- + __tmp__.words.top = LD32(a); \
- + __tmp__.words.bottom = LD32(a + 4); \
- + __tmp__.doubleword; })
- + /* Replace any inherited ST16/ST32 with plain pointer-cast stores of b to
- + * address a. The expansions are bare assignment expressions, so they are
- + * meant to be used as complete statements. */
- +#undef ST32
- +#undef ST16
- +
- +#define ST16(a, b) *((uint16_t*)(a)) = (b)
- +#define ST32(a, b) *((uint32_t*)(a)) = (b)
- + /* rnd_avg32(a, b): override of the generic macro with a single AVR32
- + * pavg.ub instruction on two 32-bit register operands; the result is
- + * returned as a uint32_t. Per the instruction name this is a packed
- + * unsigned-byte average of the four bytes in each word (rounding
- + * behaviour as defined by the AVR32 ISA -- see the architecture manual). */
- +#undef rnd_avg32
- +#define rnd_avg32(a, b) \
- + ({ uint32_t __tmp__;\
- + asm("pavg.ub\t%0, %1, %2" : "=r"(__tmp__) : "r"(a), "r"(b));\
- + __tmp__;})
- + /* Prototypes of the AVR32 transform routines. Their definitions are not
- + * in this part of the file (presumably hand-written assembly -- confirm).
- + * Note that the idct_put/idct_add variants take (dest, line_size, data)
- + * while the h264 variants take (dest, data, stride). */
- +void idct_avr32(DCTELEM *data);
- +void fdct_avr32(DCTELEM *data);
- +
- +void idct_put_avr32(uint8_t *dest, int line_size, DCTELEM *data);
- +void idct_add_avr32(uint8_t *dest, int line_size, DCTELEM *data);
- +
- +void h264_idct_add_avr32(uint8_t *dest, DCTELEM *data, int stride);
- +void h264_idct8_add_avr32(uint8_t *dest, DCTELEM *data, int stride);
- + /* extern_dspfunc(PFX, NUM) declares the four AVR32 pixel routines
- + * PFX_pixelsNUM_avr32 and its _h, _v and _hv variants, all with the
- + * signature (dst, pixels, line_size, h). It is instantiated for the
- + * 8-pixel put/avg routines with and without rounding and then undefined
- + * again. */
- +#define extern_dspfunc(PFX, NUM) \
- + void PFX ## _pixels ## NUM ## _avr32(uint8_t *dst, const uint8_t *pixels, int line_size, int h ); \
- + void PFX ## _pixels ## NUM ## _h_avr32(uint8_t *dst, const uint8_t *pixels, int line_size, int h ); \
- + void PFX ## _pixels ## NUM ## _v_avr32(uint8_t *dst, const uint8_t *pixels, int line_size, int h ); \
- + void PFX ## _pixels ## NUM ## _hv_avr32(uint8_t *dst, const uint8_t *pixels, int line_size, int h )
- +
- +extern_dspfunc(put, 8);
- +extern_dspfunc(put_no_rnd, 8);
- +extern_dspfunc(avg, 8);
- +extern_dspfunc(avg_no_rnd, 8);
- +#undef extern_dspfunc
- + /* Self-check build only: declare the plain-C reference routines
- + * PFX_pixelsNUM_c and its _x2, _y2 and _xy2 variants so that results can
- + * be compared against them. The conditional opened here is closed by the
- + * #else / #endif around the PIXOP2 definitions further down. */
- +#ifdef CHECK_DSP_FUNCS_AGAINST_C
- +#define extern_dspfunc(PFX, NUM) \
- + void PFX ## _pixels ## NUM ## _c(uint8_t *dst, const uint8_t *pixels, int line_size, int h ); \
- + void PFX ## _pixels ## NUM ## _x2_c(uint8_t *dst, const uint8_t *pixels, int line_size, int h ); \
- + void PFX ## _pixels ## NUM ## _y2_c(uint8_t *dst, const uint8_t *pixels, int line_size, int h ); \
- + void PFX ## _pixels ## NUM ## _xy2_c(uint8_t *dst, const uint8_t *pixels, int line_size, int h )
- +
- +extern_dspfunc(put, 4);
- +extern_dspfunc(put_no_rnd, 4);
- +extern_dspfunc(put, 8);
- +extern_dspfunc(put_no_rnd, 8);
- +extern_dspfunc(put, 16);
- +extern_dspfunc(put_no_rnd, 16);
- +extern_dspfunc(avg, 8);
- +extern_dspfunc(avg_no_rnd, 8);
- +extern_dspfunc(avg, 16);
- +extern_dspfunc(avg_no_rnd, 16);
- + /* Self-check build only: redefine extern_dspfunc(PFX, NUM) to declare the
- + * sixteen plain-C quarter-pel routines PFXNUM_mcXY_c for X, Y in 0..3,
- + * each taking (dst, src, stride). Each declaration in the expansion
- + * already ends in ';', so the ';' after every use below is an extra
- + * empty declaration. */
- +
- +#undef extern_dspfunc
- +#define extern_dspfunc(PFX, NUM) \
- +void PFX ## NUM ## _mc00_c(uint8_t *dst, uint8_t *src, int stride); \
- +void PFX ## NUM ## _mc10_c(uint8_t *dst, uint8_t *src, int stride); \
- +void PFX ## NUM ## _mc20_c(uint8_t *dst, uint8_t *src, int stride); \
- +void PFX ## NUM ## _mc30_c(uint8_t *dst, uint8_t *src, int stride); \
- +void PFX ## NUM ## _mc01_c(uint8_t *dst, uint8_t *src, int stride); \
- +void PFX ## NUM ## _mc11_c(uint8_t *dst, uint8_t *src, int stride); \
- +void PFX ## NUM ## _mc21_c(uint8_t *dst, uint8_t *src, int stride); \
- +void PFX ## NUM ## _mc31_c(uint8_t *dst, uint8_t *src, int stride); \
- +void PFX ## NUM ## _mc02_c(uint8_t *dst, uint8_t *src, int stride); \
- +void PFX ## NUM ## _mc12_c(uint8_t *dst, uint8_t *src, int stride); \
- +void PFX ## NUM ## _mc22_c(uint8_t *dst, uint8_t *src, int stride); \
- +void PFX ## NUM ## _mc32_c(uint8_t *dst, uint8_t *src, int stride); \
- +void PFX ## NUM ## _mc03_c(uint8_t *dst, uint8_t *src, int stride); \
- +void PFX ## NUM ## _mc13_c(uint8_t *dst, uint8_t *src, int stride); \
- +void PFX ## NUM ## _mc23_c(uint8_t *dst, uint8_t *src, int stride); \
- +void PFX ## NUM ## _mc33_c(uint8_t *dst, uint8_t *src, int stride); \
- +
- +extern_dspfunc(put_h264_qpel, 16);
- +extern_dspfunc(put_h264_qpel, 8);
- +extern_dspfunc(put_h264_qpel, 4);
- +extern_dspfunc(avg_h264_qpel, 16);
- +extern_dspfunc(avg_h264_qpel, 8);
- +extern_dspfunc(avg_h264_qpel, 4);
- +
- +#undef extern_dspfunc
- + /* Self-check build only: prototypes of the plain-C chroma motion
- + * compensation routines and of the dump/check helpers. The helpers are
- + * defined elsewhere; judging by the parameter names, check_block*
- + * compares a test block with a reference block within max_dev and
- + * dump_block* prints a block -- confirm against their definitions. */
- +void put_h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y);
- +void put_h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y);
- +void put_h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y);
- +
- +void avg_h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y);
- +void avg_h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y);
- +void avg_h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y);
- +
- +
- +void dump_block8(uint8_t *block, int line_size, int h);
- +void dump_block4(uint8_t *block, int line_size, int h);
- +void dump_block(uint8_t *block, int line_size, int h, int w);
- +
- +void check_block8(uint8_t *test, uint8_t *correct, int line_size_test, int line_size_correct,
- + int h, char *name, int max_dev);
- +void check_block4(uint8_t *test, uint8_t *correct, int line_size_test, int line_size_correct,
- + int h, char *name, int max_dev);
- +void check_block(uint8_t *test, uint8_t *correct, int line_size_test, int line_size_correct,
- + int h, int width, char *name, int max_dev);
- + /* PIXOP2(OPNAME, OP), self-check build: generates four externally visible
- + * functions per instantiation:
- + * OPNAME_pixels4_c - applies OP to one 32-bit word per row, h rows
- + * OPNAME_pixels8_l2 - per row, OP(dst, rnd_avg32(src1, src2)) on two words
- + * OPNAME_pixels4_l2 - the same on one word per row
- + * OPNAME_pixels16_l2 - two OPNAME_pixels8_l2 calls, 8 bytes apart
- + * OP(lvalue, value) is expected to store into its first argument. All
- + * accesses go through 32-bit pointer casts / LD32. */
- +#define PIXOP2( OPNAME, OP ) \
- +void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
- + int i;\
- + for(i=0; i<h; i++){\
- + OP(*((uint32_t*)(block )), LD32(pixels ));\
- + pixels+=line_size;\
- + block +=line_size;\
- + }\
- +}\
- +void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
- + int src_stride1, int src_stride2, int h){\
- + int i;\
- + for(i=0; i<h; i++){\
- + uint32_t a,b;\
- + a= LD32(&src1[i*src_stride1 ]);\
- + b= LD32(&src2[i*src_stride2 ]);\
- + OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
- + a= LD32(&src1[i*src_stride1+4]);\
- + b= LD32(&src2[i*src_stride2+4]);\
- + OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
- + }\
- +}\
- +\
- +void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
- + int src_stride1, int src_stride2, int h){\
- + int i;\
- + for(i=0; i<h; i++){\
- + uint32_t a,b;\
- + a= LD32(&src1[i*src_stride1 ]);\
- + b= LD32(&src2[i*src_stride2 ]);\
- + OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
- + }\
- +}\
- +\
- +void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
- + int src_stride1, int src_stride2, int h){\
- + OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
- + OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
- +}\
- +
- +#else /* CHECK_DSP_FUNCS_AGAINST_C is not defined (normal build).
- + * PIXOP2(OPNAME, OP) here generates file-local (static) helpers:
- + * OPNAME_pixels4_c / _pixels8_c / _pixels16_c - apply OP to 1, 2 or 4
- + * 32-bit words per row for h rows, advancing both pointers by line_size
- + * OPNAME_pixels8_l2 / _pixels4_l2 - per row, OP(dst, rnd_avg32(src1, src2))
- + * on two words or one word, each buffer with its own stride
- + * OPNAME_pixels16_l2 - two OPNAME_pixels8_l2 calls, 8 bytes apart
- + * OP(lvalue, value) is expected to store into its first argument.
- + */
- +#define PIXOP2( OPNAME, OP ) \
- +static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
- + int i;\
- + for(i=0; i<h; i++){\
- + OP(*((uint32_t*)(block )), LD32(pixels ));\
- + pixels+=line_size;\
- + block +=line_size;\
- + }\
- +}\
- +static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
- + int i;\
- + for(i=0; i<h; i++){\
- + OP(*((uint32_t*)(block )), LD32(pixels ));\
- + OP(*((uint32_t*)(block+4)), LD32(pixels+4));\
- + pixels+=line_size;\
- + block +=line_size;\
- + }\
- +}\
- +static void OPNAME ## _pixels16_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
- + int i;\
- + for(i=0; i<h; i++){\
- + OP(*((uint32_t*)(block )), LD32(pixels ));\
- + OP(*((uint32_t*)(block+4)), LD32(pixels+4));\
- + OP(*((uint32_t*)(block+8)), LD32(pixels+8));\
- + OP(*((uint32_t*)(block+12)), LD32(pixels+12));\
- + pixels+=line_size;\
- + block +=line_size;\
- + }\
- +}\
- +static void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
- + int src_stride1, int src_stride2, int h){\
- + int i;\
- + for(i=0; i<h; i++){\
- + uint32_t a,b;\
- + a= LD32(&src1[i*src_stride1 ]);\
- + b= LD32(&src2[i*src_stride2 ]);\
- + OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
- + a= LD32(&src1[i*src_stride1+4]);\
- + b= LD32(&src2[i*src_stride2+4]);\
- + OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
- + }\
- +}\
- +\
- +static void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
- + int src_stride1, int src_stride2, int h){\
- + int i;\
- + for(i=0; i<h; i++){\
- + uint32_t a,b;\
- + a= LD32(&src1[i*src_stride1 ]);\
- + b= LD32(&src2[i*src_stride2 ]);\
- + OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
- + }\
- +}\
- +\
- +static void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
- + int src_stride1, int src_stride2, int h){\
- + OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
- + OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
- +}\
- +
- +#endif
- + /* Instantiate PIXOP2 twice: the avg_* helpers combine the new value with
- + * the destination through rnd_avg32, the put_* helpers simply overwrite
- + * it. The two op_* macros exist only for these instantiations and are
- + * undefined again right after. */
- +#define op_avg(a, b) a = rnd_avg32(a, b)
- +#define op_put(a, b) a = b
- +
- +PIXOP2(avg, op_avg)
- +PIXOP2(put, op_put)
- +#undef op_avg
- +#undef op_put
- +
- +
- +
- +/*
- + * Copy a block that is 4 bytes wide and h rows high from src to dst.
- + * Each row is moved as a single 32-bit word; the two buffers advance by
- + * their own strides. Nothing is copied when h <= 0.
- + */
- +static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
- +{
- + int rows_left;
- +
- + for (rows_left = h; rows_left > 0; rows_left--) {
- + ST32(dst, LD32(src));
- + src += srcStride;
- + dst += dstStride;
- + }
- +}
- +
- +/*
- + * clear_blocks_avr32 - zero the coefficient area that starts at blocks.
- + *
- + * blocks: start of the area. The pointer is first advanced by 6*64
- + * elements and the area is then filled backwards with
- + * pre-decrementing multi-register stores.
- + *
- + * tmp1 and tmp2 are 64-bit temporaries; %1/%m1 and %2/%m2 name their two
- + * 32-bit halves, all four of which are set to zero. Every stm therefore
- + * writes 16 bytes, and 12 iterations of four stm write 768 bytes. That
- + * matches 6*64 elements only if DCTELEM is 16 bits wide -- presumably the
- + * case, confirm against dsputil.h.
- + */
- +static void clear_blocks_avr32(DCTELEM *blocks)
- +{
- + int n = 12;
- + uint64_t tmp1, tmp2;
- + blocks += 6*64;
- + asm volatile ( "mov\t%1, 0\n"
- + "mov\t%m1, 0\n"
- + "mov\t%2, 0\n"
- + "mov\t%m2, 0\n"
- + "0:\n"
- + "stm\t--%3, %1, %m1, %2, %m2\n"
- + "stm\t--%3, %1, %m1, %2, %m2\n"
- + "stm\t--%3, %1, %m1, %2, %m2\n"
- + "stm\t--%3, %1, %m1, %2, %m2\n"
- + "sub\t%0, 1\n"
- + "brne\t0b\n"
- + : "+r"(n), "=&r"(tmp1), "=&r"(tmp2),
- + "+r"(blocks)
- + : /* no input-only operands */
- + /* The stores are not expressed through any operand and sub
- + * updates the status flags, so tell the compiler about both;
- + * otherwise it may keep stale values of the cleared memory in
- + * registers or move accesses across the asm. */
- + : "memory", "cc");
- +}
- +
- +
- +/*
- + * Copy a block that is 8 bytes wide and h rows high from src to dst.
- + * Each row is moved as two 32-bit words; the two buffers advance by their
- + * own strides. Nothing is copied when h <= 0.
- + */
- +static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
- +{
- + int rows_left;
- +
- + for (rows_left = h; rows_left > 0; rows_left--) {
- + ST32(dst, LD32(src));
- + ST32(dst + 4, LD32(src + 4));
- + src += srcStride;
- + dst += dstStride;
- + }
- +}
- +
- +/*
- + * Copy a block that is 16 bytes wide and h rows high from src to dst.
- + * Each row is moved as four 32-bit words, lowest address first; the two
- + * buffers advance by their own strides. Nothing is copied when h <= 0.
- + */
- +static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
- +{
- + int rows_left;
- +
- + for (rows_left = h; rows_left > 0; rows_left--) {
- + ST32(dst, LD32(src));
- + ST32(dst + 4, LD32(src + 4));
- + ST32(dst + 8, LD32(src + 8));
- + ST32(dst + 12, LD32(src + 12));
- + src += srcStride;
- + dst += dstStride;
- + }
- +}
- +
- + /* put_h264_chroma_mc2_pico - 2-pixel-wide H.264 chroma motion
- + * compensation on the PICO coprocessor; the result overwrites dst.
- + *
- + * dst, src: destination and source rows; stride is used for both.
- + * h: number of rows produced.
- + * x, y: fractional offsets. A, B, C and D are the four bilinear
- + * weights; they sum to 64.
- + *
- + * The meaning of the PICO_* macros comes from pico.h and is not visible
- + * here; the notes below are inferred from how they are used. */
- +static void put_h264_chroma_mc2_pico(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
- + const int A=(8-x)*(8-y);
- + const int B=( x)*(8-y);
- + const int C=(8-x)*( y);
- + const int D=( x)*( y);
- + int i;
- +
- + PICO_PUT_W(PICO_COEFF0_A, (A << 16) | (B & 0xFFFF)); /* two 16-bit weights per register */
- + PICO_PUT_W(PICO_COEFF0_B, 32); /* presumably the rounding offset: 0.5 with 6 fractional bits */
- + PICO_PUT_W(PICO_COEFF1_A, (C << 16) | (D & 0xFFFF));
- + PICO_PUT_W(PICO_COEFF1_B, 0);
- + PICO_PUT_W(PICO_COEFF2_A, 0);
- + PICO_PUT_W(PICO_COEFF2_B, 0);
- + PICO_PUT_W(PICO_CONFIG,
- + PICO_OUTPUT_MODE(PICO_PLANAR_MODE)
- + | PICO_INPUT_MODE(PICO_HOR_FILTER_MODE)
- + | PICO_COEFF_FRAC_BITS(6)
- + | PICO_OFFSET_FRAC_BITS(6));
- +
- + for(i=0; i<h; i++)
- + {
- +
- + int src0 = LD32(src); /* four bytes of the current row */
- + int src1 = LD32(src + stride); /* four bytes of the row below */
- +
- + PICO_MVRC_W(PICO_INPIX0, src0);
- + PICO_MVRC_W(PICO_INPIX1, src1);
- + PICO_OP(PICO_SINGLE_VECTOR, 2, 0, 4, 0);
- + PICO_OP(PICO_SINGLE_VECTOR, 3, 1, 5, 0);
- + src += stride;
- + ST16(dst,(short)PICO_GET_W(PICO_OUTPIX0)); /* only the low 16 bits of the output word are stored */
- + dst += stride;
- + }
- +}
- +
- +
- +/*
- + * put_h264_chroma_mc4_pico - 4-pixel-wide H.264 chroma motion compensation
- + * on the PICO coprocessor; the result overwrites dst.
- + *
- + * dst: destination rows
- + * src: source rows; row i reads src[0..4] and src[stride..stride+4]
- + * stride: byte distance between rows, used for both src and dst
- + * h: number of rows to produce
- + * x, y: fractional offsets; the weights A..D below sum to 64
- + *
- + * Reference computation for one row (from the generic C version):
- + * dst[n] <- A*src[n] + B*src[n+1] + C*src[stride+n] + D*src[stride+n+1]
- + * for n = 0..3, presumably rounded with the offset 32 and scaled by the
- + * 6 fractional bits configured below. The PICO_* macros come from pico.h
- + * and are not visible here.
- + */
- +static void put_h264_chroma_mc4_pico(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
- + const int A=(8-x)*(8-y);
- + const int B=( x)*(8-y);
- + const int C=(8-x)*( y);
- + const int D=( x)*( y);
- + int i;
- +
- + /* Two 16-bit weights are packed into each coefficient register. */
- + PICO_PUT_W(PICO_COEFF0_A, (A << 16) | (B & 0xFFFF));
- + PICO_PUT_W(PICO_COEFF0_B, 32);
- + PICO_PUT_W(PICO_COEFF1_A, (C << 16) | (D & 0xFFFF));
- + PICO_PUT_W(PICO_COEFF1_B, 0);
- + PICO_PUT_W(PICO_COEFF2_A, 0);
- + PICO_PUT_W(PICO_COEFF2_B, 0);
- + PICO_PUT_W(PICO_CONFIG,
- + PICO_OUTPUT_MODE(PICO_PLANAR_MODE)
- + | PICO_INPUT_MODE(PICO_HOR_FILTER_MODE)
- + | PICO_COEFF_FRAC_BITS(6)
- + | PICO_OFFSET_FRAC_BITS(6));
- +
- + for(i=0; i<h; i++)
- + {
- + /* src0: current row, bytes 0..3.
- + * src1: current row byte 4 in the most significant byte and the
- + * first byte of the next row in the least significant byte.
- + * src2: next row, bytes 1..4.
- + * The byte is widened to uint32_t before shifting so that values
- + * >= 0x80 do not overflow a signed int. */
- + int src0 = LD32(src);
- + int src1 = (int)(((uint32_t)src[4] << 24) | src[stride]);
- + int src2 = LD32(src + stride + 1);
- +
- + PICO_MVRC_W(PICO_INPIX0, src0);
- + PICO_MVRC_W(PICO_INPIX1, src1);
- + PICO_MVRC_W(PICO_INPIX2, src2);
- + PICO_OP(PICO_SINGLE_VECTOR, 0, 0, 7, 0);
- + PICO_OP(PICO_SINGLE_VECTOR, 1, 1, 8, 0);
- + PICO_OP(PICO_SINGLE_VECTOR, 2, 2, 9, 0);
- + PICO_OP(PICO_SINGLE_VECTOR, 3, 3, 10, 0);
- + src += stride;
- + ST32(dst, PICO_GET_W(PICO_OUTPIX0));
- +
- + dst += stride;
- + }
- +}
- +
- +/*
- + * put_h264_chroma_mc8_pico - 8-pixel-wide H.264 chroma motion compensation
- + * on the PICO coprocessor; the result overwrites dst.
- + *
- + * dst: destination rows
- + * src: source rows; row i reads src[0..8] and src[stride..stride+8]
- + * stride: byte distance between rows, used for both src and dst
- + * h: number of rows to produce
- + * x, y: fractional offsets; the weights A..D below sum to 64
- + *
- + * Reference computation for one row (from the generic C version):
- + * dst[n] <- A*src[n] + B*src[n+1] + C*src[stride+n] + D*src[stride+n+1]
- + * for n = 0..7, presumably rounded with the offset 32 and scaled by the
- + * 6 fractional bits configured below. Each row is processed as two
- + * groups of four pixels. The PICO_* macros come from pico.h and are not
- + * visible here.
- + */
- +static void put_h264_chroma_mc8_pico(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
- + const int A=(8-x)*(8-y);
- + const int B=( x)*(8-y);
- + const int C=(8-x)*( y);
- + const int D=( x)*( y);
- + int i;
- +
- + /* Two 16-bit weights are packed into each coefficient register. */
- + PICO_PUT_W(PICO_COEFF0_A, (A << 16) | (B & 0xFFFF));
- + PICO_PUT_W(PICO_COEFF0_B, 32);
- + PICO_PUT_W(PICO_COEFF1_A, (C << 16) | (D & 0xFFFF));
- + PICO_PUT_W(PICO_COEFF1_B, 0);
- + PICO_PUT_W(PICO_COEFF2_A, 0);
- + PICO_PUT_W(PICO_COEFF2_B, 0);
- + PICO_PUT_W(PICO_CONFIG,
- + PICO_OUTPUT_MODE(PICO_PLANAR_MODE)
- + | PICO_INPUT_MODE(PICO_HOR_FILTER_MODE)
- + | PICO_COEFF_FRAC_BITS(6)
- + | PICO_OFFSET_FRAC_BITS(6));
- +
- + for(i=0; i<h; i++)
- + {
- + /* Left half. src0: current row bytes 0..3; src1: current row
- + * byte 4 (most significant byte) and next row byte 0 (least
- + * significant byte); src2: next row bytes 1..4. Bytes are widened
- + * to uint32_t before shifting so that values >= 0x80 do not
- + * overflow a signed int. */
- + int src0 = LD32(src);
- + int src1 = (int)(((uint32_t)src[4] << 24) | src[stride]);
- + int src2 = LD32(src + stride + 1);
- +
- + PICO_MVRC_W(PICO_INPIX0, src0);
- + PICO_MVRC_W(PICO_INPIX1, src1);
- + PICO_MVRC_W(PICO_INPIX2, src2);
- + PICO_OP(PICO_SINGLE_VECTOR, 0, 0, 7, 0);
- + PICO_OP(PICO_SINGLE_VECTOR, 1, 1, 8, 0);
- + PICO_OP(PICO_SINGLE_VECTOR, 2, 2, 9, 0);
- + PICO_OP(PICO_SINGLE_VECTOR, 3, 3, 10, 0);
- + ST32(dst, PICO_GET_W(PICO_OUTPIX0));
- +
- + /* Right half: the same layout, four bytes further along. */
- + src0 = LD32(src + 4);
- + src1 = (int)(((uint32_t)src[8] << 24) | src[stride + 4]);
- + src2 = LD32(src + stride + 5);
- +
- + PICO_MVRC_W(PICO_INPIX0, src0);
- + PICO_MVRC_W(PICO_INPIX1, src1);
- + PICO_MVRC_W(PICO_INPIX2, src2);
- + PICO_OP(PICO_SINGLE_VECTOR, 0, 0, 7, 0);
- + PICO_OP(PICO_SINGLE_VECTOR, 1, 1, 8, 0);
- + PICO_OP(PICO_SINGLE_VECTOR, 2, 2, 9, 0);
- + PICO_OP(PICO_SINGLE_VECTOR, 3, 3, 10, 0);
- + src += stride;
- + ST32(dst + 4, PICO_GET_W(PICO_OUTPIX0));
- +
- + dst += stride;
- + }
- +}
- +
- + /* avg_h264_chroma_mc2_pico - 2-pixel-wide H.264 chroma motion
- + * compensation on the PICO coprocessor; the result is averaged with the
- + * existing contents of dst through rnd_avg32.
- + *
- + * dst, src: destination and source rows; stride is used for both.
- + * h: number of rows produced.
- + * x, y: fractional offsets. A, B, C and D are the four bilinear
- + * weights; they sum to 64.
- + *
- + * The meaning of the PICO_* macros comes from pico.h and is not visible
- + * here. */
- +static void avg_h264_chroma_mc2_pico(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
- + const int A=(8-x)*(8-y);
- + const int B=( x)*(8-y);
- + const int C=(8-x)*( y);
- + const int D=( x)*( y);
- + int i;
- +
- + PICO_PUT_W(PICO_COEFF0_A, (A << 16) | (B & 0xFFFF)); /* two 16-bit weights per register */
- + PICO_PUT_W(PICO_COEFF0_B, 32); /* presumably the rounding offset: 0.5 with 6 fractional bits */
- + PICO_PUT_W(PICO_COEFF1_A, (C << 16) | (D & 0xFFFF));
- + PICO_PUT_W(PICO_COEFF1_B, 0);
- + PICO_PUT_W(PICO_COEFF2_A, 0);
- + PICO_PUT_W(PICO_COEFF2_B, 0);
- + PICO_PUT_W(PICO_CONFIG,
- + PICO_OUTPUT_MODE(PICO_PLANAR_MODE)
- + | PICO_INPUT_MODE(PICO_HOR_FILTER_MODE)
- + | PICO_COEFF_FRAC_BITS(6)
- + | PICO_OFFSET_FRAC_BITS(6));
- +
- + for(i=0; i<h; i++)
- + {
- + int src0 = LD32(src); /* four bytes of the current row */
- + int src1 = LD32(src + stride); /* four bytes of the row below */
- +
- + PICO_MVRC_W(PICO_INPIX0, src0);
- + PICO_MVRC_W(PICO_INPIX1, src1);
- + PICO_OP(PICO_SINGLE_VECTOR, 2, 0, 4, 0);
- + PICO_OP(PICO_SINGLE_VECTOR, 3, 1, 5, 0);
- + src += stride;
- + ST16(dst, rnd_avg32(LD16(dst), PICO_GET_W(PICO_OUTPIX0))); /* average with dst, keep the low 16 bits */
- + dst += stride;
- + }
- +}
- +
- +
- +/*
- + * avg_h264_chroma_mc4_pico - 4-pixel-wide H.264 chroma motion compensation
- + * on the PICO coprocessor; the result is averaged with the existing
- + * contents of dst through rnd_avg32.
- + *
- + * dst: destination rows (read and written)
- + * src: source rows; row i reads src[0..4] and src[stride..stride+4]
- + * stride: byte distance between rows, used for both src and dst
- + * h: number of rows to produce
- + * x, y: fractional offsets; the weights A..D below sum to 64
- + *
- + * Reference computation for one row (from the generic C version):
- + * A*src[n] + B*src[n+1] + C*src[stride+n] + D*src[stride+n+1]
- + * for n = 0..3, presumably rounded with the offset 32 and scaled by the
- + * 6 fractional bits configured below. The PICO_* macros come from pico.h
- + * and are not visible here.
- + */
- +static void avg_h264_chroma_mc4_pico(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
- + const int A=(8-x)*(8-y);
- + const int B=( x)*(8-y);
- + const int C=(8-x)*( y);
- + const int D=( x)*( y);
- + int i;
- +
- + /* Two 16-bit weights are packed into each coefficient register. */
- + PICO_PUT_W(PICO_COEFF0_A, (A << 16) | (B & 0xFFFF));
- + PICO_PUT_W(PICO_COEFF0_B, 32);
- + PICO_PUT_W(PICO_COEFF1_A, (C << 16) | (D & 0xFFFF));
- + PICO_PUT_W(PICO_COEFF1_B, 0);
- + PICO_PUT_W(PICO_COEFF2_A, 0);
- + PICO_PUT_W(PICO_COEFF2_B, 0);
- + PICO_PUT_W(PICO_CONFIG,
- + PICO_OUTPUT_MODE(PICO_PLANAR_MODE)
- + | PICO_INPUT_MODE(PICO_HOR_FILTER_MODE)
- + | PICO_COEFF_FRAC_BITS(6)
- + | PICO_OFFSET_FRAC_BITS(6));
- +
- + for(i=0; i<h; i++)
- + {
- + /* src0: current row, bytes 0..3.
- + * src1: current row byte 4 in the most significant byte and the
- + * first byte of the next row in the least significant byte.
- + * src2: next row, bytes 1..4.
- + * The byte is widened to uint32_t before shifting so that values
- + * >= 0x80 do not overflow a signed int; the word loads use LD32
- + * like the put_ variants. */
- + int src0 = LD32(src);
- + int src1 = (int)(((uint32_t)src[4] << 24) | src[stride]);
- + int src2 = LD32(src + stride + 1);
- +
- + PICO_MVRC_W(PICO_INPIX0, src0);
- + PICO_MVRC_W(PICO_INPIX1, src1);
- + PICO_MVRC_W(PICO_INPIX2, src2);
- + PICO_OP(PICO_SINGLE_VECTOR, 0, 0, 7, 0);
- + PICO_OP(PICO_SINGLE_VECTOR, 1, 1, 8, 0);
- + PICO_OP(PICO_SINGLE_VECTOR, 2, 2, 9, 0);
- + PICO_OP(PICO_SINGLE_VECTOR, 3, 3, 10, 0);
- + src += stride;
- + ST32(dst, rnd_avg32(LD32(dst), PICO_GET_W(PICO_OUTPIX0)));
- + dst += stride;
- + }
- +}
- +
- +/*
- + * avg_h264_chroma_mc8_pico - 8-pixel-wide H.264 chroma motion compensation
- + * on the PICO coprocessor; the result is averaged with the existing
- + * contents of dst through rnd_avg32.
- + *
- + * dst: destination rows (read and written)
- + * src: source rows; row i reads src[0..8] and src[stride..stride+8]
- + * stride: byte distance between rows, used for both src and dst
- + * h: number of rows to produce
- + * x, y: fractional offsets; the weights A..D below sum to 64
- + *
- + * Reference computation for one row (from the generic C version):
- + * A*src[n] + B*src[n+1] + C*src[stride+n] + D*src[stride+n+1]
- + * for n = 0..7, presumably rounded with the offset 32 and scaled by the
- + * 6 fractional bits configured below. Each row is processed as two
- + * groups of four pixels. The PICO_* macros come from pico.h and are not
- + * visible here.
- + */
- +static void avg_h264_chroma_mc8_pico(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
- + const int A=(8-x)*(8-y);
- + const int B=( x)*(8-y);
- + const int C=(8-x)*( y);
- + const int D=( x)*( y);
- + int i;
- +
- + /* Two 16-bit weights are packed into each coefficient register. */
- + PICO_PUT_W(PICO_COEFF0_A, (A << 16) | (B & 0xFFFF));
- + PICO_PUT_W(PICO_COEFF0_B, 32);
- + PICO_PUT_W(PICO_COEFF1_A, (C << 16) | (D & 0xFFFF));
- + PICO_PUT_W(PICO_COEFF1_B, 0);
- + PICO_PUT_W(PICO_COEFF2_A, 0);
- + PICO_PUT_W(PICO_COEFF2_B, 0);
- + PICO_PUT_W(PICO_CONFIG,
- + PICO_OUTPUT_MODE(PICO_PLANAR_MODE)
- + | PICO_INPUT_MODE(PICO_HOR_FILTER_MODE)
- + | PICO_COEFF_FRAC_BITS(6)
- + | PICO_OFFSET_FRAC_BITS(6));
- +
- + for(i=0; i<h; i++)
- + {
- + /* Left half. src0: current row bytes 0..3; src1: current row
- + * byte 4 (most significant byte) and next row byte 0 (least
- + * significant byte); src2: next row bytes 1..4. Bytes are widened
- + * to uint32_t before shifting so that values >= 0x80 do not
- + * overflow a signed int; the word loads use LD32 like the put_
- + * variants. */
- + int src0 = LD32(src);
- + int src1 = (int)(((uint32_t)src[4] << 24) | src[stride]);
- + int src2 = LD32(src + stride + 1);
- +
- + PICO_MVRC_W(PICO_INPIX0, src0);
- + PICO_MVRC_W(PICO_INPIX1, src1);
- + PICO_MVRC_W(PICO_INPIX2, src2);
- + PICO_OP(PICO_SINGLE_VECTOR, 0, 0, 7, 0);
- + PICO_OP(PICO_SINGLE_VECTOR, 1, 1, 8, 0);
- + PICO_OP(PICO_SINGLE_VECTOR, 2, 2, 9, 0);
- + PICO_OP(PICO_SINGLE_VECTOR, 3, 3, 10, 0);
- + ST32(dst, rnd_avg32(LD32(dst), PICO_GET_W(PICO_OUTPIX0)));
- +
- + /* Right half: the same layout, four bytes further along. */
- + src0 = LD32(src + 4);
- + src1 = (int)(((uint32_t)src[8] << 24) | src[stride + 4]);
- + src2 = LD32(src + stride + 5);
- +
- + PICO_MVRC_W(PICO_INPIX0, src0);
- + PICO_MVRC_W(PICO_INPIX1, src1);
- + PICO_MVRC_W(PICO_INPIX2, src2);
- + PICO_OP(PICO_SINGLE_VECTOR, 0, 0, 7, 0);
- + PICO_OP(PICO_SINGLE_VECTOR, 1, 1, 8, 0);
- + PICO_OP(PICO_SINGLE_VECTOR, 2, 2, 9, 0);
- + PICO_OP(PICO_SINGLE_VECTOR, 3, 3, 10, 0);
- + src += stride;
- + ST32(dst + 4, rnd_avg32(LD32(dst + 4), PICO_GET_W(PICO_OUTPIX0)));
- + dst += stride;
- + }
- +}
- + /* PICO configuration shared by put_/avg_h264_qpel4_h_lowpass_pico:
- + * horizontal filter input, planar output, 5 fractional bits. The
- + * coefficients hold the taps 1, -5, 20 and 20, -5, 1 of the six-tap
- + * filter shown in the reference code of those functions; 16 is
- + * presumably the rounding offset (0.5 with 5 fractional bits). The
- + * exact role of each coeffN_M field is defined in pico.h -- confirm. */
- +static struct pico_config_t h264_qpel4_h_lowpass_config = {
- + .input_mode = PICO_HOR_FILTER_MODE,
- + .output_mode = PICO_PLANAR_MODE,
- + .coeff_frac_bits = 5,
- + .offset_frac_bits = 5,
- + .coeff0_0 = 1,
- + .coeff0_1 = -5,
- + .coeff0_2 = 20,
- + .coeff0_3 = 16,
- + .coeff1_0 = 20,
- + .coeff1_1 = -5,
- + .coeff1_2 = 1,
- + .coeff1_3 = 0,
- + .coeff2_0 = 0,
- + .coeff2_1 = 0,
- + .coeff2_2 = 0,
- + .coeff2_3 = 0
- +};
- +
- +
- + /* put_h264_qpel4_h_lowpass_pico - horizontal six-tap lowpass of a 4x4
- + * block on the PICO coprocessor; the result overwrites dst.
- + *
- + * dst, dstStride: destination rows and their byte distance.
- + * src, srcStride: source rows and their byte distance; each row reads
- + * the 12 bytes src[-2..9].
- + *
- + * The commented-out code in the loop is the reference C computation. */
- +static void put_h264_qpel4_h_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){
- + const int h=4;
- + int i;
- +
- + set_pico_config(&h264_qpel4_h_lowpass_config);
- +
- + for(i=0; i<h; i++){
- +
- + /*
- + OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
- + OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
- + OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
- + OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
- + dst+=dstStride;\
- + src+=srcStride;\ */
- + PICO_MVRC_W(PICO_INPIX0, LD32(src - 2)); /* bytes src[-2..1] */
- + PICO_MVRC_D(PICO_INPIX2, LD64_UNALIGNED(src + 2)); /* bytes src[2..9] as one 64-bit move */
- + PICO_OP(PICO_SINGLE_VECTOR, 0, 0, 3, 6);
- + PICO_OP(PICO_SINGLE_VECTOR, 1, 1, 4, 7);
- + PICO_OP(PICO_SINGLE_VECTOR, 2, 2, 5, 8);
- + PICO_OP(PICO_SINGLE_VECTOR, 3, 3, 6, 9);
- + src += srcStride;
- + ST32(dst, PICO_GET_W(PICO_OUTPIX0));
- + dst += dstStride;
- + }
- +}
- + /* avg_h264_qpel4_h_lowpass_pico - horizontal six-tap lowpass of a 4x4
- + * block on the PICO coprocessor; each output word is averaged with the
- + * existing contents of dst through rnd_avg32.
- + *
- + * dst, dstStride: destination rows (read and written) and their byte
- + * distance.
- + * src, srcStride: source rows and their byte distance; each row reads
- + * the 12 bytes src[-2..9].
- + *
- + * The commented-out code in the loop is the reference C computation. */
- +static void avg_h264_qpel4_h_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){
- + const int h=4;
- + int i;
- +
- + set_pico_config(&h264_qpel4_h_lowpass_config);
- +
- + for(i=0; i<h; i++){
- +
- + /*
- + OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
- + OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
- + OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
- + OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
- + dst+=dstStride;\
- + src+=srcStride;\ */
- +
- + PICO_MVRC_W(PICO_INPIX0, LD32(src - 2)); /* bytes src[-2..1] */
- + PICO_MVRC_D(PICO_INPIX2, LD64_UNALIGNED(src + 2)); /* bytes src[2..9] as one 64-bit move */
- + PICO_OP(PICO_SINGLE_VECTOR, 0, 0, 3, 6);
- + PICO_OP(PICO_SINGLE_VECTOR, 1, 1, 4, 7);
- + PICO_OP(PICO_SINGLE_VECTOR, 2, 2, 5, 8);
- + PICO_OP(PICO_SINGLE_VECTOR, 3, 3, 6, 9);
- + src += srcStride;
- + ST32(dst, rnd_avg32(LD32(dst), PICO_GET_W(PICO_OUTPIX0)));
- + dst += dstStride;
- + }
- +}
- + /* First PICO configuration of the 4x4 vertical lowpass functions:
- + * vertical filter input, packed output, 5 fractional bits. All three
- + * coefficient groups hold the same taps 1, -5, 20 with the value 16 in
- + * the fourth slot (presumably the rounding offset, 0.5 with 5 fractional
- + * bits). The exact role of each coeffN_M field is defined in pico.h --
- + * confirm. */
- +static struct pico_config_t h264_qpel4_v_lowpass_config1 = {
- + .input_mode = PICO_VERT_FILTER_MODE,
- + .output_mode = PICO_PACKED_MODE,
- + .coeff_frac_bits = 5,
- + .offset_frac_bits = 5,
- + .coeff0_0 = 1,
- + .coeff0_1 = -5,
- + .coeff0_2 = 20,
- + .coeff0_3 = 16,
- + .coeff1_0 = 1,
- + .coeff1_1 = -5,
- + .coeff1_2 = 20,
- + .coeff1_3 = 16,
- + .coeff2_0 = 1,
- + .coeff2_1 = -5,
- + .coeff2_2 = 20,
- + .coeff2_3 = 16
- +};
- +
- +
- + /* Second PICO configuration of the 4x4 vertical lowpass functions, used
- + * for the last column: vertical filter input, planar output, 5
- + * fractional bits, and the same coefficient values as
- + * h264_qpel4_h_lowpass_config (1, -5, 20, 16 / 20, -5, 1, 0 / zeros). */
- +static struct pico_config_t h264_qpel4_v_lowpass_config2 = {
- + .input_mode = PICO_VERT_FILTER_MODE,
- + .output_mode = PICO_PLANAR_MODE,
- + .coeff_frac_bits = 5,
- + .offset_frac_bits = 5,
- + .coeff0_0 = 1,
- + .coeff0_1 = -5,
- + .coeff0_2 = 20,
- + .coeff0_3 = 16,
- + .coeff1_0 = 20,
- + .coeff1_1 = -5,
- + .coeff1_2 = 1,
- + .coeff1_3 = 0,
- + .coeff2_0 = 0,
- + .coeff2_1 = 0,
- + .coeff2_2 = 0,
- + .coeff2_3 = 0
- +};
- + /* put_h264_qpel4_v_lowpass_pico - vertical six-tap lowpass of a 4x4 block
- + * on the PICO coprocessor; the result overwrites dst.
- + *
- + * dst, dstStride: destination rows and their byte distance.
- + * src, srcStride: source rows and their byte distance; the nine rows
- + * src - 2*srcStride .. src + 6*srcStride are read,
- + * four bytes each.
- + *
- + * The work is done in two passes: the first three columns of all four
- + * rows with h264_qpel4_v_lowpass_config1, then the fourth column with
- + * h264_qpel4_v_lowpass_config2. The first commented-out block is the
- + * reference C computation. */
- +static void put_h264_qpel4_v_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){
- +
- + /*
- + const int w=4;
- + uint8_t *cm = cropTbl + MAX_NEG_CROP;
- + int i;
- + for(i=0; i<w; i++)
- + {
- + const int srcB= src[-2*srcStride];\
- + const int srcA= src[-1*srcStride];\
- + const int src0= src[0 *srcStride];\
- + const int src1= src[1 *srcStride];\
- + const int src2= src[2 *srcStride];\
- + const int src3= src[3 *srcStride];\
- + const int src4= src[4 *srcStride];\
- + const int src5= src[5 *srcStride];\
- + const int src6= src[6 *srcStride];\
- + OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
- + OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
- + OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
- + OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
- + dst++;\
- + src++;\
- + */
- +
- + set_pico_config(&h264_qpel4_v_lowpass_config1);
- +
- + {
- + int srcB= LD32(src - 2*srcStride);
- + int srcA= LD32(src - 1*srcStride);
- + int src0= LD32(src + 0 *srcStride);
- + int src1= LD32(src + 1 *srcStride);
- + int src2= LD32(src + 2 *srcStride);
- + int src3= LD32(src + 3 *srcStride);
- + int src4= LD32(src + 4 *srcStride);
- + int src5= LD32(src + 5 *srcStride);
- + int src6= LD32(src + 6 *srcStride);
- +
- + /* First compute the leftmost three columns.
- + * Each output row takes two operations: the first on three rows in
- + * ascending order, the second (PICO_USE_ACC) on the next three rows
- + * loaded in reverse register order, so the same 1, -5, 20 taps
- + * cover both halves of the symmetric filter. The full 32-bit word
- + * is stored; its fourth byte is overwritten further down. */
- + PICO_MVRC_W(PICO_INPIX0, srcB);
- + PICO_MVRC_W(PICO_INPIX1, srcA);
- + PICO_MVRC_W(PICO_INPIX2, src0);
- + PICO_OP(0, 0, 0, 3, 6);
- + PICO_MVRC_W(PICO_INPIX2, src1);
- + PICO_MVRC_W(PICO_INPIX1, src2);
- + PICO_MVRC_W(PICO_INPIX0, src3);
- + PICO_OP(PICO_USE_ACC, 0, 0, 3, 6);
- + ST32(dst, PICO_GET_W(PICO_OUTPIX0));
- + dst += dstStride;
- + PICO_MVRC_W(PICO_INPIX0, srcA);
- + PICO_MVRC_W(PICO_INPIX1, src0);
- + PICO_MVRC_W(PICO_INPIX2, src1);
- + PICO_OP(0, 0, 0, 3, 6);
- + PICO_MVRC_W(PICO_INPIX2, src2);
- + PICO_MVRC_W(PICO_INPIX1, src3);
- + PICO_MVRC_W(PICO_INPIX0, src4);
- + PICO_OP(PICO_USE_ACC, 0, 0, 3, 6);
- + ST32(dst, PICO_GET_W(PICO_OUTPIX0));
- + dst += dstStride;
- + PICO_MVRC_W(PICO_INPIX0, src0);
- + PICO_MVRC_W(PICO_INPIX1, src1);
- + PICO_MVRC_W(PICO_INPIX2, src2);
- + PICO_OP(0, 0, 0, 3, 6);
- + PICO_MVRC_W(PICO_INPIX2, src3);
- + PICO_MVRC_W(PICO_INPIX1, src4);
- + PICO_MVRC_W(PICO_INPIX0, src5);
- + PICO_OP(PICO_USE_ACC, 0, 0, 3, 6);
- + ST32(dst, PICO_GET_W(PICO_OUTPIX0));
- + dst += dstStride;
- + PICO_MVRC_W(PICO_INPIX0, src1);
- + PICO_MVRC_W(PICO_INPIX1, src2);
- + PICO_MVRC_W(PICO_INPIX2, src3);
- + PICO_OP(0, 0, 0, 3, 6);
- + PICO_MVRC_W(PICO_INPIX2, src4);
- + PICO_MVRC_W(PICO_INPIX1, src5);
- + PICO_MVRC_W(PICO_INPIX0, src6);
- + PICO_OP(PICO_USE_ACC, 0, 0, 3, 6);
- + ST32(dst, PICO_GET_W(PICO_OUTPIX0));
- + /* Now compute the last column.
- + * dst is not advanced after the fourth store, so it still points
- + * at the last output row here. */
- +
- + union wordbytes {
- + int word;
- + struct {
- + unsigned int t:8;
- + unsigned int u:8;
- + unsigned int l:8;
- + unsigned int b:8;
- + } bytes; } tmp1, tmp2, tmp3;
- +
- + /* Each assignment keeps only the low 8 bits of the loaded row word.
- + * NOTE(review): bytes.b of tmp1..tmp3 is never assigned before the
- + * words are handed to the coprocessor, so that byte is
- + * indeterminate; presumably it is not used by the operations below
- + * -- confirm. */
- + tmp1.bytes.t = srcB;
- + tmp1.bytes.u = src1;
- + tmp1.bytes.l = src4;
- +
- + tmp2.bytes.t = srcA;
- + tmp2.bytes.u = src2;
- + tmp2.bytes.l = src5;
- +
- + tmp3.bytes.t = src0;
- + tmp3.bytes.u = src3;
- + tmp3.bytes.l = src6;
- +
- + PICO_MVRC_W(PICO_INPIX0, tmp1.word);
- + PICO_MVRC_W(PICO_INPIX1, tmp2.word);
- + PICO_MVRC_W(PICO_INPIX2, tmp3.word);
- + set_pico_config(&h264_qpel4_v_lowpass_config2);
- +
- +
- + PICO_OP(PICO_SINGLE_VECTOR, 0, 0, 3, 6);
- + PICO_OP(PICO_SINGLE_VECTOR, 1, 1, 4, 7);
- + PICO_OP(PICO_SINGLE_VECTOR, 2, 2, 5, 8);
- + PICO_OP(PICO_SINGLE_VECTOR, 3, 3, 6, 9);
- +
- + PICO_MVCR_W(tmp1.word, PICO_OUTPIX0);
- + dst[3] = (char)(tmp1.bytes.b); /* last row */
- + dst[3 - dstStride] = (char)(tmp1.bytes.l);
- + dst[3 - 2*dstStride] = (char)(tmp1.bytes.u);
- + dst[3 - 3*dstStride] = (char)(tmp1.bytes.t); /* first row */
- +
- + }
- + /*}
- +
- +
- + }*/
- +}
- + /* avg_h264_qpel4_v_lowpass_pico - vertical six-tap lowpass of a 4x4 block
- + * on the PICO coprocessor; the filtered block is averaged with the
- + * existing contents of dst through rnd_avg32.
- + *
- + * dst, dstStride: destination rows (read and written) and their byte
- + * distance.
- + * src, srcStride: source rows and their byte distance; the nine rows
- + * src - 2*srcStride .. src + 6*srcStride are read,
- + * four bytes each.
- + *
- + * The filtered block is first assembled in the 4x4 byte buffer tmp_block
- + * (three columns with h264_qpel4_v_lowpass_config1, the fourth with
- + * h264_qpel4_v_lowpass_config2) and only then combined with dst. The
- + * commented-out block is the reference C computation. */
- +static void avg_h264_qpel4_v_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){
- +
- + /*
- + const int w=4;
- + uint8_t *cm = cropTbl + MAX_NEG_CROP;
- + int i;
- + for(i=0; i<w; i++)
- + {
- + const int srcB= src[-2*srcStride];\
- + const int srcA= src[-1*srcStride];\
- + const int src0= src[0 *srcStride];\
- + const int src1= src[1 *srcStride];\
- + const int src2= src[2 *srcStride];\
- + const int src3= src[3 *srcStride];\
- + const int src4= src[4 *srcStride];\
- + const int src5= src[5 *srcStride];\
- + const int src6= src[6 *srcStride];\
- + OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
- + OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
- + OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
- + OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
- + dst++;\
- + src++;\
- + */
- + uint8_t tmp_block[4*4]; /* filtered 4x4 block, 4 bytes per row */
- +
- + set_pico_config(&h264_qpel4_v_lowpass_config1);
- +
- + {
- + int srcB= LD32(src - 2*srcStride);
- + int srcA= LD32(src - 1*srcStride);
- + int src0= LD32(src + 0 *srcStride);
- + int src1= LD32(src + 1 *srcStride);
- + int src2= LD32(src + 2 *srcStride);
- + int src3= LD32(src + 3 *srcStride);
- + int src4= LD32(src + 4 *srcStride);
- + int src5= LD32(src + 5 *srcStride);
- + int src6= LD32(src + 6 *srcStride);
- +
- + /* First compute the leftmost three columns.
- + * Each output row takes two operations: the first on three rows in
- + * ascending order, the second (PICO_USE_ACC) on the next three rows
- + * loaded in reverse register order, so the same 1, -5, 20 taps
- + * cover both halves of the symmetric filter. The full 32-bit word
- + * is stored; its fourth byte is overwritten further down. */
- + PICO_MVRC_W(PICO_INPIX0, srcB);
- + PICO_MVRC_W(PICO_INPIX1, srcA);
- + PICO_MVRC_W(PICO_INPIX2, src0);
- + PICO_OP(0, 0, 0, 3, 6);
- + PICO_MVRC_W(PICO_INPIX2, src1);
- + PICO_MVRC_W(PICO_INPIX1, src2);
- + PICO_MVRC_W(PICO_INPIX0, src3);
- + PICO_OP(PICO_USE_ACC, 0, 0, 3, 6);
- + ST32(tmp_block, PICO_GET_W(PICO_OUTPIX0));
- + PICO_MVRC_W(PICO_INPIX0, srcA);
- + PICO_MVRC_W(PICO_INPIX1, src0);
- + PICO_MVRC_W(PICO_INPIX2, src1);
- + PICO_OP(0, 0, 0, 3, 6);
- + PICO_MVRC_W(PICO_INPIX2, src2);
- + PICO_MVRC_W(PICO_INPIX1, src3);
- + PICO_MVRC_W(PICO_INPIX0, src4);
- + PICO_OP(PICO_USE_ACC, 0, 0, 3, 6);
- + ST32(tmp_block + 4, PICO_GET_W(PICO_OUTPIX0));
- + PICO_MVRC_W(PICO_INPIX0, src0);
- + PICO_MVRC_W(PICO_INPIX1, src1);
- + PICO_MVRC_W(PICO_INPIX2, src2);
- + PICO_OP(0, 0, 0, 3, 6);
- + PICO_MVRC_W(PICO_INPIX2, src3);
- + PICO_MVRC_W(PICO_INPIX1, src4);
- + PICO_MVRC_W(PICO_INPIX0, src5);
- + PICO_OP(PICO_USE_ACC, 0, 0, 3, 6);
- + ST32(tmp_block + 8, PICO_GET_W(PICO_OUTPIX0));
- + PICO_MVRC_W(PICO_INPIX0, src1);
- + PICO_MVRC_W(PICO_INPIX1, src2);
- + PICO_MVRC_W(PICO_INPIX2, src3);
- + PICO_OP(0, 0, 0, 3, 6);
- + PICO_MVRC_W(PICO_INPIX2, src4);
- + PICO_MVRC_W(PICO_INPIX1, src5);
- + PICO_MVRC_W(PICO_INPIX0, src6);
- + PICO_OP(PICO_USE_ACC, 0, 0, 3, 6);
- + ST32(tmp_block + 12, PICO_GET_W(PICO_OUTPIX0));
- + /* Now compute the last column */
- +
- + union wordbytes {
- + int word;
- + struct {
- + unsigned int t:8;
- + unsigned int u:8;
- + unsigned int l:8;
- + unsigned int b:8;
- + } bytes; } tmp1, tmp2, tmp3;
- +
- + /* Each assignment keeps only the low 8 bits of the loaded row word.
- + * NOTE(review): bytes.b of tmp1..tmp3 is never assigned before the
- + * words are handed to the coprocessor, so that byte is
- + * indeterminate; presumably it is not used by the operations below
- + * -- confirm. */
- + tmp1.bytes.t = srcB;
- + tmp1.bytes.u = src1;
- + tmp1.bytes.l = src4;
- +
- + tmp2.bytes.t = srcA;
- + tmp2.bytes.u = src2;
- + tmp2.bytes.l = src5;
- +
- + tmp3.bytes.t = src0;
- + tmp3.bytes.u = src3;
- + tmp3.bytes.l = src6;
- +
- + PICO_MVRC_W(PICO_INPIX0, tmp1.word);
- + PICO_MVRC_W(PICO_INPIX1, tmp2.word);
- + PICO_MVRC_W(PICO_INPIX2, tmp3.word);
- + set_pico_config(&h264_qpel4_v_lowpass_config2);
- +
- +
- + PICO_OP(PICO_SINGLE_VECTOR, 0, 0, 3, 6);
- + PICO_OP(PICO_SINGLE_VECTOR, 1, 1, 4, 7);
- + PICO_OP(PICO_SINGLE_VECTOR, 2, 2, 5, 8);
- + PICO_OP(PICO_SINGLE_VECTOR, 3, 3, 6, 9);
- +
- + PICO_MVCR_W(tmp1.word, PICO_OUTPIX0);
- + tmp_block[3 + 3*4] = (char)(tmp1.bytes.b); /* last row */
- + tmp_block[3 + 2*4] = (char)(tmp1.bytes.l);
- + tmp_block[3 + 1*4] = (char)(tmp1.bytes.u);
- + tmp_block[3] = (char)(tmp1.bytes.t); /* first row */
- +
- + /* Compute the average.
- + * The srcB..src5 variables are reused here: srcB, srcA, src0 and
- + * src1 now hold the four destination rows, src2..src5 the four
- + * filtered rows from tmp_block. */
- + srcB= LD32(dst);
- + srcA= LD32(dst + dstStride);
- + src0= LD32(dst + dstStride*2);
- + src1= LD32(dst + dstStride*3);
- +
- + src2= LD32(tmp_block);
- + src3= LD32(tmp_block + 4);
- + src4= LD32(tmp_block + 8);
- + src5= LD32(tmp_block + 12);
- +
- + ST32(dst, rnd_avg32(srcB, src2));
- + ST32(dst + dstStride, rnd_avg32(srcA, src3));
- + ST32(dst + 2*dstStride, rnd_avg32(src0, src4));
- + ST32(dst + 3*dstStride, rnd_avg32(src1, src5));
- + }
- +}
- + /* PICO configuration for the 4x4 combined horizontal/vertical lowpass:
- + * horizontal filter input, packed output, 10 fractional bits. The three
- + * coefficient groups are the taps 1, -5, 20 multiplied by 1, -5 and 20
- + * respectively; 512 is presumably the rounding offset (0.5 with 10
- + * fractional bits). The exact role of each coeffN_M field is defined in
- + * pico.h -- confirm. */
- +static struct pico_config_t h264_qpel4_hv_lowpass_config = {
- + .input_mode = PICO_HOR_FILTER_MODE,
- + .output_mode = PICO_PACKED_MODE,
- + .coeff_frac_bits = 10,
- + .offset_frac_bits = 10,
- + .coeff0_0 = 1,
- + .coeff0_1 = -5,
- + .coeff0_2 = 20,
- + .coeff0_3 = 512,
- + .coeff1_0 = -5,
- + .coeff1_1 = 25,
- + .coeff1_2 = -100,
- + .coeff1_3 = 0,
- + .coeff2_0 = 20,
- + .coeff2_1 = -100,
- + .coeff2_2 = 400,
- + .coeff2_3 = 0
- +};
- +
- +/*
- + * 4x4 "put" variant of the H.264 hv low-pass filter, computed on the PICO
- + * coprocessor in two passes.
- + *
- + * Pass 1 runs with h264_qpel4_hv_lowpass_config and spills the three VMU
- + * output registers to tmp_block after every accumulation step.
- + * Pass 2 reprograms PICO_CONFIG (vertical filter input, planar output),
- + * reloads those spilled values, accumulates on top of them and writes two
- + * bytes per row to dst.
- + *
- + * dst/dstStride: destination block and its line stride in bytes.
- + * src/srcStride: source block and its line stride in bytes; rows
- + *                -2 .. +6 relative to src are read.
- + *
- + * NOTE(review): the meaning of the PICO_OP arguments and of the
- + * PICO_STCM_W / PICO_LDCM_W_INC macros is defined outside this chunk; the
- + * statement order below must not be changed without consulting them.
- + */
- +static void put_h264_qpel4_hv_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){
- +
- + /* 2 loop iterations * 8 spills * 3 words per spill = 48 words. */
- + int32_t tmp_block[48];
- + int32_t *tmp = tmp_block;
- + int i;
- +
- + set_pico_config(&h264_qpel4_hv_lowpass_config);
- +
- + /* Pass 1 starts two bytes to the left of the block. */
- + src -= 2;
- + for ( i = 0; i < 2; i++ ){
- + /* Nine source rows (-2 .. +6), one 32-bit word each. */
- + int srcB= LD32(src - 2*srcStride);
- + int srcA= LD32(src - 1*srcStride);
- + int src0= LD32(src + 0 *srcStride);
- + int src1= LD32(src + 1 *srcStride);
- + int src2= LD32(src + 2 *srcStride);
- + int src3= LD32(src + 3 *srcStride);
- + int src4= LD32(src + 4 *srcStride);
- + int src5= LD32(src + 5 *srcStride);
- + int src6= LD32(src + 6 *srcStride);
- +
- + /* Each group below: one PICO_OP without PICO_USE_ACC, one with it,
- + then the three VMU outputs are spilled and tmp advances by 3. */
- + PICO_MVRC_W(PICO_INPIX0, srcB);
- + PICO_MVRC_W(PICO_INPIX1, srcA);
- + PICO_MVRC_W(PICO_INPIX2, src0);
- + PICO_OP(0, 0, 0, 4, 8);
- + PICO_MVRC_W(PICO_INPIX2, src1);
- + PICO_MVRC_W(PICO_INPIX1, src2);
- + PICO_MVRC_W(PICO_INPIX0, src3);
- + PICO_OP(PICO_USE_ACC, 0, 0, 4, 8);
- + PICO_STCM_W(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + tmp += 3;
- +
- + /* The first PICO_OP of this group reuses the input registers left
- + over from the previous group (src3/src2/src1). */
- + PICO_OP(0, 0, 1, 5, 9);
- + PICO_MVRC_W(PICO_INPIX0, srcB);
- + PICO_MVRC_W(PICO_INPIX1, srcA);
- + PICO_MVRC_W(PICO_INPIX2, src0);
- + PICO_OP(PICO_USE_ACC, 0, 1, 5, 9);
- + PICO_STCM_W(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + tmp += 3;
- +
- + /* Only INPIX0 is replaced here; INPIX1/INPIX2 still hold srcA/src0. */
- + PICO_MVRC_W(PICO_INPIX0, src1);
- + PICO_OP(0, 0, 4, 8, 0);
- + PICO_MVRC_W(PICO_INPIX2, src2);
- + PICO_MVRC_W(PICO_INPIX1, src3);
- + PICO_MVRC_W(PICO_INPIX0, src4);
- + PICO_OP(PICO_USE_ACC, 0, 0, 4, 8);
- + PICO_STCM_W(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + tmp += 3;
- +
- + PICO_OP(0, 0, 1, 5, 9);
- + PICO_MVRC_W(PICO_INPIX0, srcA);
- + PICO_MVRC_W(PICO_INPIX1, src0);
- + PICO_MVRC_W(PICO_INPIX2, src1);
- + PICO_OP(PICO_USE_ACC, 0, 1, 5, 9);
- + PICO_STCM_W(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + tmp += 3;
- +
- + PICO_MVRC_W(PICO_INPIX0, src2);
- + PICO_OP(0, 0, 4, 8, 0);
- + PICO_MVRC_W(PICO_INPIX2, src3);
- + PICO_MVRC_W(PICO_INPIX1, src4);
- + PICO_MVRC_W(PICO_INPIX0, src5);
- + PICO_OP(PICO_USE_ACC, 0, 0, 4, 8);
- + PICO_STCM_W(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + tmp += 3;
- +
- + PICO_OP(0, 0, 1, 5, 9);
- + PICO_MVRC_W(PICO_INPIX0, src0);
- + PICO_MVRC_W(PICO_INPIX1, src1);
- + PICO_MVRC_W(PICO_INPIX2, src2);
- + PICO_OP(PICO_USE_ACC, 0, 1, 5, 9);
- + PICO_STCM_W(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + tmp += 3;
- +
- + PICO_MVRC_W(PICO_INPIX0, src3);
- + PICO_OP(0, 0, 4, 8, 0);
- + PICO_MVRC_W(PICO_INPIX2, src4);
- + PICO_MVRC_W(PICO_INPIX1, src5);
- + PICO_MVRC_W(PICO_INPIX0, src6);
- + PICO_OP(PICO_USE_ACC, 0, 0, 4, 8);
- + PICO_STCM_W(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + tmp += 3;
- +
- + PICO_OP(0, 0, 1, 5, 9);
- + PICO_MVRC_W(PICO_INPIX0, src1);
- + PICO_MVRC_W(PICO_INPIX1, src2);
- + PICO_MVRC_W(PICO_INPIX2, src3);
- + PICO_OP(PICO_USE_ACC, 0, 1, 5, 9);
- + PICO_STCM_W(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + tmp += 3;
- + src += 2;
- + }
- +
- + /* src is now 2 past its entry value; step back by one for pass 2 and
- + rewind tmp to the start of tmp_block. */
- + src -= 1;
- + tmp -= 48;
- +
- +
- + /* Switch the coprocessor to vertical filter input / planar output; the
- + coefficient registers are not rewritten here. */
- + PICO_PUT_W(PICO_CONFIG,
- + PICO_OUTPUT_MODE(PICO_PLANAR_MODE)
- + | PICO_INPUT_MODE(PICO_VERT_FILTER_MODE)
- + | PICO_COEFF_FRAC_BITS(10)
- + | PICO_OFFSET_FRAC_BITS(10));
- +
- + for ( i = 0; i < 2; i++ ){
- + int srcB= LD32(src - 2*srcStride);
- + int srcA= LD32(src - 1*srcStride);
- + int src0= LD32(src + 0 *srcStride);
- + int src1= LD32(src + 1 *srcStride);
- + int src2= LD32(src + 2 *srcStride);
- + int src3= LD32(src + 3 *srcStride);
- + int src4= LD32(src + 4 *srcStride);
- + int src5= LD32(src + 5 *srcStride);
- + int src6= LD32(src + 6 *srcStride);
- +
- +
- + /* Reload one spilled VMU triple from pass 1. tmp is never advanced
- + explicitly in this loop, so PICO_LDCM_W_INC presumably
- + post-increments it -- confirm against the macro definition. */
- + PICO_LDCM_W_INC(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + PICO_MVRC_W(PICO_INPIX0, srcB);
- + PICO_MVRC_W(PICO_INPIX1, srcA);
- + PICO_MVRC_W(PICO_INPIX2, src0);
- + PICO_OP(PICO_USE_ACC, 0, 6, 3, 0);
- + PICO_MVRC_W(PICO_INPIX2, src1);
- + PICO_MVRC_W(PICO_INPIX1, src2);
- + PICO_MVRC_W(PICO_INPIX0, src3);
- + PICO_OP(PICO_USE_ACC | PICO_SINGLE_VECTOR, 0, 6, 3, 0);
- +
- + PICO_LDCM_W_INC(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + PICO_OP(PICO_USE_ACC, 1, 9, 6, 3);
- + PICO_MVRC_W(PICO_INPIX0, srcB);
- + PICO_MVRC_W(PICO_INPIX1, srcA);
- + PICO_MVRC_W(PICO_INPIX2, src0);
- + PICO_OP(PICO_USE_ACC | PICO_SINGLE_VECTOR, 1, 9, 6, 3);
- +
- + PICO_LDCM_W_INC(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + PICO_MVRC_W(PICO_INPIX0, srcA);
- + PICO_MVRC_W(PICO_INPIX1, src0);
- + PICO_MVRC_W(PICO_INPIX2, src1);
- + PICO_OP(PICO_USE_ACC, 2, 6, 3, 0);
- + PICO_MVRC_W(PICO_INPIX2, src2);
- + PICO_MVRC_W(PICO_INPIX1, src3);
- + PICO_MVRC_W(PICO_INPIX0, src4);
- + PICO_OP(PICO_USE_ACC | PICO_SINGLE_VECTOR, 2, 6, 3, 0);
- +
- + PICO_LDCM_W_INC(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + PICO_OP(PICO_USE_ACC, 3, 9, 6, 3);
- + PICO_MVRC_W(PICO_INPIX0, srcA);
- + PICO_MVRC_W(PICO_INPIX1, src0);
- + PICO_MVRC_W(PICO_INPIX2, src1);
- + PICO_OP(PICO_USE_ACC | PICO_SINGLE_VECTOR, 3, 9, 6, 3);
- +
- + /* Upper halfword of OUTPIX0 goes to row 0, lower halfword to row 1
- + (two bytes per row). */
- + ST16(dst + 0*dstStride, (short)(PICO_GET_W(PICO_OUTPIX0) >> 16));
- + ST16(dst + 1*dstStride, (short)PICO_GET_W(PICO_OUTPIX0));
- +
- +
- + /* Same four-step sequence shifted down by two source rows, producing
- + destination rows 2 and 3. */
- + PICO_LDCM_W_INC(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + PICO_MVRC_W(PICO_INPIX0, src0);
- + PICO_MVRC_W(PICO_INPIX1, src1);
- + PICO_MVRC_W(PICO_INPIX2, src2);
- + PICO_OP(PICO_USE_ACC, 0, 6, 3, 0);
- + PICO_MVRC_W(PICO_INPIX2, src3);
- + PICO_MVRC_W(PICO_INPIX1, src4);
- + PICO_MVRC_W(PICO_INPIX0, src5);
- + PICO_OP(PICO_USE_ACC| PICO_SINGLE_VECTOR, 0, 6, 3, 0);
- +
- + PICO_LDCM_W_INC(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + PICO_OP(PICO_USE_ACC, 1, 9, 6, 3);
- + PICO_MVRC_W(PICO_INPIX0, src0);
- + PICO_MVRC_W(PICO_INPIX1, src1);
- + PICO_MVRC_W(PICO_INPIX2, src2);
- + PICO_OP(PICO_USE_ACC| PICO_SINGLE_VECTOR, 1, 9, 6, 3);
- +
- + PICO_LDCM_W_INC(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + PICO_MVRC_W(PICO_INPIX0, src1);
- + PICO_MVRC_W(PICO_INPIX1, src2);
- + PICO_MVRC_W(PICO_INPIX2, src3);
- + PICO_OP(PICO_USE_ACC, 2, 6, 3, 0);
- + PICO_MVRC_W(PICO_INPIX2, src4);
- + PICO_MVRC_W(PICO_INPIX1, src5);
- + PICO_MVRC_W(PICO_INPIX0, src6);
- + PICO_OP(PICO_USE_ACC| PICO_SINGLE_VECTOR, 2, 6, 3, 0);
- +
- + PICO_LDCM_W_INC(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + PICO_OP(PICO_USE_ACC, 3, 9, 6, 3);
- + PICO_MVRC_W(PICO_INPIX0, src1);
- + PICO_MVRC_W(PICO_INPIX1, src2);
- + PICO_MVRC_W(PICO_INPIX2, src3);
- + PICO_OP(PICO_USE_ACC| PICO_SINGLE_VECTOR, 3, 9, 6, 3);
- +
- + ST16(dst + 2*dstStride, (short)(PICO_GET_W(PICO_OUTPIX0) >> 16));
- + ST16(dst + 3*dstStride, (short)PICO_GET_W(PICO_OUTPIX0));
- +
- + /* Next iteration handles the two columns to the right. */
- + dst += 2;
- + src += 2;
- + }
- +}
- +
- +
- +
- +
- +/*
- + * 4x4 "avg" variant of the H.264 hv low-pass filter on the PICO coprocessor.
- + *
- + * The coprocessor sequence is the same two-pass scheme as in
- + * put_h264_qpel4_hv_lowpass_pico; the only difference is the final store,
- + * which combines the filter output with the bytes already in dst through
- + * rnd_avg32() before writing them back.
- + *
- + * dst/dstStride: destination block (read and written) and its line stride.
- + * src/srcStride: source block and its line stride; rows -2 .. +6 relative
- + *                to src are read.
- + *
- + * NOTE(review): the meaning of the PICO_OP arguments and of the
- + * PICO_STCM_W / PICO_LDCM_W_INC macros is defined outside this chunk; the
- + * statement order below must not be changed without consulting them.
- + */
- +static void avg_h264_qpel4_hv_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){
- +
- + /* 2 loop iterations * 8 spills * 3 words per spill = 48 words. */
- + int32_t tmp_block[48];
- + int32_t *tmp = tmp_block;
- + int i;
- +
- + set_pico_config(&h264_qpel4_hv_lowpass_config);
- +
- + /* Pass 1 starts two bytes to the left of the block. */
- + src -= 2;
- + for ( i = 0; i < 2; i++ ){
- + /* Nine source rows (-2 .. +6), one 32-bit word each. */
- + int srcB= LD32(src - 2*srcStride);
- + int srcA= LD32(src - 1*srcStride);
- + int src0= LD32(src + 0 *srcStride);
- + int src1= LD32(src + 1 *srcStride);
- + int src2= LD32(src + 2 *srcStride);
- + int src3= LD32(src + 3 *srcStride);
- + int src4= LD32(src + 4 *srcStride);
- + int src5= LD32(src + 5 *srcStride);
- + int src6= LD32(src + 6 *srcStride);
- +
- + /* Each group below: one PICO_OP without PICO_USE_ACC, one with it,
- + then the three VMU outputs are spilled and tmp advances by 3. */
- + PICO_MVRC_W(PICO_INPIX0, srcB);
- + PICO_MVRC_W(PICO_INPIX1, srcA);
- + PICO_MVRC_W(PICO_INPIX2, src0);
- + PICO_OP(0, 0, 0, 4, 8);
- + PICO_MVRC_W(PICO_INPIX2, src1);
- + PICO_MVRC_W(PICO_INPIX1, src2);
- + PICO_MVRC_W(PICO_INPIX0, src3);
- + PICO_OP(PICO_USE_ACC, 0, 0, 4, 8);
- + PICO_STCM_W(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + tmp += 3;
- +
- + /* The first PICO_OP of this group reuses the input registers left
- + over from the previous group (src3/src2/src1). */
- + PICO_OP(0, 0, 1, 5, 9);
- + PICO_MVRC_W(PICO_INPIX0, srcB);
- + PICO_MVRC_W(PICO_INPIX1, srcA);
- + PICO_MVRC_W(PICO_INPIX2, src0);
- + PICO_OP(PICO_USE_ACC, 0, 1, 5, 9);
- + PICO_STCM_W(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + tmp += 3;
- +
- + /* Only INPIX0 is replaced here; INPIX1/INPIX2 still hold srcA/src0. */
- + PICO_MVRC_W(PICO_INPIX0, src1);
- + PICO_OP(0, 0, 4, 8, 0);
- + PICO_MVRC_W(PICO_INPIX2, src2);
- + PICO_MVRC_W(PICO_INPIX1, src3);
- + PICO_MVRC_W(PICO_INPIX0, src4);
- + PICO_OP(PICO_USE_ACC, 0, 0, 4, 8);
- + PICO_STCM_W(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + tmp += 3;
- +
- + PICO_OP(0, 0, 1, 5, 9);
- + PICO_MVRC_W(PICO_INPIX0, srcA);
- + PICO_MVRC_W(PICO_INPIX1, src0);
- + PICO_MVRC_W(PICO_INPIX2, src1);
- + PICO_OP(PICO_USE_ACC, 0, 1, 5, 9);
- + PICO_STCM_W(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + tmp += 3;
- +
- + PICO_MVRC_W(PICO_INPIX0, src2);
- + PICO_OP(0, 0, 4, 8, 0);
- + PICO_MVRC_W(PICO_INPIX2, src3);
- + PICO_MVRC_W(PICO_INPIX1, src4);
- + PICO_MVRC_W(PICO_INPIX0, src5);
- + PICO_OP(PICO_USE_ACC, 0, 0, 4, 8);
- + PICO_STCM_W(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + tmp += 3;
- +
- + PICO_OP(0, 0, 1, 5, 9);
- + PICO_MVRC_W(PICO_INPIX0, src0);
- + PICO_MVRC_W(PICO_INPIX1, src1);
- + PICO_MVRC_W(PICO_INPIX2, src2);
- + PICO_OP(PICO_USE_ACC, 0, 1, 5, 9);
- + PICO_STCM_W(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + tmp += 3;
- +
- + PICO_MVRC_W(PICO_INPIX0, src3);
- + PICO_OP(0, 0, 4, 8, 0);
- + PICO_MVRC_W(PICO_INPIX2, src4);
- + PICO_MVRC_W(PICO_INPIX1, src5);
- + PICO_MVRC_W(PICO_INPIX0, src6);
- + PICO_OP(PICO_USE_ACC, 0, 0, 4, 8);
- + PICO_STCM_W(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + tmp += 3;
- +
- + PICO_OP(0, 0, 1, 5, 9);
- + PICO_MVRC_W(PICO_INPIX0, src1);
- + PICO_MVRC_W(PICO_INPIX1, src2);
- + PICO_MVRC_W(PICO_INPIX2, src3);
- + PICO_OP(PICO_USE_ACC, 0, 1, 5, 9);
- + PICO_STCM_W(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + tmp += 3;
- + src += 2;
- + }
- +
- + /* src is now 2 past its entry value; step back by one for pass 2 and
- + rewind tmp to the start of tmp_block. */
- + src -= 1;
- + tmp -= 48;
- +
- +
- + /* Switch the coprocessor to vertical filter input / planar output; the
- + coefficient registers are not rewritten here. */
- + PICO_PUT_W(PICO_CONFIG,
- + PICO_OUTPUT_MODE(PICO_PLANAR_MODE)
- + | PICO_INPUT_MODE(PICO_VERT_FILTER_MODE)
- + | PICO_COEFF_FRAC_BITS(10)
- + | PICO_OFFSET_FRAC_BITS(10));
- +
- + for ( i = 0; i < 2; i++ ){
- + int srcB= LD32(src - 2*srcStride);
- + int srcA= LD32(src - 1*srcStride);
- + int src0= LD32(src + 0 *srcStride);
- + int src1= LD32(src + 1 *srcStride);
- + int src2= LD32(src + 2 *srcStride);
- + int src3= LD32(src + 3 *srcStride);
- + int src4= LD32(src + 4 *srcStride);
- + int src5= LD32(src + 5 *srcStride);
- + int src6= LD32(src + 6 *srcStride);
- +
- + /* Reload one spilled VMU triple from pass 1. tmp is never advanced
- + explicitly in this loop, so PICO_LDCM_W_INC presumably
- + post-increments it -- confirm against the macro definition. */
- + PICO_LDCM_W_INC(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + PICO_MVRC_W(PICO_INPIX0, srcB);
- + PICO_MVRC_W(PICO_INPIX1, srcA);
- + PICO_MVRC_W(PICO_INPIX2, src0);
- + PICO_OP(PICO_USE_ACC, 0, 6, 3, 0);
- + PICO_MVRC_W(PICO_INPIX2, src1);
- + PICO_MVRC_W(PICO_INPIX1, src2);
- + PICO_MVRC_W(PICO_INPIX0, src3);
- + PICO_OP(PICO_USE_ACC | PICO_SINGLE_VECTOR, 0, 6, 3, 0);
- +
- + PICO_LDCM_W_INC(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + PICO_OP(PICO_USE_ACC, 1, 9, 6, 3);
- + PICO_MVRC_W(PICO_INPIX0, srcB);
- + PICO_MVRC_W(PICO_INPIX1, srcA);
- + PICO_MVRC_W(PICO_INPIX2, src0);
- + PICO_OP(PICO_USE_ACC | PICO_SINGLE_VECTOR, 1, 9, 6, 3);
- +
- + PICO_LDCM_W_INC(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + PICO_MVRC_W(PICO_INPIX0, srcA);
- + PICO_MVRC_W(PICO_INPIX1, src0);
- + PICO_MVRC_W(PICO_INPIX2, src1);
- + PICO_OP(PICO_USE_ACC, 2, 6, 3, 0);
- + PICO_MVRC_W(PICO_INPIX2, src2);
- + PICO_MVRC_W(PICO_INPIX1, src3);
- + PICO_MVRC_W(PICO_INPIX0, src4);
- + PICO_OP(PICO_USE_ACC | PICO_SINGLE_VECTOR, 2, 6, 3, 0);
- +
- + PICO_LDCM_W_INC(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + PICO_OP(PICO_USE_ACC, 3, 9, 6, 3);
- + PICO_MVRC_W(PICO_INPIX0, srcA);
- + PICO_MVRC_W(PICO_INPIX1, src0);
- + PICO_MVRC_W(PICO_INPIX2, src1);
- + PICO_OP(PICO_USE_ACC | PICO_SINGLE_VECTOR, 3, 9, 6, 3);
- +
- + /* Average the existing two destination bytes of rows 0 and 1 with the
- + upper and lower halfword of OUTPIX0 respectively; ST16 writes back
- + 16 bits per row. */
- + ST16(dst + 0*dstStride, rnd_avg32(LD16(dst + 0*dstStride), PICO_GET_W(PICO_OUTPIX0) >> 16));
- + ST16(dst + 1*dstStride, rnd_avg32(LD16(dst + 1*dstStride), PICO_GET_W(PICO_OUTPIX0)));
- +
- +
- + /* Same four-step sequence shifted down by two source rows, producing
- + destination rows 2 and 3. */
- + PICO_LDCM_W_INC(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + PICO_MVRC_W(PICO_INPIX0, src0);
- + PICO_MVRC_W(PICO_INPIX1, src1);
- + PICO_MVRC_W(PICO_INPIX2, src2);
- + PICO_OP(PICO_USE_ACC, 0, 6, 3, 0);
- + PICO_MVRC_W(PICO_INPIX2, src3);
- + PICO_MVRC_W(PICO_INPIX1, src4);
- + PICO_MVRC_W(PICO_INPIX0, src5);
- + PICO_OP(PICO_USE_ACC| PICO_SINGLE_VECTOR, 0, 6, 3, 0);
- +
- + PICO_LDCM_W_INC(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + PICO_OP(PICO_USE_ACC, 1, 9, 6, 3);
- + PICO_MVRC_W(PICO_INPIX0, src0);
- + PICO_MVRC_W(PICO_INPIX1, src1);
- + PICO_MVRC_W(PICO_INPIX2, src2);
- + PICO_OP(PICO_USE_ACC| PICO_SINGLE_VECTOR, 1, 9, 6, 3);
- +
- + PICO_LDCM_W_INC(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + PICO_MVRC_W(PICO_INPIX0, src1);
- + PICO_MVRC_W(PICO_INPIX1, src2);
- + PICO_MVRC_W(PICO_INPIX2, src3);
- + PICO_OP(PICO_USE_ACC, 2, 6, 3, 0);
- + PICO_MVRC_W(PICO_INPIX2, src4);
- + PICO_MVRC_W(PICO_INPIX1, src5);
- + PICO_MVRC_W(PICO_INPIX0, src6);
- + PICO_OP(PICO_USE_ACC| PICO_SINGLE_VECTOR, 2, 6, 3, 0);
- +
- + PICO_LDCM_W_INC(tmp,
- + PICO_REGVECT_VMU0_OUT,
- + PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT);
- + PICO_OP(PICO_USE_ACC, 3, 9, 6, 3);
- + PICO_MVRC_W(PICO_INPIX0, src1);
- + PICO_MVRC_W(PICO_INPIX1, src2);
- + PICO_MVRC_W(PICO_INPIX2, src3);
- + PICO_OP(PICO_USE_ACC| PICO_SINGLE_VECTOR, 3, 9, 6, 3);
- +
- + ST16(dst + 2*dstStride, rnd_avg32(LD16(dst + 2*dstStride), PICO_GET_W(PICO_OUTPIX0) >> 16));
- + ST16(dst + 3*dstStride, rnd_avg32(LD16(dst + 3*dstStride), PICO_GET_W(PICO_OUTPIX0)));
- +
- + /* Next iteration handles the two columns to the right. */
- + dst += 2;
- + src += 2;
- + }
- +}
- +
- +
- +/* 8x8 vertical low-pass (put), built from four 4x4 calls in the order
- +   top-left, top-right, bottom-left, bottom-right. */
- +static void put_h264_qpel8_v_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){
- + uint8_t *const dst_bottom = dst + 4*dstStride;
- + uint8_t *const src_bottom = src + 4*srcStride;
- +
- + put_h264_qpel4_v_lowpass_pico(dst, src, dstStride, srcStride);
- + put_h264_qpel4_v_lowpass_pico(dst + 4, src + 4, dstStride, srcStride);
- + put_h264_qpel4_v_lowpass_pico(dst_bottom, src_bottom, dstStride, srcStride);
- + put_h264_qpel4_v_lowpass_pico(dst_bottom + 4, src_bottom + 4, dstStride, srcStride);
- +}
- +
- +/* 8x8 vertical low-pass (avg), built from four 4x4 calls in the order
- +   top-left, top-right, bottom-left, bottom-right. */
- +static void avg_h264_qpel8_v_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){
- + uint8_t *const dst_bottom = dst + 4*dstStride;
- + uint8_t *const src_bottom = src + 4*srcStride;
- +
- + avg_h264_qpel4_v_lowpass_pico(dst, src, dstStride, srcStride);
- + avg_h264_qpel4_v_lowpass_pico(dst + 4, src + 4, dstStride, srcStride);
- + avg_h264_qpel4_v_lowpass_pico(dst_bottom, src_bottom, dstStride, srcStride);
- + avg_h264_qpel4_v_lowpass_pico(dst_bottom + 4, src_bottom + 4, dstStride, srcStride);
- +}
- +
- +/* 8x8 horizontal low-pass (put), built from four 4x4 calls in the order
- +   top-left, top-right, bottom-left, bottom-right. */
- +static void put_h264_qpel8_h_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){
- + uint8_t *const dst_bottom = dst + 4*dstStride;
- + uint8_t *const src_bottom = src + 4*srcStride;
- +
- + put_h264_qpel4_h_lowpass_pico(dst, src, dstStride, srcStride);
- + put_h264_qpel4_h_lowpass_pico(dst + 4, src + 4, dstStride, srcStride);
- + put_h264_qpel4_h_lowpass_pico(dst_bottom, src_bottom, dstStride, srcStride);
- + put_h264_qpel4_h_lowpass_pico(dst_bottom + 4, src_bottom + 4, dstStride, srcStride);
- +}
- +
- +/* 8x8 horizontal low-pass (avg), built from four 4x4 calls in the order
- +   top-left, top-right, bottom-left, bottom-right. */
- +static void avg_h264_qpel8_h_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){
- + uint8_t *const dst_bottom = dst + 4*dstStride;
- + uint8_t *const src_bottom = src + 4*srcStride;
- +
- + avg_h264_qpel4_h_lowpass_pico(dst, src, dstStride, srcStride);
- + avg_h264_qpel4_h_lowpass_pico(dst + 4, src + 4, dstStride, srcStride);
- + avg_h264_qpel4_h_lowpass_pico(dst_bottom, src_bottom, dstStride, srcStride);
- + avg_h264_qpel4_h_lowpass_pico(dst_bottom + 4, src_bottom + 4, dstStride, srcStride);
- +}
- +
- +/* 8x8 hv low-pass (put), built from four 4x4 calls in the order
- +   top-left, top-right, bottom-left, bottom-right. */
- +static void put_h264_qpel8_hv_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){
- + uint8_t *const dst_bottom = dst + 4*dstStride;
- + uint8_t *const src_bottom = src + 4*srcStride;
- +
- + put_h264_qpel4_hv_lowpass_pico(dst, src, dstStride, srcStride);
- + put_h264_qpel4_hv_lowpass_pico(dst + 4, src + 4, dstStride, srcStride);
- + put_h264_qpel4_hv_lowpass_pico(dst_bottom, src_bottom, dstStride, srcStride);
- + put_h264_qpel4_hv_lowpass_pico(dst_bottom + 4, src_bottom + 4, dstStride, srcStride);
- +}
- +
- +/* 8x8 hv low-pass (avg), built from four 4x4 calls in the order
- +   top-left, top-right, bottom-left, bottom-right. */
- +static void avg_h264_qpel8_hv_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){
- + uint8_t *const dst_bottom = dst + 4*dstStride;
- + uint8_t *const src_bottom = src + 4*srcStride;
- +
- + avg_h264_qpel4_hv_lowpass_pico(dst, src, dstStride, srcStride);
- + avg_h264_qpel4_hv_lowpass_pico(dst + 4, src + 4, dstStride, srcStride);
- + avg_h264_qpel4_hv_lowpass_pico(dst_bottom, src_bottom, dstStride, srcStride);
- + avg_h264_qpel4_hv_lowpass_pico(dst_bottom + 4, src_bottom + 4, dstStride, srcStride);
- +}
- +
- +/* 16x16 vertical low-pass (put), built from four 8x8 calls in the order
- +   top-left, top-right, bottom-left, bottom-right. */
- +static void put_h264_qpel16_v_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){
- + uint8_t *const dst_bottom = dst + 8*dstStride;
- + uint8_t *const src_bottom = src + 8*srcStride;
- +
- + put_h264_qpel8_v_lowpass_pico(dst, src, dstStride, srcStride);
- + put_h264_qpel8_v_lowpass_pico(dst + 8, src + 8, dstStride, srcStride);
- + put_h264_qpel8_v_lowpass_pico(dst_bottom, src_bottom, dstStride, srcStride);
- + put_h264_qpel8_v_lowpass_pico(dst_bottom + 8, src_bottom + 8, dstStride, srcStride);
- +}
- +
- +/* 16x16 vertical low-pass (avg), built from four 8x8 calls in the order
- +   top-left, top-right, bottom-left, bottom-right. */
- +static void avg_h264_qpel16_v_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){
- + uint8_t *const dst_bottom = dst + 8*dstStride;
- + uint8_t *const src_bottom = src + 8*srcStride;
- +
- + avg_h264_qpel8_v_lowpass_pico(dst, src, dstStride, srcStride);
- + avg_h264_qpel8_v_lowpass_pico(dst + 8, src + 8, dstStride, srcStride);
- + avg_h264_qpel8_v_lowpass_pico(dst_bottom, src_bottom, dstStride, srcStride);
- + avg_h264_qpel8_v_lowpass_pico(dst_bottom + 8, src_bottom + 8, dstStride, srcStride);
- +}
- +
- +/* 16x16 horizontal low-pass (put), built from four 8x8 calls in the order
- +   top-left, top-right, bottom-left, bottom-right. */
- +static void put_h264_qpel16_h_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){
- + uint8_t *const dst_bottom = dst + 8*dstStride;
- + uint8_t *const src_bottom = src + 8*srcStride;
- +
- + put_h264_qpel8_h_lowpass_pico(dst, src, dstStride, srcStride);
- + put_h264_qpel8_h_lowpass_pico(dst + 8, src + 8, dstStride, srcStride);
- + put_h264_qpel8_h_lowpass_pico(dst_bottom, src_bottom, dstStride, srcStride);
- + put_h264_qpel8_h_lowpass_pico(dst_bottom + 8, src_bottom + 8, dstStride, srcStride);
- +}
- +
- +/* 16x16 horizontal low-pass (avg), built from four 8x8 calls in the order
- +   top-left, top-right, bottom-left, bottom-right. */
- +static void avg_h264_qpel16_h_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){
- + uint8_t *const dst_bottom = dst + 8*dstStride;
- + uint8_t *const src_bottom = src + 8*srcStride;
- +
- + avg_h264_qpel8_h_lowpass_pico(dst, src, dstStride, srcStride);
- + avg_h264_qpel8_h_lowpass_pico(dst + 8, src + 8, dstStride, srcStride);
- + avg_h264_qpel8_h_lowpass_pico(dst_bottom, src_bottom, dstStride, srcStride);
- + avg_h264_qpel8_h_lowpass_pico(dst_bottom + 8, src_bottom + 8, dstStride, srcStride);
- +}
- +
- +/* 16x16 hv low-pass (put), built from four 8x8 calls in the order
- +   top-left, top-right, bottom-left, bottom-right. */
- +static void put_h264_qpel16_hv_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){
- + uint8_t *const dst_bottom = dst + 8*dstStride;
- + uint8_t *const src_bottom = src + 8*srcStride;
- +
- + put_h264_qpel8_hv_lowpass_pico(dst, src, dstStride, srcStride);
- + put_h264_qpel8_hv_lowpass_pico(dst + 8, src + 8, dstStride, srcStride);
- + put_h264_qpel8_hv_lowpass_pico(dst_bottom, src_bottom, dstStride, srcStride);
- + put_h264_qpel8_hv_lowpass_pico(dst_bottom + 8, src_bottom + 8, dstStride, srcStride);
- +}
- +
- +/* 16x16 hv low-pass (avg), built from four 8x8 calls in the order
- +   top-left, top-right, bottom-left, bottom-right. */
- +static void avg_h264_qpel16_hv_lowpass_pico(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){
- + uint8_t *const dst_bottom = dst + 8*dstStride;
- + uint8_t *const src_bottom = src + 8*srcStride;
- +
- + avg_h264_qpel8_hv_lowpass_pico(dst, src, dstStride, srcStride);
- + avg_h264_qpel8_hv_lowpass_pico(dst + 8, src + 8, dstStride, srcStride);
- + avg_h264_qpel8_hv_lowpass_pico(dst_bottom, src_bottom, dstStride, srcStride);
- + avg_h264_qpel8_hv_lowpass_pico(dst_bottom + 8, src_bottom + 8, dstStride, srcStride);
- +}
- +
- +
- +/*
- + * H264_MC(OPNAME, SIZE) defines the sixteen static functions
- + * OPNAME h264_qpel SIZE _mcXY_pico(dst, src, stride) for X, Y in 0..3.
- + *
- + *  - mc00 forwards to OPNAME pixels SIZE _c.
- + *  - mc20, mc02 and mc22 call the OPNAME h/v/hv low-pass routine directly.
- + *  - Every other variant computes one or two SIZE x SIZE intermediate
- + *    blocks with the put_ low-pass routines into stack buffers and combines
- + *    two inputs through OPNAME pixels SIZE _l2.
- + *
- + * Variants that use the vertical filter first copy SIZE+5 source rows,
- + * starting two rows above src, into the local buffer "full"; "full_mid"
- + * points at the row corresponding to src inside that copy.
- + */
- +#define H264_MC(OPNAME, SIZE) \
- +static void OPNAME ## h264_qpel ## SIZE ## _mc00_pico (uint8_t *dst, uint8_t *src, int stride){\
- + OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
- +}\
- +\
- +static void OPNAME ## h264_qpel ## SIZE ## _mc10_pico(uint8_t *dst, uint8_t *src, int stride){\
- + uint8_t half[SIZE*SIZE];\
- + put_h264_qpel ## SIZE ## _h_lowpass_pico(half, src, SIZE, stride);\
- + OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
- +}\
- +\
- +static void OPNAME ## h264_qpel ## SIZE ## _mc20_pico(uint8_t *dst, uint8_t *src, int stride){\
- + OPNAME ## h264_qpel ## SIZE ## _h_lowpass_pico(dst, src, stride, stride);\
- +}\
- +\
- +static void OPNAME ## h264_qpel ## SIZE ## _mc30_pico(uint8_t *dst, uint8_t *src, int stride){\
- + uint8_t half[SIZE*SIZE];\
- + put_h264_qpel ## SIZE ## _h_lowpass_pico(half, src, SIZE, stride);\
- + OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
- +}\
- +\
- +static void OPNAME ## h264_qpel ## SIZE ## _mc01_pico(uint8_t *dst, uint8_t *src, int stride){\
- + uint8_t full[SIZE*(SIZE+5)];\
- + uint8_t * const full_mid= full + SIZE*2;\
- + uint8_t half[SIZE*SIZE];\
- + copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
- + put_h264_qpel ## SIZE ## _v_lowpass_pico(half, full_mid, SIZE, SIZE);\
- + OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
- +}\
- +\
- +static void OPNAME ## h264_qpel ## SIZE ## _mc02_pico(uint8_t *dst, uint8_t *src, int stride){\
- + uint8_t full[SIZE*(SIZE+5)];\
- + uint8_t * const full_mid= full + SIZE*2;\
- + copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
- + OPNAME ## h264_qpel ## SIZE ## _v_lowpass_pico(dst, full_mid, stride, SIZE);\
- +}\
- +\
- +static void OPNAME ## h264_qpel ## SIZE ## _mc03_pico(uint8_t *dst, uint8_t *src, int stride){\
- + uint8_t full[SIZE*(SIZE+5)];\
- + uint8_t * const full_mid= full + SIZE*2;\
- + uint8_t half[SIZE*SIZE];\
- + copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
- + put_h264_qpel ## SIZE ## _v_lowpass_pico(half, full_mid, SIZE, SIZE);\
- + OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
- +}\
- +\
- +static void OPNAME ## h264_qpel ## SIZE ## _mc11_pico(uint8_t *dst, uint8_t *src, int stride){\
- + uint8_t full[SIZE*(SIZE+5)];\
- + uint8_t * const full_mid= full + SIZE*2;\
- + uint8_t halfH[SIZE*SIZE];\
- + uint8_t halfV[SIZE*SIZE];\
- + put_h264_qpel ## SIZE ## _h_lowpass_pico(halfH, src, SIZE, stride);\
- + copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
- + put_h264_qpel ## SIZE ## _v_lowpass_pico(halfV, full_mid, SIZE, SIZE);\
- + OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
- +}\
- +\
- +static void OPNAME ## h264_qpel ## SIZE ## _mc31_pico(uint8_t *dst, uint8_t *src, int stride){\
- + uint8_t full[SIZE*(SIZE+5)];\
- + uint8_t * const full_mid= full + SIZE*2;\
- + uint8_t halfH[SIZE*SIZE];\
- + uint8_t halfV[SIZE*SIZE];\
- + put_h264_qpel ## SIZE ## _h_lowpass_pico(halfH, src, SIZE, stride);\
- + copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
- + put_h264_qpel ## SIZE ## _v_lowpass_pico(halfV, full_mid, SIZE, SIZE);\
- + OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
- +}\
- +\
- +static void OPNAME ## h264_qpel ## SIZE ## _mc13_pico(uint8_t *dst, uint8_t *src, int stride){\
- + uint8_t full[SIZE*(SIZE+5)];\
- + uint8_t * const full_mid= full + SIZE*2;\
- + uint8_t halfH[SIZE*SIZE];\
- + uint8_t halfV[SIZE*SIZE];\
- + put_h264_qpel ## SIZE ## _h_lowpass_pico(halfH, src + stride, SIZE, stride);\
- + copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
- + put_h264_qpel ## SIZE ## _v_lowpass_pico(halfV, full_mid, SIZE, SIZE);\
- + OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
- +}\
- +\
- +static void OPNAME ## h264_qpel ## SIZE ## _mc33_pico(uint8_t *dst, uint8_t *src, int stride){\
- + uint8_t full[SIZE*(SIZE+5)];\
- + uint8_t * const full_mid= full + SIZE*2;\
- + uint8_t halfH[SIZE*SIZE];\
- + uint8_t halfV[SIZE*SIZE];\
- + put_h264_qpel ## SIZE ## _h_lowpass_pico(halfH, src + stride, SIZE, stride);\
- + copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
- + put_h264_qpel ## SIZE ## _v_lowpass_pico(halfV, full_mid, SIZE, SIZE);\
- + OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
- +}\
- +\
- +static void OPNAME ## h264_qpel ## SIZE ## _mc22_pico(uint8_t *dst, uint8_t *src, int stride){\
- + OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_pico(dst, src, stride, stride);\
- +}\
- +\
- +static void OPNAME ## h264_qpel ## SIZE ## _mc21_pico(uint8_t *dst, uint8_t *src, int stride){\
- + uint8_t halfH[SIZE*SIZE];\
- + uint8_t halfHV[SIZE*SIZE];\
- + put_h264_qpel ## SIZE ## _h_lowpass_pico(halfH, src, SIZE, stride);\
- + put_h264_qpel ## SIZE ## _hv_lowpass_pico(halfHV, src, SIZE, stride);\
- + OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
- +}\
- +\
- +static void OPNAME ## h264_qpel ## SIZE ## _mc23_pico(uint8_t *dst, uint8_t *src, int stride){\
- + uint8_t halfH[SIZE*SIZE];\
- + uint8_t halfHV[SIZE*SIZE];\
- + put_h264_qpel ## SIZE ## _h_lowpass_pico(halfH, src + stride, SIZE, stride);\
- + put_h264_qpel ## SIZE ## _hv_lowpass_pico(halfHV, src, SIZE, stride);\
- + OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
- +}\
- +\
- +static void OPNAME ## h264_qpel ## SIZE ## _mc12_pico(uint8_t *dst, uint8_t *src, int stride){\
- + uint8_t full[SIZE*(SIZE+5)];\
- + uint8_t * const full_mid= full + SIZE*2;\
- + uint8_t halfV[SIZE*SIZE];\
- + uint8_t halfHV[SIZE*SIZE];\
- + copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
- + put_h264_qpel ## SIZE ## _v_lowpass_pico(halfV, full_mid, SIZE, SIZE);\
- + put_h264_qpel ## SIZE ## _hv_lowpass_pico(halfHV, src, SIZE, stride);\
- + OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
- +}\
- +\
- +static void OPNAME ## h264_qpel ## SIZE ## _mc32_pico(uint8_t *dst, uint8_t *src, int stride){\
- + uint8_t full[SIZE*(SIZE+5)];\
- + uint8_t * const full_mid= full + SIZE*2;\
- + uint8_t halfV[SIZE*SIZE];\
- + uint8_t halfHV[SIZE*SIZE];\
- + copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
- + put_h264_qpel ## SIZE ## _v_lowpass_pico(halfV, full_mid, SIZE, SIZE);\
- + put_h264_qpel ## SIZE ## _hv_lowpass_pico(halfHV, src, SIZE, stride);\
- + OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
- +}\
- +
- +/* Instantiate the put_ and avg_ families for 4x4, 8x8 and 16x16 blocks. */
- +H264_MC(put_, 4)
- +H264_MC(put_, 8)
- +H264_MC(put_, 16)
- +H264_MC(avg_, 4)
- +H264_MC(avg_, 8)
- +H264_MC(avg_, 16)
- +
- +
- +
- +/*
- + * dspfunc16(PFX) defines the four non-static functions
- + * PFX_pixels16_avr32, PFX_pixels16_h_avr32, PFX_pixels16_v_avr32 and
- + * PFX_pixels16_hv_avr32. Each one handles a 16-byte-wide block by calling
- + * the matching PFX_pixels8*_avr32 routine twice: once on the left eight
- + * columns and once at dst + 8 / pixels + 8, with the same line_size and h.
- + */
- +#define dspfunc16(PFX) \
- + void PFX ## _pixels16_avr32(uint8_t *dst, const uint8_t *pixels, int line_size, int h ){ \
- + PFX ## _pixels8_avr32(dst, pixels, line_size, h);\
- + PFX ## _pixels8_avr32(dst + 8, pixels + 8, line_size, h);\
- + }\
- + void PFX ## _pixels16_h_avr32(uint8_t *dst, const uint8_t *pixels, int line_size, int h ){ \
- + PFX ## _pixels8_h_avr32(dst, pixels, line_size, h);\
- + PFX ## _pixels8_h_avr32(dst + 8, pixels + 8, line_size, h);\
- + }\
- + void PFX ## _pixels16_v_avr32(uint8_t *dst, const uint8_t *pixels, int line_size, int h ){ \
- + PFX ## _pixels8_v_avr32(dst, pixels, line_size, h);\
- + PFX ## _pixels8_v_avr32(dst + 8, pixels + 8, line_size, h);\
- + }\
- + void PFX ## _pixels16_hv_avr32(uint8_t *dst, const uint8_t *pixels, int line_size, int h ){ \
- + PFX ## _pixels8_hv_avr32(dst, pixels, line_size, h);\
- + PFX ## _pixels8_hv_avr32(dst + 8, pixels + 8, line_size, h);\
- + }\
- +
- +
- +/* Instantiate for the four pixel-op prefixes, then retire the helper macro. */
- +dspfunc16(put)
- +dspfunc16(put_no_rnd)
- +dspfunc16(avg)
- +dspfunc16(avg_no_rnd)
- +#undef dspfunc16
- +
- +/*
- + * Sum the byte values of a block that is 16 bytes wide and 16 rows high.
- + *
- + * pix:       first byte of the block; four 32-bit words are loaded from
- + *            offsets 0, 4, 8 and 12 of every row.
- + * line_size: distance in bytes between consecutive rows.
- + *
- + * Returns the sum of all 256 bytes.
- + *
- + * Inside the loop the bytes are unpacked to halfword pairs and accumulated
- + * into the two halfwords of s; the final instruction adds the two
- + * halfwords of s together.
- + */
- +static int pix_sum_avr32(uint8_t * pix, int line_size)
- +{
- + int s, i;
- +
- + s = 0;
- + for (i = 0; i < 16; i++) {
- + int tmp1,tmp2,tmp3,tmp4,tmp5;
- + /* s (%5) is read by every padd.h and written back, so it must be a
- + read-write operand ("+r"); an output-only constraint would allow the
- + compiler to drop the initial value and the running sum. The "memory"
- + clobber tells the compiler that the asm reads the pixel data. */
- + __asm__ volatile ( "ld.w\t%0, %6[0]\n\t"
- + "ld.w\t%1, %6[4]\n\t"
- + "ld.w\t%2, %6[8]\n\t"
- + "ld.w\t%3, %6[12]\n\t"
- + "punpckub.h\t%4, %0:t\n\t"
- + "padd.h\t%5, %5, %4\n\t"
- + "punpckub.h\t%4, %0:b\n\t"
- + "padd.h\t%5, %5, %4\n\t"
- + "punpckub.h\t%4, %1:t\n\t"
- + "padd.h\t%5, %5, %4\n\t"
- + "punpckub.h\t%4, %1:b\n\t"
- + "padd.h\t%5, %5, %4\n\t"
- + "punpckub.h\t%4, %2:t\n\t"
- + "padd.h\t%5, %5, %4\n\t"
- + "punpckub.h\t%4, %2:b\n\t"
- + "padd.h\t%5, %5, %4\n\t"
- + "punpckub.h\t%4, %3:t\n\t"
- + "padd.h\t%5, %5, %4\n\t"
- + "punpckub.h\t%4, %3:b\n\t"
- + "padd.h\t%5, %5, %4\n\t"
- + : "=&r"(tmp1),"=&r"(tmp2),"=&r"(tmp3),"=&r"(tmp4),"=&r"(tmp5),"+r"(s)
- + : "r"(pix)
- + : "memory");
- + pix += line_size;
- + }
- + /* Fold the two halfword partial sums into one word; s is both source and
- + destination here, hence "+r". */
- + __asm__ volatile ( "addhh.w\t%0, %0:t, %0:b" : "+r" (s) );
- +
- + return s;
- +}
- +
- +
- +//#define op_scale1(x) block[x] = clip_uint8( (block[x]*weight + offset) >> log2_denom )
- +//#define op_scale2(x) dst[x] = clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
- +//#define H264_WEIGHT(W,H) \
- +//static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
- +// int attribute_unused x, y; \
- +// offset <<= log2_denom; \
- +// if(log2_denom) offset += 1<<(log2_denom-1); \
- +// for(y=0; y<H; y++, block += stride){ \
- +// uint32_t tmp0, tmp1;
- +// if(W==2) { \
- +// asm volatile ( "ld.ub\t%[tmp0], %[block][0]\n" \
- +// "ld.ub\t%[tmp1], %[block][1]\n" \
- +// "mulhh.w\t%[tmp0], %[tmp0]:b, %[weight]:b\n" \
- +// "mulhh.w\t%[tmp1], %[tmp1]:b, %[weight]:b\n" \
- +// "asr\t%[tmp0], %[log2_denom]\n" \
- +// "asr\t%[tmp1], %[log2_denom]\n" \
- +// "satu\t%[tmp0] >> 0, 8\n" \
- +// "satu\t%[tmp1] >> 0, 8\n" \
- +// "st.b\t%[block][0], %[tmp0]\n" \
- +// "st.b\t%[block][1], %[tmp1]\n" \
- +// : [tmp0] "=&r"(tmp0), [tmp1] "=&r"(tmp1) \
- +// : [block] "r"(block), [weight]"r"(weight), [log2_denom]"r"(log2denom) ); \
- +// } else if ( W==4 ) { \
- +// asm volatile ( "ld.w\t%[tmp0], %[block][0]\n" \
- +// "punpckub.h\t%[tmp1], %[tmp0]:t\n" \
- +// "punpckub.h\t%[tmp0], %[tmp0]:b\n" \
- +// "mulhh.w\t%[tmp2], %[tmp1]:t, %[weight]:b\n" \
- +// "mulhh.w\t%[tmp1], %[tmp1]:b, %[weight]:b\n" \
- +// "asr\t%[tmp0], %[log2_denom]\n" \
- +// "asr\t%[tmp1], %[log2_denom]\n" \
- +// "satu\t%[tmp0] >> 0, 8\n" \
- +// "satu\t%[tmp1] >> 0, 8\n" \
- +// "st.b\t%[block][0], %[tmp0]\n" \
- +// "st.b\t%[block][1], %[tmp1]\n" \
- +// : [tmp0] "=&r"(tmp0), [tmp1] "=&r"(tmp1) \
- +// : [block] "r"(block), [weight]"r"(weight), [log2_denom]"r"(log2denom) ); \
- +//
- +//
- +//
- +// if(W==4) continue; \
- +// op_scale1(4); \
- +// op_scale1(5); \
- +// op_scale1(6); \
- +// op_scale1(7); \
- +// if(W==8) continue; \
- +// op_scale1(8); \
- +// op_scale1(9); \
- +// op_scale1(10); \
- +// op_scale1(11); \
- +// op_scale1(12); \
- +// op_scale1(13); \
- +// op_scale1(14); \
- +// op_scale1(15); \
- +// } \
- +//} \
- +//static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offsetd, int offsets){ \
- +// int attribute_unused x, y; \
- +// int offset = (offsets + offsetd + 1) >> 1; \
- +// offset = ((offset << 1) + 1) << log2_denom; \
- +// for(y=0; y<H; y++, dst += stride, src += stride){ \
- +// op_scale2(0); \
- +// op_scale2(1); \
- +// if(W==2) continue; \
- +// op_scale2(2); \
- +// op_scale2(3); \
- +// if(W==4) continue; \
- +// op_scale2(4); \
- +// op_scale2(5); \
- +// op_scale2(6); \
- +// op_scale2(7); \
- +// if(W==8) continue; \
- +// op_scale2(8); \
- +// op_scale2(9); \
- +// op_scale2(10); \
- +// op_scale2(11); \
- +// op_scale2(12); \
- +// op_scale2(13); \
- +// op_scale2(14); \
- +// op_scale2(15); \
- +// } \
- +//}
- +
- +
- +
- +/* Per-byte test of |a - b| < compare on four packed unsigned bytes.
- +
- + Evaluates to a 32-bit value whose bytes are zero wherever the absolute
- + difference between <a> and <b> is not less than <compare>; the remaining
- + bytes hold the non-zero value compare - |a - b|.
- +
- + The absolute difference is built from two saturating unsigned
- + subtractions (a - b and b - a, one of which saturates to zero) that are
- + OR-ed together. All three temporaries are early-clobber outputs because
- + they are written before the last input has been read. */
- +#define PABS_DIFF_LESS_THAN( a, b, compare) \
- + ({ uint32_t __tmp__, __tmp2__, __mask__; \
- + asm ( \
- + /* Check ABS( a - b ) < compare */ \
- + "psubs.ub\t%[tmp], %[opa], %[opb]\n" \
- + "psubs.ub\t%[tmp2], %[opb], %[opa]\n" \
- + "or\t%[tmp], %[tmp2]\n" /* ABS ( a - b ) */ \
- + /* This produces 0 for all bytes where the comparison is not true */ \
- + "psubs.ub\t%[mask], %[cmp], %[tmp]\n" \
- + : [tmp] "=&r"(__tmp__), [tmp2] "=&r"(__tmp2__), [mask] "=&r"(__mask__) \
- + : [opa] "r"(a), [opb] "r"(b), [cmp] "r"(compare) ); \
- + __mask__; })
- +
- +/*
- + Set all bytes containing zero in <value> to 255 and the rest to zero.
- +
- + Add with saturation 254 to all bytes making all bytes different from
- + zero become 255. Then add one without saturation to make all bytes
- + originally containing zero 255 and the rest 0.
- +
- + The result register is written by the first instruction while the
- + <all_ones> input is still needed by the second one, so it must be an
- + early-clobber output ("=&r"); with a plain "=r" the compiler is free to
- + place it in the same register as one of the inputs. */
- +#define SET_ALL_BITS_IN_ZERO_BYTES(value) \
- + ({ uint32_t __tmp__; \
- + asm ( \
- + "padds.ub\t%[tmp], %[val], %[max_minus_one]\n" \
- + "padd.b\t%[tmp], %[tmp], %[all_ones]\n" \
- + : [tmp] "=&r"(__tmp__) \
- + : [val] "r"(value), [max_minus_one] "r"(0xFEFEFEFE), [all_ones] "r"(0x01010101) ); \
- + __tmp__; })
- +
- +/* Wraps the single instruction packw.sh: evaluates to the 32-bit result of
- + packing <upper> and <lower>.
- + NOTE(review): judging by the mnemonic and by how the loop filter uses it,
- + this presumably packs two words into the upper/lower signed halfwords with
- + saturation -- confirm against the AVR32 instruction set reference. */
- +#define PACKW_SH(upper, lower) \
- + ({ uint32_t __tmp__; \
- + asm ( \
- + "packw.sh\t%[tmp], %[u], %[l]\n" \
- + : [tmp] "=r"(__tmp__) \
- + : [u] "r"(upper), [l] "r"(lower) ); \
- + __tmp__; })
- +
- +/* Pack the halfwords of <upper> and <lower> into four unsigned bytes.
- +
- + The macro name and its users (byte-replicated alpha/beta thresholds that
- + are compared with unsigned-byte instructions) call for the unsigned
- + variant packsh.ub, as used by the inline assembly in the loop filter; the
- + signed variant packsh.sb would saturate at the signed-byte range.
- + NOTE(review): instruction semantics taken from the mnemonic -- confirm
- + against the AVR32 instruction set reference. */
- +#define PACKSH_UB(upper, lower) \
- + ({ uint32_t __tmp__; \
- + asm ( \
- + "packsh.ub\t%[tmp], %[u], %[l]\n" \
- + : [tmp] "=r"(__tmp__) \
- + : [u] "r"(upper), [l] "r"(lower) ); \
- + __tmp__; })
- +
- +/*
- + * Filter across a horizontal luma edge, four columns at a time.
- + *
- + * pix points at the first row below the edge (q0); p0, p1 and p2 are the
- + * rows at pix - stride, pix - 2*stride and pix - 3*stride, q1 and q2 the
- + * rows at pix + stride and pix + 2*stride. Each tc0[i] applies to the four
- + * adjacent columns starting at pix + 4*i; a negative tc0[i] leaves that
- + * group untouched. Nothing is done when alpha is 0. The scalar algorithm
- + * being vectorised is quoted in the comments inside the loop.
- + *
- + * Throughout, <mask> and <mask2> hold 0xFF in every byte (column) that must
- + * keep its original value and 0x00 in every byte that is to be filtered.
- + *
- + * NOTE(review): the assignment of this function in dsputil_init_avr32 is
- + * commented out. Two deviations from the scalar reference remain and need
- + * verification on AVR32 hardware or a simulator before it is enabled:
- + *  - tc is incremented for the whole group as soon as any column passes the
- + *    p2/q2 test, whereas the reference increments it per column;
- + *  - the register holding -tc is only computed on the upper-halfword path,
- + *    but it is also read on the lower-halfword path when the upper path is
- + *    skipped.
- + */
- +static void h264_v_loop_filter_luma_avr32(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
- +{
- +    int i;
- +
- +    if ( alpha == 0 )
- +        return;
- +
- +    /* Replicate the thresholds into all four bytes of a word. */
- +    alpha = PACKW_SH(alpha, alpha);
- +    alpha = PACKSH_UB(alpha, alpha);
- +    beta = PACKW_SH(beta, beta);
- +    beta = PACKSH_UB(beta, beta);
- +
- +    /* Advance by one group of four columns on every iteration, including
- +       the ones that bail out early with continue. */
- +    for( i = 0; i < 4; i++, pix += 4 ) {
- +        uint32_t p0, p1, p2, q0, q1, q2;
- +        uint32_t mask, mask2, cond;
- +        uint32_t tmp, tmp2, tmp3, tmp4;
- +
- +        if( tc0[i] < 0 )
- +            continue;
- +
- +/* for( d = 0; d < 4; d++ ) {
- +       const int p0 = pix[-1*stride];
- +       const int p1 = pix[-2*stride];
- +       const int p2 = pix[-3*stride];
- +       const int q0 = pix[0];
- +       const int q1 = pix[1*stride];
- +       const int q2 = pix[2*stride];
- +
- +       if( ABS( p0 - q0 ) < alpha &&
- +           ABS( p1 - p0 ) < beta &&
- +           ABS( q1 - q0 ) < beta ) { */
- +
- +        p0 = LD32(pix - stride);
- +        p1 = LD32(pix - 2*stride);
- +        q0 = LD32(pix);
- +        q1 = LD32(pix + stride);
- +
- +        /* Check which of the columns should be filtered, if any. A column
- +           is filtered only when all three comparisons hold, so turn each
- +           comparison into a "failed" byte mask and OR the failures. */
- +        cond = PABS_DIFF_LESS_THAN(p0, q0, alpha);
- +        mask = SET_ALL_BITS_IN_ZERO_BYTES(cond);
- +        cond = PABS_DIFF_LESS_THAN(p1, p0, beta);
- +        mask |= SET_ALL_BITS_IN_ZERO_BYTES(cond);
- +        cond = PABS_DIFF_LESS_THAN(q1, q0, beta);
- +        mask |= SET_ALL_BITS_IN_ZERO_BYTES(cond);
- +
- +        /* Every column failed at least one comparison. */
- +        if ( mask == 0xFFFFFFFF )
- +            continue;
- +
- +        int tc = PACKW_SH(tc0[i], tc0[i]);
- +        int tc0_p = tc;
- +        int tc0_m = PACKW_SH(-tc0[i], -tc0[i]);
- +
- +        /*
- +          int i_delta;
- +          if( ABS( p2 - p0 ) < beta ) {
- +            pix[-2*stride] = p1 + clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] );
- +            tc++;
- +          }*/
- +
- +        p2 = LD32(pix - 3*stride);
- +        mask2 = PABS_DIFF_LESS_THAN(p2, p0, beta) & ~mask;
- +
- +        if ( mask2 ){
- +            mask2 = SET_ALL_BITS_IN_ZERO_BYTES(mask2);
- +            asm ("pavg.ub\t%[tmp], %[p0], %[q0]\n"
- +                 "paddh.ub\t%[tmp], %[tmp], %[p2]\n"
- +                 "punpckub.h\t%[tmp2], %[tmp]:t\n"
- +                 "punpckub.h\t%[tmp], %[tmp]:b\n"
- +                 "punpckub.h\t%[tmp3], %[p1]:t\n"
- +                 "punpckub.h\t%[tmp4], %[p1]:b\n"
- +                 "psub.h\t%[tmp2], %[tmp2], %[tmp3]\n"
- +                 "psub.h\t%[tmp], %[tmp], %[tmp4]\n"
- +                 "pmin.sh\t%[tmp2], %[tmp2], %[tc0_p]\n"
- +                 "pmin.sh\t%[tmp], %[tmp], %[tc0_p]\n"
- +                 "pmax.sh\t%[tmp2], %[tmp2], %[tc0_m]\n"
- +                 "pmax.sh\t%[tmp], %[tmp], %[tc0_m]\n"
- +                 "padd.h\t%[tmp2], %[tmp2], %[tmp3]\n"
- +                 "padd.h\t%[tmp], %[tmp], %[tmp4]\n"
- +                 "packsh.ub\t%[tmp], %[tmp2], %[tmp]\n"
- +                 /* Columns that are not filtered keep the original p1,
- +                    since the result is stored to the p1 row. */
- +                 "andn\t%[tmp], %[mask2]\n"
- +                 "and\t%[tmp2], %[p1], %[mask2]\n"
- +                 "or\t%[tmp], %[tmp2]\n"
- +                 : [tmp]"=&r"(tmp), [tmp2]"=&r"(tmp2), [tmp3]"=&r"(tmp3),
- +                   [tmp4]"=&r"(tmp4)
- +                 : [q0]"r"(q0), [p2]"r"(p2), [p1]"r"(p1), [p0]"r"(p0), [tc0_p]"r"(tc0_p),
- +                   [tc0_m]"r"(tc0_m), [mask2]"r"(mask2));
- +            ST32(pix - 2*stride, tmp);
- +            tc += 0x00010001;
- +        }
- +
- +
- +        q2 = LD32(pix + 2*stride);
- +
- +        /*
- +          if( ABS( q2 - q0 ) < beta ) {
- +            pix[ stride] = q1 + clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] );
- +            tc++;
- +          }
- +        */
- +        mask2 = PABS_DIFF_LESS_THAN(q2, q0, beta) & ~mask;
- +
- +        if ( mask2 ){
- +            mask2 = SET_ALL_BITS_IN_ZERO_BYTES(mask2);
- +            asm ("pavg.ub\t%[tmp], %[p0], %[q0]\n"
- +                 "paddh.ub\t%[tmp], %[tmp], %[q2]\n"
- +                 "punpckub.h\t%[tmp2], %[tmp]:t\n"
- +                 "punpckub.h\t%[tmp], %[tmp]:b\n"
- +                 "punpckub.h\t%[tmp3], %[q1]:t\n"
- +                 "punpckub.h\t%[tmp4], %[q1]:b\n"
- +                 "psub.h\t%[tmp2], %[tmp2], %[tmp3]\n"
- +                 "psub.h\t%[tmp], %[tmp], %[tmp4]\n"
- +                 "pmin.sh\t%[tmp2], %[tmp2], %[tc0_p]\n"
- +                 "pmin.sh\t%[tmp], %[tmp], %[tc0_p]\n"
- +                 "pmax.sh\t%[tmp2], %[tmp2], %[tc0_m]\n"
- +                 "pmax.sh\t%[tmp], %[tmp], %[tc0_m]\n"
- +                 "padd.h\t%[tmp2], %[tmp2], %[tmp3]\n"
- +                 "padd.h\t%[tmp], %[tmp], %[tmp4]\n"
- +                 "packsh.ub\t%[tmp], %[tmp2], %[tmp]\n"
- +                 "andn\t%[tmp], %[mask2]\n"
- +                 "and\t%[tmp2], %[q1], %[mask2]\n"
- +                 "or\t%[tmp], %[tmp2]\n"
- +                 : [tmp]"=&r"(tmp), [tmp2]"=&r"(tmp2), [tmp3]"=&r"(tmp3),
- +                   [tmp4]"=&r"(tmp4)
- +                 : [q0]"r"(q0), [q2]"r"(q2), [q1]"r"(q1), [p0]"r"(p0), [tc0_p]"r"(tc0_p),
- +                   [tc0_m]"r"(tc0_m), [mask2]"r"(mask2));
- +            ST32(pix + stride, tmp);
- +            tc += 0x00010001;
- +        }
- +
- +        uint32_t old_p0 = p0;
- +        uint32_t old_q0 = q0;
- +
- +        /* i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
- +           pix[-stride] = clip_uint8( p0 + i_delta );
- +           pix[0] = clip_uint8( q0 - i_delta ); */
- +
- +        /* p0, p1, q0 and q1 are read and then reused as scratch/result
- +           registers, so they are declared as read-write ("+&r") operands. */
- +        asm (
- +             /* Check if the two upper pixels should be filtered */
- +             "lsr\t%[tmp], %[inv_mask], 16\n"
- +             "breq\t0f\n"
- +
- +             "punpckub.h\t%[tmp], %[p1]:t\n"
- +             "punpckub.h\t%[tmp2], %[q1]:t\n"
- +
- +             /* p1 - q1 */
- +             "psub.h\t%[tmp], %[tmp], %[tmp2]\n"
- +
- +             "punpckub.h\t%[tmp3], %[q0]:t\n"
- +             "punpckub.h\t%[tmp4], %[p0]:t\n"
- +
- +             /* q0 - p0 */
- +             "psub.h\t%[tmp2], %[tmp3], %[tmp4]\n"
- +
- +             /* (q0 - p0) << 2 */
- +             "plsl.h\t%[tmp2], %[tmp2], 2\n"
- +
- +             /* ((q0 - p0) << 2) + (p1 - q1) */
- +             "padd.h\t%[tmp2], %[tmp2], %[tmp]\n"
- +
- +             "mov\t%[tmp], 0x00040004\n"
- +             /* ((q0 - p0) << 2) + (p1 - q1) + 4*/
- +             "padd.h\t%[tmp2], %[tmp2], %[tmp]\n"
- +
- +             /* (((q0 - p0) << 2) + (p1 - q1) + 4) >> 3*/
- +             "pasr.h\t%[tmp2], %[tmp2], 3\n"
- +
- +             "mov\t%[tmp], 0\n"
- +             "psub.h\t%[tmp], %[tmp], %[tc]\n"
- +
- +             /* i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); */
- +             "pmin.sh\t%[tmp2], %[tmp2], %[tc]\n"
- +             "pmax.sh\t%[tmp2], %[tmp2], %[tmp]\n"
- +
- +
- +             /* pix[-stride] = clip_uint8( p0 + i_delta ); */
- +             "padd.h\t%[tmp4], %[tmp4], %[tmp2]\n"
- +
- +
- +             /* pix[0] = clip_uint8( q0 - i_delta ); */
- +             "psub.h\t%[tmp3], %[tmp3], %[tmp2]\n"
- +
- +             /* Check if the two lower pixels should be filtered */
- +             "lsl\t%[tmp2], %[inv_mask], 16\n"
- +             "breq\t1f\n"
- +
- +             "0:\n"
- +             "punpckub.h\t%[p1], %[p1]:b\n"
- +             "punpckub.h\t%[q1], %[q1]:b\n"
- +
- +             /* p1 - q1 */
- +             "psub.h\t%[p1], %[p1], %[q1]\n"
- +
- +             "punpckub.h\t%[q0], %[q0]:b\n"
- +             "punpckub.h\t%[p0], %[p0]:b\n"
- +
- +             /* q0 - p0 */
- +             "psub.h\t%[tmp2], %[q0], %[p0]\n"
- +
- +             /* (q0 - p0) << 2 */
- +             "plsl.h\t%[tmp2], %[tmp2], 2\n"
- +
- +             /* ((q0 - p0) << 2) + (p1 - q1) */
- +             "padd.h\t%[tmp2], %[tmp2], %[p1]\n"
- +
- +             "mov\t%[q1], 0x00040004\n"
- +             /* ((q0 - p0) << 2) + (p1 - q1) + 4*/
- +             "padd.h\t%[tmp2], %[tmp2], %[q1]\n"
- +
- +             /* (((q0 - p0) << 2) + (p1 - q1) + 4) >> 3*/
- +             "pasr.h\t%[tmp2], %[tmp2], 3\n"
- +
- +             /* i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); */
- +             "pmin.sh\t%[tmp2], %[tmp2], %[tc]\n"
- +             "pmax.sh\t%[tmp2], %[tmp2], %[tmp]\n"
- +
- +             /* pix[-stride] = clip_uint8( p0 + i_delta ); */
- +             "padd.h\t%[p0], %[p0], %[tmp2]\n"
- +
- +             /* pix[0] = clip_uint8( q0 - i_delta ); */
- +             "psub.h\t%[q0], %[q0], %[tmp2]\n"
- +
- +             "1:\n"
- +             /* Upper halves are in tmp4 (p0) / tmp3 (q0), lower halves in
- +                p0 / q0. */
- +             "packsh.ub\t%[p0], %[tmp4], %[p0]\n"
- +             "packsh.ub\t%[q0], %[tmp3], %[q0]\n"
- +
- +             : [tmp]"=&r"(tmp), [tmp2]"=&r"(tmp2), [tmp3]"=&r"(tmp3),
- +               [tmp4]"=&r"(tmp4), [q0]"+&r"(q0), [q1]"+&r"(q1), [p0]"+&r"(p0), [p1]"+&r"(p1)
- +             : [tc]"r"(tc), [inv_mask]"r"(~mask));
- +
- +        /* Keep the original pixels in the columns that must not be filtered. */
- +        ST32(pix - stride, (mask & old_p0) | (p0 & ~mask));
- +        ST32(pix, (mask & old_q0) | (q0 & ~mask));
- +
- +    }
- +}
- +
- +
- +
- +
- +#ifdef CHECK_DSP_FUNCS_AGAINST_C
- +
- +/*
- + * Log a block of <w> x <h> bytes at AV_LOG_ERROR level, one tab-indented
- + * line per row with the values printed in decimal. <line_size> is the
- + * distance in bytes between the starts of two consecutive rows.
- + */
- +void dump_block(uint8_t *block, int line_size, int h, int w){
- +    int i, j;
- +
- +    for ( i = 0; i < h ; i++ ){
- +        av_log(NULL, AV_LOG_ERROR, "\t");
- +        for ( j = 0; j < w ; j++ ){
- +            av_log(NULL, AV_LOG_ERROR, "%d ", block[j + i*line_size]);
- +        }
- +        av_log(NULL, AV_LOG_ERROR, "\n");
- +    }
- +}
- +
- +/* Log an 8 pixel wide block of <h> rows; see dump_block. */
- +void dump_block8(uint8_t *block, int line_size, int h){
- +    dump_block(block, line_size, h, 8);
- +}
- +
- +/* Log a 4 pixel wide block of <h> rows; see dump_block. */
- +void dump_block4(uint8_t *block, int line_size, int h){
- +    dump_block(block, line_size, h, 4);
- +}
- +
- +/*
- + * Compare a <width> x <h> block produced by the function under test with
- + * the reference block.
- + *
- + * test / line_size_test        block to verify and its row stride
- + * correct / line_size_correct  reference block and its row stride
- + * name                         label used in the error output
- + * max_dev                      largest absolute per-pixel difference accepted
- + *
- + * Pixels are visited column by column. On the first pixel that deviates by
- + * more than max_dev, both blocks are logged and the process exits with
- + * status 1; otherwise the function returns normally.
- + */
- +void check_block(uint8_t *test, uint8_t *correct, int line_size_test, int line_size_correct,
- +                 int h, int width, char *name, int max_dev){
- +    int i,j;
- +    for ( i = 0; i < width ; i++ ){
- +        for ( j = 0; j < h ; j++ ){
- +            int diff = test[i + line_size_test*j] - correct[i + line_size_correct*j];
- +            diff = diff < 0 ? -diff : diff;
- +            if ( diff > max_dev ){
- +                av_log(NULL, AV_LOG_ERROR, "Error pixel x=%i, y=%i differs. Is 0x%x should be 0x%x\n",
- +                       i, j, test[i + line_size_test*j], correct[i + j*line_size_correct]);
- +                av_log(NULL, AV_LOG_ERROR, "Error resulting block from %s is:\n", name);
- +                dump_block(test, line_size_test, h, width);
- +                av_log(NULL, AV_LOG_ERROR, "But should be equal to:\n");
- +                dump_block(correct, line_size_correct, h, width);
- +                exit(1);
- +            }
- +        }
- +    }
- +}
- +
- +/* check_block for blocks that are 8 pixels wide. */
- +void check_block8(uint8_t *test, uint8_t *correct, int line_size_test, int line_size_correct,
- +                  int h, char *name, int max_dev){
- +    check_block(test, correct, line_size_test, line_size_correct, h, 8, name, max_dev);
- +}
- +
- +/* check_block for blocks that are 4 pixels wide. Both blocks are dumped
- +   4 pixels wide on failure (the tested block used to be dumped with the
- +   8 pixel wide helper). */
- +void check_block4(uint8_t *test, uint8_t *correct, int line_size_test, int line_size_correct,
- +                  int h, char *name, int max_dev){
- +    check_block(test, correct, line_size_test, line_size_correct, h, 4, name, max_dev);
- +}
- +
- +/* Log the 64 coefficients of <block> at AV_LOG_ERROR level as eight
- +   tab-indented lines of eight hexadecimal values each. */
- +void dump_dct_block(DCTELEM *block){
- +    int row, col;
- +
- +    for ( row = 0; row < 8; row++ ){
- +        DCTELEM *coeffs = block + row*8;
- +
- +        av_log(NULL, AV_LOG_ERROR, "\t");
- +        for ( col = 0; col < 8; col++ )
- +            av_log(NULL, AV_LOG_ERROR, "0x%x ", coeffs[col]);
- +        av_log(NULL, AV_LOG_ERROR, "\n");
- +    }
- +}
- +
- +/*
- + * Run idct_avr32 in place on <block> and simple_idct on a transposed copy
- + * of the input, then compare all 64 coefficients. On the first mismatch
- + * both blocks are logged and the process exits with status 1.
- + */
- +void test_idct_avr32(DCTELEM *block){
- +    DCTELEM testBlock[64];
- +    int i, j;
- +
- +    /* Copy transposed block to testBlock */
- +    for ( i = 0; i < 8 ; i++ ){
- +        for ( j = 0; j < 8 ; j++ ){
- +            testBlock[i + 8*j] = block[j + i*8];
- +        }
- +    }
- +
- +    idct_avr32(block);
- +    /* Pass the array itself (it decays to DCTELEM *), not a pointer to the
- +       whole array, which has an incompatible type. */
- +    simple_idct(testBlock);
- +
- +    for ( i = 0; i < 64 ; i++ ){
- +        if ( block[i] != testBlock[i] ){
- +            av_log(NULL, AV_LOG_ERROR, "Error resulting block from idct is:\n");
- +            dump_dct_block(block);
- +            av_log(NULL, AV_LOG_ERROR, "But should be equal to the transposed of:\n");
- +            dump_dct_block(testBlock);
- +            exit(1);
- +        }
- +    }
- +}
- +
- +/*
- + * Run idct_put_avr32 on <dest>/<block> and simple_idct_put on a transposed
- + * copy of <block> writing into a local 8x8 buffer, then require the two
- + * 8x8 outputs to differ by at most 1 per pixel (see check_block8).
- + */
- +void test_idct_put_avr32(uint8_t *dest, int line_size, DCTELEM *block){
- +    uint8_t testBlock[64];
- +    DCTELEM blockCopy[64];
- +    int i, j;
- +
- +    /* Copy transposed block to blockCopy */
- +    for ( i = 0; i < 8 ; i++ ){
- +        for ( j = 0; j < 8 ; j++ ){
- +            blockCopy[i + 8*j] = block[j + i*8];
- +        }
- +    }
- +
- +    idct_put_avr32(dest, line_size, block);
- +    /* Pass the array itself (it decays to uint8_t *), not a pointer to the
- +       whole array, which has an incompatible type. */
- +    simple_idct_put(testBlock, 8, blockCopy);
- +
- +    check_block8(dest, testBlock, line_size, 8, 8, "idct_put", 1);
- +}
- +
- +
- +/*
- + * Run idct_add_avr32 on <dest>/<block> and simple_idct_add on a copy of the
- + * destination pixels plus a transposed copy of <block>, then require the
- + * two 8x8 outputs to differ by at most 1 per pixel (see check_block8).
- + */
- +void test_idct_add_avr32(uint8_t *dest, int line_size, DCTELEM *block){
- +    uint8_t testBlock[64];
- +    DCTELEM blockCopy[64];
- +    int i, j;
- +
- +    /* Copy dest to testBlock */
- +    for ( i = 0; i < 8 ; i++ ){
- +        for ( j = 0; j < 8 ; j++ ){
- +            testBlock[i + 8*j] = dest[i + j*line_size];
- +        }
- +    }
- +
- +    /* Copy transposed block to blockCopy */
- +    for ( i = 0; i < 8 ; i++ ){
- +        for ( j = 0; j < 8 ; j++ ){
- +            blockCopy[i + 8*j] = block[j + i*8];
- +        }
- +    }
- +
- +    idct_add_avr32(dest, line_size, block);
- +    /* Pass the array itself (it decays to uint8_t *), not a pointer to the
- +       whole array, which has an incompatible type. */
- +    simple_idct_add(testBlock, 8, blockCopy);
- +
- +    check_block8(dest, testBlock, line_size, 8, 8, "idct_add", 1);
- +}
- +
- +/*
- + * Run ff_h264_idct_add_c on the caller's <dest>/<block> and
- + * h264_idct_add_avr32 on private copies of the 4x4 destination pixels and
- + * of the 16 coefficients, then require both 4x4 results to be identical.
- + */
- +void test_h264_idct_add_avr32(uint8_t *dest, DCTELEM *block, int stride){
- +    uint8_t pixelCopy[16];
- +    DCTELEM coeffCopy[16];
- +    int x, y, k;
- +
- +    /* Snapshot the destination pixels, packed with a stride of 4 */
- +    for ( y = 0; y < 4; y++ )
- +        for ( x = 0; x < 4; x++ )
- +            pixelCopy[x + 4*y] = dest[x + y*stride];
- +
- +    /* Snapshot the coefficients unchanged */
- +    for ( k = 0; k < 16; k++ )
- +        coeffCopy[k] = block[k];
- +
- +    ff_h264_idct_add_c(dest, block, stride);
- +
- +    h264_idct_add_avr32(pixelCopy, coeffCopy, 4);
- +
- +    check_block(dest, pixelCopy, stride, 4, 4, 4, "h264_idct_add", 0);
- +}
- +
- +/*
- + * Run ff_h264_idct8_add_c on the caller's <dest>/<block> and
- + * h264_idct8_add_avr32 on private copies of the 8x8 destination pixels and
- + * of the 64 coefficients, then require both 8x8 results to be identical.
- + */
- +void test_h264_idct8_add_avr32(uint8_t *dest, DCTELEM *block, int stride){
- +    uint8_t pixelCopy[8*8];
- +    DCTELEM coeffCopy[8*8];
- +    int x, y, k;
- +
- +    /* Snapshot the destination pixels, packed with a stride of 8 */
- +    for ( y = 0; y < 8; y++ )
- +        for ( x = 0; x < 8; x++ )
- +            pixelCopy[x + 8*y] = dest[x + y*stride];
- +
- +    /* Snapshot the coefficients unchanged */
- +    for ( k = 0; k < 8*8; k++ )
- +        coeffCopy[k] = block[k];
- +
- +    ff_h264_idct8_add_c(dest, block, stride);
- +    h264_idct8_add_avr32(pixelCopy, coeffCopy, 8);
- +
- +    check_block(dest, pixelCopy, stride, 8, 8, 8, "h264_idct8_add", 0);
- +}
- +
- +/*
- + * Run <test> on the caller's buffers and <correct> on packed private copies,
- + * then require the first 8 columns of the <h> output rows to be identical.
- + *
- + * in_h_size / in_v_size are the extra source columns / rows (beyond 8 x h)
- + * that are copied for the reference function. The reference destination is
- + * seeded with the caller's destination pixels before either function runs,
- + * so that functions which combine the source with the existing destination
- + * (the avg variants instantiated by test_pixels_funcs) start from the same
- + * data in both runs.
- + *
- + * NOTE(review): only 8 + in_h_size source columns are copied and only 8
- + * output columns are seeded and compared, yet test_pixels_funcs also
- + * instantiates this helper for the 16 pixel wide functions -- confirm
- + * whether a 16 wide variant is needed.
- + */
- +void test_put_pixels_funcs8(op_pixels_func test, op_pixels_func correct, uint8_t *block,
- +                            const uint8_t *pixels, int line_size, int h, char *name, int in_h_size, int in_v_size){
- +    uint8_t *testBlock, *testBlock2;
- +    int i, j;
- +    int input_v_size = h + in_v_size;
- +    int input_h_size = 8 + in_h_size;
- +
- +    testBlock = alloca(input_h_size*input_v_size);
- +    testBlock2 = alloca(input_h_size*input_v_size);
- +
- +    /* Packed copy of the source area read by the reference function */
- +    for ( i = 0; i < input_h_size ; i++ ){
- +        for ( j = 0; j < input_v_size ; j++ ){
- +            testBlock[i + input_h_size*j] = pixels[i + j*line_size];
- +        }
- +    }
- +
- +    /* Seed the reference destination with the current destination pixels */
- +    for ( i = 0; i < 8 ; i++ ){
- +        for ( j = 0; j < h ; j++ ){
- +            testBlock2[i + input_h_size*j] = block[i + j*line_size];
- +        }
- +    }
- +
- +    test(block, pixels, line_size, h);
- +    correct(testBlock2, testBlock, input_h_size, h);
- +
- +    check_block8(block, testBlock2, line_size, input_h_size, h, name, 0);
- +
- +}
- +
- +/*
- + * Run <test> on the caller's buffers and <correct> on packed private copies
- + * of the (w+1) x (h+1) source area and the w x h destination area, then
- + * require both w x h results to be identical. The private copies use a row
- + * stride of w+1 rounded up to a multiple of 4.
- + */
- +void test_h264_chroma_mc_funcs(h264_chroma_mc_func test, h264_chroma_mc_func correct, uint8_t *dst,
- +                               uint8_t *src, int stride, int h, int w, int x, int y, char *name){
- +    const int ref_rows = h + 1;
- +    const int ref_stride = ((w + 1) + 3) & ~3;
- +    uint8_t *ref_src = alloca(ref_stride*ref_rows);
- +    uint8_t *ref_dst = alloca(ref_stride*ref_rows);
- +    int col, row;
- +
- +    /* Source area, one extra column and row */
- +    for ( row = 0; row < h + 1; row++ )
- +        for ( col = 0; col < w + 1; col++ )
- +            ref_src[col + ref_stride*row] = src[col + row*stride];
- +
- +    /* Current destination pixels */
- +    for ( row = 0; row < h; row++ )
- +        for ( col = 0; col < w; col++ )
- +            ref_dst[col + ref_stride*row] = dst[col + row*stride];
- +
- +    test(dst, src, stride, h, x, y);
- +    correct(ref_dst, ref_src, ref_stride, h, x, y);
- +
- +    check_block(dst, ref_dst, stride, ref_stride, h, w, name, 0);
- +}
- +
- +/*
- + * Run <correct> on the caller's buffers and <test> on packed private copies
- + * (row stride size + 8), then require both size x size results to be
- + * identical. The private source copy includes a border of 4 pixels on
- + * every side of the size x size area.
- + */
- +void test_qpel_mc_funcs(qpel_mc_func test, qpel_mc_func correct, uint8_t *dst,
- +                        uint8_t *src, int stride, int size, char *name){
- +    const int priv_stride = size + 8;
- +    uint8_t *priv_src, *priv_dst;
- +    int col, row;
- +
- +    /* Point priv_src 4 rows and 4 columns into its buffer so that the
- +       negative offsets used below stay inside the allocation. */
- +    priv_src = alloca(priv_stride*(size+8)) + 4 + priv_stride*4;
- +    priv_dst = alloca(priv_stride*size);
- +
- +    for ( row = -4; row < size+4; row++ )
- +        for ( col = -4; col < size+4; col++ )
- +            priv_src[col + priv_stride*row] = src[col + row*stride];
- +
- +    for ( row = 0; row < size; row++ )
- +        for ( col = 0; col < size; col++ )
- +            priv_dst[col + priv_stride*row] = dst[col + row*stride];
- +
- +    correct(dst, src, stride);
- +    test(priv_dst, priv_src, priv_stride);
- +
- +    check_block(priv_dst, dst, priv_stride, stride, size, size, name, 0);
- +}
- +
- +
- +/*
- + * Defines the four wrappers test_<PFX>_pixels<NUM>{,_h,_v,_hv}_avr32. Each
- + * forwards to test_put_pixels_funcs8, pairing the AVR32 function of the same
- + * name (without the test_ prefix) with the C function
- + * <PFX>_pixels<NUM>{,_x2,_y2,_xy2}_c. The last two arguments are the extra
- + * source columns and rows (in_h_size, in_v_size) of the tested variant.
- + * Note that the name string passed for error output does not include <NUM>.
- + */
- +#define test_pixels_funcs(PFX, NUM ) \
- +void test_ ## PFX ## _pixels ## NUM ## _avr32( uint8_t *block, const uint8_t *pixels, int line_size, int h){ \
- + test_put_pixels_funcs8(PFX ## _pixels ## NUM ## _avr32, PFX ## _pixels ## NUM ## _c, \
- + block, pixels, line_size, h, "test_" #PFX "_pixels", 0, 0); } \
- +void test_ ## PFX ## _pixels ## NUM ## _h_avr32( uint8_t *block, const uint8_t *pixels, int line_size, int h){ \
- + test_put_pixels_funcs8(PFX ## _pixels ## NUM ## _h_avr32, PFX ## _pixels ## NUM ## _x2_c, \
- + block, pixels, line_size, h, "test_" #PFX "_pixels_h", 1, 0); } \
- +void test_ ## PFX ## _pixels ## NUM ## _v_avr32( uint8_t *block, const uint8_t *pixels, int line_size, int h){ \
- + test_put_pixels_funcs8(PFX ## _pixels ## NUM ## _v_avr32, PFX ## _pixels ## NUM ## _y2_c, \
- + block, pixels, line_size, h, "test_" #PFX "_pixels_v", 0, 1); } \
- +void test_ ## PFX ## _pixels ## NUM ## _hv_avr32( uint8_t *block, const uint8_t *pixels, int line_size, int h){ \
- + test_put_pixels_funcs8(PFX ## _pixels ## NUM ## _hv_avr32, PFX ## _pixels ## NUM ## _xy2_c, \
- + block, pixels, line_size, h, "test_" #PFX "_pixels_hv", 1, 1); }
- +
- +/* Wrappers for the put and put_no_rnd functions, 8 and 16 pixels wide */
- +test_pixels_funcs(put, 8);
- +test_pixels_funcs(put_no_rnd, 8);
- +test_pixels_funcs(put, 16);
- +test_pixels_funcs(put_no_rnd, 16);
- +
- +/* Wrappers for the avg and avg_no_rnd functions, 8 and 16 pixels wide */
- +test_pixels_funcs(avg, 8);
- +test_pixels_funcs(avg_no_rnd, 8);
- +test_pixels_funcs(avg, 16);
- +test_pixels_funcs(avg_no_rnd, 16);
- +
- +/*
- + * Defines the wrapper test_<PFX>_h264_chroma_mc<NUM>_pico, which compares
- + * <PFX>_h264_chroma_mc<NUM>_pico with <PFX>_h264_chroma_mc<NUM>_c using <NUM>
- + * as the block width. The macro shares its name with the helper function
- + * defined above; inside the expansion the name is not expanded again, so
- + * the call there goes to the function.
- + */
- +#define test_h264_chroma_mc_funcs(PFX, NUM ) \
- +void test_ ## PFX ## _h264_chroma_mc ## NUM ## _pico( uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){ \
- + test_h264_chroma_mc_funcs(PFX ## _h264_chroma_mc ## NUM ## _pico, PFX ## _h264_chroma_mc ## NUM ## _c, \
- + dst, src, stride, h, NUM, x, y, "test_" #PFX "_h264_chroma_mc" #NUM "_pico"); } \
- +
- +test_h264_chroma_mc_funcs(put, 2);
- +test_h264_chroma_mc_funcs(put, 4);
- +test_h264_chroma_mc_funcs(put, 8);
- +test_h264_chroma_mc_funcs(avg, 2);
- +test_h264_chroma_mc_funcs(avg, 4);
- +test_h264_chroma_mc_funcs(avg, 8);
- +
- +/*
- + * Defines the wrapper test_<PFX><NUM>_<TYPE>_pico, which compares
- + * <PFX><NUM>_<TYPE>_pico with <PFX><NUM>_<TYPE>_c on a <NUM> x <NUM> block
- + * through the test_qpel_mc_funcs helper function.
- + */
- +#define test_qpel_mc_funcs_type(PFX, NUM, TYPE ) \
- +void test_ ## PFX ## NUM ## _ ## TYPE ## _pico( uint8_t *dst, uint8_t *src, int stride){ \
- + test_qpel_mc_funcs(PFX ## NUM ## _ ## TYPE ## _pico, PFX ## NUM ## _ ## TYPE ## _c, \
- + dst, src, stride, NUM, "test_" #PFX #NUM "_" #TYPE "_pico"); }
- +
- +/*
- + * Defines the wrappers for all 16 positions mc00 .. mc33 of one block size.
- + * This macro is defined after test_qpel_mc_funcs_type, so the
- + * test_qpel_mc_funcs name used in that macro's body is expanded again when
- + * the wrappers are generated here.
- + * NOTE(review): that nested expansion receives 7 arguments while this macro
- + * takes 2 -- confirm that this block builds with
- + * CHECK_DSP_FUNCS_AGAINST_C defined.
- + */
- +#define test_qpel_mc_funcs(PFX, NUM) \
- + test_qpel_mc_funcs_type(PFX, NUM, mc00);\
- + test_qpel_mc_funcs_type(PFX, NUM, mc10);\
- + test_qpel_mc_funcs_type(PFX, NUM, mc20);\
- + test_qpel_mc_funcs_type(PFX, NUM, mc30);\
- + test_qpel_mc_funcs_type(PFX, NUM, mc01);\
- + test_qpel_mc_funcs_type(PFX, NUM, mc11);\
- + test_qpel_mc_funcs_type(PFX, NUM, mc21);\
- + test_qpel_mc_funcs_type(PFX, NUM, mc31);\
- + test_qpel_mc_funcs_type(PFX, NUM, mc02);\
- + test_qpel_mc_funcs_type(PFX, NUM, mc12);\
- + test_qpel_mc_funcs_type(PFX, NUM, mc22);\
- + test_qpel_mc_funcs_type(PFX, NUM, mc32);\
- + test_qpel_mc_funcs_type(PFX, NUM, mc03);\
- + test_qpel_mc_funcs_type(PFX, NUM, mc13);\
- + test_qpel_mc_funcs_type(PFX, NUM, mc23);\
- + test_qpel_mc_funcs_type(PFX, NUM, mc33)
- +
- +test_qpel_mc_funcs(put_h264_qpel, 4);
- +test_qpel_mc_funcs(put_h264_qpel, 8);
- +test_qpel_mc_funcs(put_h264_qpel, 16);
- +test_qpel_mc_funcs(avg_h264_qpel, 4);
- +test_qpel_mc_funcs(avg_h264_qpel, 8);
- +test_qpel_mc_funcs(avg_h264_qpel, 16);
- +
- +
- +/*
- + * Fills the 16 entries of c-><PFX>_pixels_tab[IDX] with the
- + * <PFX><NUM>_mcXY_pico functions, wrapped in DSP_FUNC_NAME.
- + * This definition is only seen when CHECK_DSP_FUNCS_AGAINST_C is defined;
- + * dsputil_init_avr32 defines the same macro again with an identical body
- + * before using it.
- + */
- +#define dspfunc(PFX, IDX, NUM) \
- + c->PFX ## _pixels_tab[IDX][ 0] = DSP_FUNC_NAME( PFX ## NUM ## _mc00_pico ); \
- + c->PFX ## _pixels_tab[IDX][ 1] = DSP_FUNC_NAME( PFX ## NUM ## _mc10_pico ); \
- + c->PFX ## _pixels_tab[IDX][ 2] = DSP_FUNC_NAME( PFX ## NUM ## _mc20_pico ); \
- + c->PFX ## _pixels_tab[IDX][ 3] = DSP_FUNC_NAME( PFX ## NUM ## _mc30_pico ); \
- + c->PFX ## _pixels_tab[IDX][ 4] = DSP_FUNC_NAME( PFX ## NUM ## _mc01_pico ); \
- + c->PFX ## _pixels_tab[IDX][ 5] = DSP_FUNC_NAME( PFX ## NUM ## _mc11_pico ); \
- + c->PFX ## _pixels_tab[IDX][ 6] = DSP_FUNC_NAME( PFX ## NUM ## _mc21_pico ); \
- + c->PFX ## _pixels_tab[IDX][ 7] = DSP_FUNC_NAME( PFX ## NUM ## _mc31_pico ); \
- + c->PFX ## _pixels_tab[IDX][ 8] = DSP_FUNC_NAME( PFX ## NUM ## _mc02_pico ); \
- + c->PFX ## _pixels_tab[IDX][ 9] = DSP_FUNC_NAME( PFX ## NUM ## _mc12_pico ); \
- + c->PFX ## _pixels_tab[IDX][10] = DSP_FUNC_NAME( PFX ## NUM ## _mc22_pico ); \
- + c->PFX ## _pixels_tab[IDX][11] = DSP_FUNC_NAME( PFX ## NUM ## _mc32_pico ); \
- + c->PFX ## _pixels_tab[IDX][12] = DSP_FUNC_NAME( PFX ## NUM ## _mc03_pico ); \
- + c->PFX ## _pixels_tab[IDX][13] = DSP_FUNC_NAME( PFX ## NUM ## _mc13_pico ); \
- + c->PFX ## _pixels_tab[IDX][14] = DSP_FUNC_NAME( PFX ## NUM ## _mc23_pico ); \
- + c->PFX ## _pixels_tab[IDX][15] = DSP_FUNC_NAME( PFX ## NUM ## _mc33_pico )
- +
- +#endif
- +
- +/*
- + * Install the AVR32 implementations into the function tables of <c>.
- + *
- + * The H.264 quarter-pel tables are only filled when avr32_use_pico is set;
- + * the IDCT, FDCT, clear_blocks and put/avg pixel entries are always set.
- + * Every function is wrapped in DSP_FUNC_NAME (defined outside this part of
- + * the file). <avctx> is not referenced.
- + */
- +void dsputil_init_avr32(DSPContext* c, AVCodecContext *avctx)
- +{
- +
- + /* H264 */
- +
- + /* Disabled: the condition is hard-coded to 0, so the PICO chroma motion
- + compensation functions are never installed. */
- + if ( 0 /*avr32_use_pico*/ ){
- + c->put_h264_chroma_pixels_tab[0]= DSP_FUNC_NAME(put_h264_chroma_mc8_pico);
- + c->put_h264_chroma_pixels_tab[1]= DSP_FUNC_NAME(put_h264_chroma_mc4_pico);
- + c->put_h264_chroma_pixels_tab[2]= DSP_FUNC_NAME(put_h264_chroma_mc2_pico);
- +
- + c->avg_h264_chroma_pixels_tab[0]= DSP_FUNC_NAME(avg_h264_chroma_mc8_pico);
- + c->avg_h264_chroma_pixels_tab[1]= DSP_FUNC_NAME(avg_h264_chroma_mc4_pico);
- + c->avg_h264_chroma_pixels_tab[2]= DSP_FUNC_NAME(avg_h264_chroma_mc2_pico);
- + }
- +
- +/* Fills the 16 quarter-pel positions mc00 .. mc33 of one table row */
- +#define dspfunc(PFX, IDX, NUM) \
- + c->PFX ## _pixels_tab[IDX][ 0] = DSP_FUNC_NAME( PFX ## NUM ## _mc00_pico ); \
- + c->PFX ## _pixels_tab[IDX][ 1] = DSP_FUNC_NAME( PFX ## NUM ## _mc10_pico ); \
- + c->PFX ## _pixels_tab[IDX][ 2] = DSP_FUNC_NAME( PFX ## NUM ## _mc20_pico ); \
- + c->PFX ## _pixels_tab[IDX][ 3] = DSP_FUNC_NAME( PFX ## NUM ## _mc30_pico ); \
- + c->PFX ## _pixels_tab[IDX][ 4] = DSP_FUNC_NAME( PFX ## NUM ## _mc01_pico ); \
- + c->PFX ## _pixels_tab[IDX][ 5] = DSP_FUNC_NAME( PFX ## NUM ## _mc11_pico ); \
- + c->PFX ## _pixels_tab[IDX][ 6] = DSP_FUNC_NAME( PFX ## NUM ## _mc21_pico ); \
- + c->PFX ## _pixels_tab[IDX][ 7] = DSP_FUNC_NAME( PFX ## NUM ## _mc31_pico ); \
- + c->PFX ## _pixels_tab[IDX][ 8] = DSP_FUNC_NAME( PFX ## NUM ## _mc02_pico ); \
- + c->PFX ## _pixels_tab[IDX][ 9] = DSP_FUNC_NAME( PFX ## NUM ## _mc12_pico ); \
- + c->PFX ## _pixels_tab[IDX][10] = DSP_FUNC_NAME( PFX ## NUM ## _mc22_pico ); \
- + c->PFX ## _pixels_tab[IDX][11] = DSP_FUNC_NAME( PFX ## NUM ## _mc32_pico ); \
- + c->PFX ## _pixels_tab[IDX][12] = DSP_FUNC_NAME( PFX ## NUM ## _mc03_pico ); \
- + c->PFX ## _pixels_tab[IDX][13] = DSP_FUNC_NAME( PFX ## NUM ## _mc13_pico ); \
- + c->PFX ## _pixels_tab[IDX][14] = DSP_FUNC_NAME( PFX ## NUM ## _mc23_pico ); \
- + c->PFX ## _pixels_tab[IDX][15] = DSP_FUNC_NAME( PFX ## NUM ## _mc33_pico )
- +
- + /* Table rows 0, 1 and 2 take the 16, 8 and 4 pixel functions */
- + if ( avr32_use_pico ){
- + dspfunc(put_h264_qpel, 0, 16);
- + dspfunc(put_h264_qpel, 1, 8);
- + dspfunc(put_h264_qpel, 2, 4);
- + dspfunc(avg_h264_qpel, 0, 16);
- + dspfunc(avg_h264_qpel, 1, 8);
- + dspfunc(avg_h264_qpel, 2, 4);
- + }
- +
- + c->idct_put= DSP_FUNC_NAME(idct_put_avr32);
- + c->idct_add= DSP_FUNC_NAME(idct_add_avr32);
- + c->idct = DSP_FUNC_NAME(idct_avr32);
- + c->h264_idct_add = DSP_FUNC_NAME(h264_idct_add_avr32);
- + c->h264_idct8_add = DSP_FUNC_NAME(h264_idct8_add_avr32);
- +
- + /* The AVR32 luma loop filter is deliberately left uninstalled here. */
- + /*c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_avr32;*/
- +
- + c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM;
- +
- + c->fdct = fdct_avr32;
- +
- + c->clear_blocks = clear_blocks_avr32;
- +
- +/* From here on dspfunc fills the four entries (plain, _h, _v, _hv) of one
- + row of the put/avg pixel tables instead. */
- +#undef dspfunc
- +#define dspfunc(PFX, IDX, NUM) \
- + c->PFX ## _pixels_tab[IDX][0] = DSP_FUNC_NAME( PFX ## _pixels ## NUM ## _avr32 ); \
- + c->PFX ## _pixels_tab[IDX][1] = DSP_FUNC_NAME( PFX ## _pixels ## NUM ## _h_avr32); \
- + c->PFX ## _pixels_tab[IDX][2] = DSP_FUNC_NAME( PFX ## _pixels ## NUM ## _v_avr32); \
- + c->PFX ## _pixels_tab[IDX][3] = DSP_FUNC_NAME( PFX ## _pixels ## NUM ## _hv_avr32)
- +
- + /* Table row 0 takes the 16 pixel functions, row 1 the 8 pixel ones */
- + dspfunc(put, 0, 16);
- + dspfunc(put_no_rnd, 0, 16);
- + dspfunc(put, 1, 8);
- + dspfunc(put_no_rnd, 1, 8);
- +
- + dspfunc(avg, 1, 8);
- + dspfunc(avg_no_rnd, 1, 8);
- + dspfunc(avg, 0, 16);
- + dspfunc(avg_no_rnd, 0, 16);
- +#undef dspfunc
- +
- +}
- +
- +
- +
- +#if 0
- +int main(int argc, char *argv[]){
- +
- +
- +}
- +#endif
- +
- diff --git a/libavcodec/avr32/fdct.S b/libavcodec/avr32/fdct.S
- new file mode 100644
- index 0000000..be45b86
- --- /dev/null
- +++ b/libavcodec/avr32/fdct.S
- @@ -0,0 +1,541 @@
- +/*
- + * Copyright (c) 2007 Atmel Corporation. All rights reserved.
- + *
- + * Redistribution and use in source and binary forms, with or without
- + * modification, are permitted provided that the following conditions
- + * are met:
- + *
- + * 1. Redistributions of source code must retain the above copyright
- + * notice, this list of conditions and the following disclaimer.
- + *
- + * 2. Redistributions in binary form must reproduce the above
- + * copyright notice, this list of conditions and the following
- + * disclaimer in the documentation and/or other materials provided
- + * with the distribution.
- + *
- + * 3. The name of ATMEL may not be used to endorse or promote products
- + * derived from this software without specific prior written
- + * permission.
- + *
- + * THIS SOFTWARE IS PROVIDED BY ATMEL ``AS IS'' AND ANY EXPRESS OR
- + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- + * ARE EXPRESSLY AND SPECIFICALLY DISCLAIMED. IN NO EVENT SHALL ATMEL
- + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
- + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
- + * DAMAGE.
- + */
- +
- +//**********************************************************
- +//* 2-D fDCT, Based on: *
- +//* C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical *
- +//* Fast 1-D DCT Algorithms with 11 Multiplications", *
- +//* Proc. Int'l. Conf. on Acoustics, Speech, and Signal *
- +//* Processing 1989 (ICASSP '89), pp. 988-991. *
- +//* *
- +//* Fixed point implementation optimized for the AVR-II *
- +//* instruction set. If a table is used for the *
- +//* coefficients we can load two and two of them from *
- +//* This will give a reduction of
- +//* *
- +//* *
- +//**********************************************************
- +
- +
- +/* This routine is a slow-but-accurate integer implementation of the
- + * forward DCT (Discrete Cosine Transform). Taken from the IJG software
- + *
- + * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
- + * on each column. Direct algorithms are also available, but they are
- + * much more complex and seem not to be any faster when reduced to code.
- + *
- + * This implementation is based on an algorithm described in
- + * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
- + * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
- + * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
- + * The primary algorithm described there uses 11 multiplies and 29 adds.
- + * We use their alternate method with 12 multiplies and 32 adds.
- + * The advantage of this method is that no data path contains more than one
- + * multiplication; this allows a very simple and accurate implementation in
- + * scaled fixed-point arithmetic, with a minimal number of shifts.
- + *
- + * The poop on this scaling stuff is as follows:
- + *
- + * Each 1-D DCT step produces outputs which are a factor of sqrt(N)
- + * larger than the true DCT outputs. The final outputs are therefore
- + * a factor of N larger than desired; since N=8 this can be cured by
- + * a simple right shift at the end of the algorithm. The advantage of
- + * this arrangement is that we save two multiplications per 1-D DCT,
- + * because the y0 and y4 outputs need not be divided by sqrt(N).
- + * In the IJG code, this factor of 8 is removed by the quantization step
- + * (in jcdctmgr.c), here it is removed.
- + *
- + * We have to do addition and subtraction of the integer inputs, which
- + * is no problem, and multiplication by fractional constants, which is
- + * a problem to do in integer arithmetic. We multiply all the constants
- + * by CONST_SCALE and convert them to integer constants (thus retaining
- + * CONST_BITS bits of precision in the constants). After doing a
- + * multiplication we have to divide the product by CONST_SCALE, with proper
- + * rounding, to produce the correct output. This division can be done
- + * cheaply as a right shift of CONST_BITS bits. We postpone shifting
- + * as long as possible so that partial sums can be added together with
- + * full fractional precision.
- + *
- + * The outputs of the first pass are scaled up by PASS1_BITS bits so that
- + * they are represented to better-than-integral precision. These outputs
- + * require 8 + PASS1_BITS + 3 bits; this fits in a 16-bit word
- + * with the recommended scaling. (For 12-bit sample data, the intermediate
- + * array is INT32 anyway.)
- + *
- + * To avoid overflow of the 32-bit intermediate results in pass 2, we must
- + * have 8 + CONST_BITS + PASS1_BITS <= 26. Error analysis
- + * shows that the values given below are the most effective.
- + *
- + * We can gain a little more speed, with a further compromise in accuracy,
- + * by omitting the addition in a descaling shift. This yields an incorrectly
- + * rounded result half the time...
- + */
- +
- + .global fdct_avr32
- +
- +
- +
- +#define CONST_BITS 13
- +#define PASS1_BITS 2
- +
- +#define FIX_0_298631336 2446 /* FIX(0.298631336) */
- +#define FIX_0_390180644 3196 /* FIX(0.390180644) */
- +#define FIX_0_541196100 4433 /* FIX(0.541196100) */
- +#define FIX_0_765366865 6270 /* FIX(0.765366865) */
- +#define FIX_0_899976223 7373 /* FIX(0.899976223) */
- +#define FIX_1_175875602 9633 /* FIX(1.175875602) */
- +#define FIX_1_501321110 12299 /* FIX(1.501321110) */
- +#define FIX_1_847759065 15137 /* FIX(1.847759065) */
- +#define FIX_1_961570560 16069 /* FIX(1.961570560) */
- +#define FIX_2_053119869 16819 /* FIX(2.053119869) */
- +#define FIX_2_562915447 20995 /* FIX(2.562915447) */
- +#define FIX_3_072711026 25172 /* FIX(3.072711026) */
- +
- +
- +/*
- + * Perform an integer forward DCT on one block of samples.
- + */
- +
- +//void
- +//fdct_int32(short *const block)
- +//{
- +// int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
- +// int tmp10, tmp11, tmp12, tmp13;
- +// int z1, z2, z3, z4, z5;
- +// short *blkptr;
- +// int *dataptr;
- +// int data[64];
- +// int i;
- +//
- +// /* Pass 1: process rows. */
- +// /* Note results are scaled up by sqrt(8) compared to a true DCT; */
- +// /* furthermore, we scale the results by 2**PASS1_BITS. */
- +//
- +// dataptr = data;
- +// blkptr = block;
- +
- + .text
- +fdct_avr32:
- + pushm r0-r3, r4-r7, lr
- +#define loop_ctr r0
- +#define blkptr r12
- +#define x0 r1
- +#define x1 r2
- +#define x2 r3
- +#define x3 r4
- +#define x4 r5
- +#define x5 r6
- +#define x6 r7
- +#define x7 r8
- +#define tmp0 r5
- +#define tmp7 r2
- +#define tmp1 r3
- +#define tmp6 r4
- +#define tmp2 r9
- +#define tmp5 r8
- +#define tmp3 r7
- +#define tmp4 r6
- +
- +
- + mov loop_ctr, 8
- +// for (i = 0; i < 8; i++) {
- +ROW_LOOP:
- +
- + ldm blkptr, r1, r2, r3, r4
- +
- +// tmp2 = blkptr[2] + blkptr[5];
- +// tmp3 = blkptr[3] + blkptr[4];
- + paddx.h r5, r3, r2
- +// tmp5 = blkptr[2] - blkptr[5];
- +// tmp4 = blkptr[3] - blkptr[4];
- + psubx.h r6, r3, r2
- +// tmp0 = blkptr[0] + blkptr[7];
- +// tmp1 = blkptr[1] + blkptr[6];
- + paddx.h r2, r4, r1
- +// tmp7 = blkptr[0] - blkptr[7];
- +// tmp6 = blkptr[1] - blkptr[6];
- + psubx.h r3, r4, r1
- +
- +// /* Even part per LL&M figure 1 --- note that published figure is faulty;
- +// * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
- +// */
- +
- +#define tmp10 r1
- +#define tmp13 r5
- +#define tmp11 r7
- +#define tmp12 r3
- +#define z1 r9
- +
- +// tmp10 = tmp0 + tmp3;
- +// tmp13 = tmp0 - tmp3;
- + paddsub.h r1, r2:t, r5:b
- +// tmp11 = tmp1 + tmp2;
- +// tmp12 = tmp1 - tmp2;
- + paddsub.h r4, r2:b, r5:t
- +
- +
- +// dataptr[0] = (tmp10 + tmp11) << PASS1_BITS;
- +// dataptr[4] = (tmp10 - tmp11) << PASS1_BITS;
- + paddsub.h r7, r1:t, r4:t
- + ld.w r10, pc[const_table - .]
- + plsl.h r7, r7, PASS1_BITS
- +
- +// z1 = (tmp12 + tmp13) * FIX_0_541196100;
- + addhh.w r8, r4:b, r1:b
- + mulhh.w r8, r8:b, r10:t
- +
- +// dataptr[2] =
- +// DESCALE(z1 + tmp13 * FIX_0_765366865, CONST_BITS - PASS1_BITS);
- +// dataptr[6] =
- +// DESCALE(z1 + tmp12 * (-FIX_1_847759065), CONST_BITS - PASS1_BITS);
- + mulhh.w r9, r1:b, r10:b
- + ld.w r10, pc[const_table - . + 4]
- + add r1, r8, r9
- + satrnds r1 >> (CONST_BITS - PASS1_BITS), 31
- +
- + mulhh.w r9, r4:b, r10:t
- + add r4, r8, r9
- + satrnds r4 >> (CONST_BITS - PASS1_BITS), 31
- +
- +
- +// /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
- +// * cK represents cos(K*pi/16).
- +// * i0..i3 in the paper are tmp4..tmp7 here.
- +// */
- +
- +#define z2 r5
- +#define z3 r6
- +#define z4 r7
- +#define z5 r8
- +
- +// z4 = tmp5 + tmp7;
- +// z3 = tmp4 + tmp6;
- + padd.h r2, r6, r3
- +// z2 = tmp5 + tmp6;
- +// z1 = tmp4 + tmp7;
- + paddx.h r5, r6, r3
- +
- + lddpc r9, pc[const_table - . + 8]
- +// z5 = (z3 + z4) * FIX_1_175875602; /* sqrt(2) * c3 */
- + addhh.w r8, r2:t, r2:b
- + mulhh.w r8, r8:b, r10:b
- + lddpc r10, pc[const_table - . + 12]
- +
- +
- +// tmp4 *= FIX_0_298631336; /* sqrt(2) * (-c1+c3+c5-c7) */
- + mulhh.w r11, r6:b, r9:t
- +
- +// tmp5 *= FIX_2_053119869; /* sqrt(2) * ( c1+c3-c5+c7) */
- + mulhh.w r6, r6:t, r9:b
- +
- +// tmp6 *= FIX_3_072711026; /* sqrt(2) * ( c1+c3+c5-c7) */
- + lddpc r9, pc[const_table - . + 20]
- + mulhh.w lr, r3:b, r10:t
- +
- +// tmp7 *= FIX_1_501321110; /* sqrt(2) * ( c1+c3-c5-c7) */
- + mulhh.w r3, r3:t, r10:b
- +
- +// z3 *= -FIX_1_961570560; /* sqrt(2) * (-c3-c5) */
- + mulhh.w r10, r2:b, r9:t
- +
- +// z4 *= -FIX_0_390180644; /* sqrt(2) * (c5-c3) */
- + mulhh.w r2, r2:t, r9:b
- + lddpc r9, pc[const_table - . + 16]
- +// z3 += z5;
- +// z4 += z5;
- + add r10, r8
- + add r2, r8
- +
- +// z1 *= -FIX_0_899976223; /* sqrt(2) * (c7-c3) */
- + mulhh.w r8, r5:b, r9:t
- +
- +// z2 *= -FIX_2_562915447; /* sqrt(2) * (-c1-c3) */
- + mulhh.w r5, r5:t, r9:b
- +
- +// dataptr[7] = DESCALE(tmp4 + z1 + z3, CONST_BITS - PASS1_BITS);
- + add r11, r8
- + add r11, r10
- + satrnds r11 >> (CONST_BITS - PASS1_BITS), 31
- +
- +// dataptr[5] = DESCALE(tmp5 + z2 + z4, CONST_BITS - PASS1_BITS);
- + add r6, r5
- +
- + sthh.w blkptr[6*2], r4:b, r11:b
- + add r6, r2
- + satrnds r6 >> (CONST_BITS - PASS1_BITS), 31
- +
- +// dataptr[3] = DESCALE(tmp6 + z2 + z3, CONST_BITS - PASS1_BITS);
- + add lr, r5
- + sthh.w blkptr[4*2], r7:b, r6:b
- + add lr, r10
- + satrnds lr >> (CONST_BITS - PASS1_BITS), 31
- +
- +// dataptr[1] = DESCALE(tmp7 + z1 + z4, CONST_BITS - PASS1_BITS);
- + add r3, r8
- + sthh.w blkptr[2*2], r1:b, lr:b
- + add r3, r2
- + satrnds r3 >> (CONST_BITS - PASS1_BITS), 31
- +
- +
- +
- +// dataptr += 8; /* advance pointer to next row */
- +// blkptr += 8;
- + sthh.w blkptr[0], r7:t, r3:b
- + sub blkptr, -16
- + sub loop_ctr, 1
- + brne ROW_LOOP
- +
- +// }
- +
- + /* Pass 2: process columns.
- + * We remove the PASS1_BITS scaling, but leave the results scaled up
- + * by an overall factor of 8.
- + */
- +
- +// dataptr = data;
- + sub blkptr, 128
- +
- + mov loop_ctr, 4
- +// for (i = 0; i < 8; i++) {
- +COLOUMN_LOOP:
- + ld.w r1, blkptr[0]
- + ld.w r2, blkptr[1*8*2]
- + ld.w r3, blkptr[2*8*2]
- + ld.w r4, blkptr[3*8*2]
- + ld.w r5, blkptr[4*8*2]
- + ld.w r6, blkptr[5*8*2]
- + ld.w r7, blkptr[6*8*2]
- + ld.w r8, blkptr[7*8*2]
- +
- +// tmp0 = blkptr[0] + blkptr[7*8];
- + padds.sh r9, r1, r8
- +// tmp7 = blkptr[0] - blkptr[7*8];
- + psubs.sh r1, r1, r8
- +// tmp1 = blkptr[1*8] + blkptr[6*8];
- + padds.sh r8, r2, r7
- +// tmp6 = blkptr[1*8] - blkptr[6*8];
- + psubs.sh r2, r2, r7
- +// tmp2 = blkptr[2*8] + blkptr[5*8];
- + padds.sh r7, r3, r6
- +// tmp5 = blkptr[2*8] - blkptr[5*8];
- + psubs.sh r3, r3, r6
- +// tmp3 = blkptr[3*8] + blkptr[4*8];
- + padds.sh r6, r4, r5
- +// tmp4 = blkptr[3*8] - blkptr[4*8];
- + psubs.sh r4, r4, r5
- +
- +// /* even part per ll&m figure 1 --- note that published figure is faulty;
- +// * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
- +// */
- +//
- +// tmp10 = tmp0 + tmp3;
- + padds.sh r5, r9, r6
- +// tmp13 = tmp0 - tmp3;
- + psubs.sh r9, r9, r6
- +// tmp11 = tmp1 + tmp2;
- + padds.sh r6, r8, r7
- +// tmp12 = tmp1 - tmp2;
- + psubs.sh r8, r8, r7
- +
- +// dataptr[0] = DESCALE(tmp10 + tmp11, PASS1_BITS);
- +// dataptr[32] = DESCALE(tmp10 - tmp11, PASS1_BITS);
- +//Might get an overflow here
- + padds.sh r7, r5, r6
- + psubs.sh r5, r5, r6
- +
- + //Rounding
- + mov lr, (1 << (PASS1_BITS + 2))
- + orh lr, hi(1 << (16 + PASS1_BITS + 2))
- + padds.sh r7, r7, lr
- + padds.sh r5, r5, lr
- +
- + pasr.h r7, r7, PASS1_BITS + 3
- + pasr.h r5, r5, PASS1_BITS + 3
- + st.w r12[0], r7
- + st.w r12[4*8*2], r5
- +
- + lddpc r10, const_table2
- +
- +
- +// z1 = (tmp12 + tmp13) * FIX_0_541196100;
- + padds.sh r5, r8, r9
- + mulhh.w r6, r5:t, r10:t
- + mulhh.w r7, r5:b, r10:t
- +
- +// dataptr[16] =
- +// DESCALE(z1 + tmp13 * FIX_0_765366865, CONST_BITS + PASS1_BITS);
- + lddpc r11, const_table2 + 4
- + mulhh.w lr, r9:t, r10:b
- + mulhh.w r9, r9:b, r10:b
- + add lr, r6
- + add r9, r7
- + satrnds lr >> (CONST_BITS + PASS1_BITS + 3), 31
- + satrnds r9 >> (CONST_BITS + PASS1_BITS + 3), 31
- + sthh.w r12[2*8*2], lr:b, r9:b
- +
- +// dataptr[48] =
- +// DESCALE(z1 + tmp12 * (-FIX_1_847759065), CONST_BITS + PASS1_BITS);
- + mulhh.w lr, r8:t, r11:t
- + mulhh.w r8, r8:b, r11:t
- + add lr, r6
- + add r8, r7
- + satrnds lr >> (CONST_BITS + PASS1_BITS + 3), 31
- + satrnds r8 >> (CONST_BITS + PASS1_BITS + 3), 31
- + sthh.w r12[6*8*2], lr:b, r8:b
- +
- +// /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
- +// * cK represents cos(K*pi/16).
- +// * i0..i3 in the paper are tmp4..tmp7 here.
- +// */
- +//
- +// z2 = tmp5 + tmp6;
- +// z3 = tmp4 + tmp6;
- +// z4 = tmp5 + tmp7;
- + padds.sh r5, r3, r2
- + padds.sh r6, r4, r2
- + padds.sh r7, r3, r1
- +
- +// z5 = (z3 + z4) * FIX_1_175875602; /* sqrt(2) * c3 */
- + padds.sh r8, r6, r7
- + mulhh.w r9, r8:t, r11:b
- + mulhh.w r8, r8:b, r11:b
- +
- +// z3 *= -FIX_1_961570560; /* sqrt(2) * (-c3-c5) */
- +// z3 += z5;
- + lddpc r11, const_table2 + 8
- + mulhh.w r10, r6:t, r11:t
- + mulhh.w r6, r6:b, r11:t
- + add r10, r9
- + add r6, r8
- +
- +// z4 *= -FIX_0_390180644; /* sqrt(2) * (c5-c3) */
- +// z4 += z5;
- + mulhh.w lr, r7:t, r11:b
- + mulhh.w r7, r7:b, r11:b
- + lddpc r11, const_table2 + 12
- + st.w --sp,r0
- + add lr, r9
- + add r7, r8
- +
- +// tmp6 *= FIX_3_072711026; /* sqrt(2) * ( c1+c3+c5-c7) */
- + mulhh.w r0, r2:t, r11:t
- + machh.w r0, r5:t, r11:b
- + mulhh.w r2, r2:b, r11:t
- + machh.w r2, r5:b, r11:b
- +
- +// z2 *= -FIX_2_562915447; /* sqrt(2) * (-c1-c3) */
- +// dataptr[24] = DESCALE(tmp6 + z2 + z3, CONST_BITS + PASS1_BITS);
- + add r0, r10
- + lddpc r11, const_table2 + 16
- + add r2, r6
- + satrnds r0 >> (CONST_BITS + PASS1_BITS + 3), 31
- + satrnds r2 >> (CONST_BITS + PASS1_BITS + 3), 31
- + sthh.w r12[3*8*2], r0:b, r2:b
- +// tmp5 *= FIX_2_053119869; /* sqrt(2) * ( c1+c3-c5+c7) */
- + mulhh.w r0, r3:t, r11:t
- + machh.w r0, r5:t, r11:b
- + mulhh.w r2, r3:b, r11:t
- + machh.w r2, r5:b, r11:b
- + add r0, lr
- + lddpc r11, const_table2 + 20
- + add r2, r7
- +
- +// dataptr[40] = DESCALE(tmp5 + z2 + z4, CONST_BITS + PASS1_BITS);
- + satrnds r0 >> (CONST_BITS + PASS1_BITS + 3), 31
- + satrnds r2 >> (CONST_BITS + PASS1_BITS + 3), 31
- + sthh.w r12[5*8*2], r0:b, r2:b
- +
- +
- +// z1 = tmp4 + tmp7;
- + padds.sh r2, r4, r1
- +
- +// tmp4 *= FIX_0_298631336; /* sqrt(2) * (-c1+c3+c5-c7) */
- + mulhh.w r3, r4:t, r11:t
- + machh.w r3, r2:t, r11:b
- + mulhh.w r4, r4:b, r11:t
- + machh.w r4, r2:b, r11:b
- + add r3, r10
- + lddpc r11, const_table2 + 24
- + add r4, r6
- +
- +// z1 *= -FIX_0_899976223; /* sqrt(2) * (c7-c3) */
- +// dataptr[56] = DESCALE(tmp4 + z1 + z3, CONST_BITS + PASS1_BITS);
- + satrnds r3 >> (CONST_BITS + PASS1_BITS + 3), 31
- + satrnds r4 >> (CONST_BITS + PASS1_BITS + 3), 31
- + sthh.w r12[7*8*2], r3:b, r4:b
- +
- +
- +// tmp7 *= FIX_1_501321110; /* sqrt(2) * ( c1+c3-c5-c7) */
- + mulhh.w r3, r1:t, r11:t
- + machh.w r3, r2:t, r11:b
- + mulhh.w r4, r1:b, r11:t
- + machh.w r4, r2:b, r11:b
- + add r3, lr
- + add r4, r7
- +
- +// dataptr[8] = DESCALE(tmp7 + z1 + z4, CONST_BITS + PASS1_BITS);
- + satrnds r3 >> (CONST_BITS + PASS1_BITS + 3), 31
- + satrnds r4 >> (CONST_BITS + PASS1_BITS + 3), 31
- + sthh.w r12[1*8*2], r3:b, r4:b
- + ld.w r0, sp++
- +
- +// dataptr++; /* advance pointer to next column */
- + sub blkptr, -4
- + sub loop_ctr, 1
- + brne COLOUMN_LOOP
- +
- +// }
- +
- + popm r0-r3, r4-r7, pc
- +
- +// /* descale */
- +// for (i = 0; i < 64; i++)
- +// block[i] = (short int) DESCALE(data[i], 3);
- +
- +
- +//}
- +
- +
- + .align 2
- +const_table: .short FIX_0_541196100, FIX_0_765366865, -FIX_1_847759065, FIX_1_175875602
- + .short FIX_0_298631336, FIX_2_053119869, FIX_3_072711026, FIX_1_501321110
- + .short -FIX_0_899976223,-FIX_2_562915447, -FIX_1_961570560, -FIX_0_390180644
- +
- +const_table2: .short FIX_0_541196100, FIX_0_765366865, -FIX_1_847759065, FIX_1_175875602
- + .short -FIX_1_961570560, -FIX_0_390180644, FIX_3_072711026, -FIX_2_562915447
- + .short FIX_2_053119869, -FIX_2_562915447, FIX_0_298631336, -FIX_0_899976223
- + .short FIX_1_501321110, -FIX_0_899976223
- +
- +
- +
- +
- diff --git a/libavcodec/avr32/h264idct.S b/libavcodec/avr32/h264idct.S
- new file mode 100644
- index 0000000..4b23e2d
- --- /dev/null
- +++ b/libavcodec/avr32/h264idct.S
- @@ -0,0 +1,451 @@
- +/*
- + * Copyright (c) 2007 Atmel Corporation. All rights reserved.
- + *
- + * Redistribution and use in source and binary forms, with or without
- + * modification, are permitted provided that the following conditions
- + * are met:
- + *
- + * 1. Redistributions of source code must retain the above copyright
- + * notice, this list of conditions and the following disclaimer.
- + *
- + * 2. Redistributions in binary form must reproduce the above
- + * copyright notice, this list of conditions and the following
- + * disclaimer in the documentation and/or other materials provided
- + * with the distribution.
- + *
- + * 3. The name of ATMEL may not be used to endorse or promote products
- + * derived from this software without specific prior written
- + * permission.
- + *
- + * THIS SOFTWARE IS PROVIDED BY ATMEL ``AS IS'' AND ANY EXPRESS OR
- + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- + * ARE EXPRESSLY AND SPECIFICALLY DISCLAIMED. IN NO EVENT SHALL ATMEL
- + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
- + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
- + * DAMAGE.
- + */
- +
- + .global h264_idct_add_avr32
- +
- + /* Macro for performing the 1-D transform on one row line.
- +
- + The register 'w01' should contain the first two pixels,
- + and the register 'w23' should contain the last two pixels
- + in the line. The resulting line is placed in w01 and w23
- + so that { w01, w23 } = { x0, x1, x3, x2 }.
- + 'tmp' and 'tmp2' should be scratchpad registers. */
- + .macro transform_row w01, w23, tmp, tmp2
- + add \tmp, \w23, \w01 << 1 /* tmp = { xxxx, 2*w1 + w3 } */
- + sub \tmp2, \w01, \w23 << 1 /* tmp2 = { xxxx, w1 - 2*w3 } */
- + bfins \tmp2, \tmp, 16, 16 /* tmp2 = { 2*w1 + w3, w1 - 2*w3 } */
- + pasr.h \tmp2, \tmp2, 1 /* tmp2 = { w1 + w3/2, w1/2 - w3 } */
- + paddsub.h \tmp, \w01:t, \w23:t /* tmp = { w0 + w2, w0 - w2 } */
- + padd.h \w01, \tmp, \tmp2 /* w01 = { w0 + w2 + w1 + w3/2, w0 - w2 + w1/2 - w3 } */
- + psub.h \w23, \tmp, \tmp2 /* w23 = { w0 + w2 - w1 - w3/2, w0 - w2 - w1/2 + w3 } */
- + .endm
- +
- + /* Macro for performing the 1-D transform on two columns.
- +
- + The registers w0, w1, w2, w3 should each contain two
- + packed samples from the two columns to transform.
- + tmp and tmp2 are scratchpad registers.
- +
- + The resulting transformed columns are placed in the
- + same positions as the input columns.
- + */
- + .macro transform_2columns w0, w1, w2, w3, tmp, tmp2
- + padd.h \tmp, \w0, \w2 /* tmp = z0 = w0 + w2 */
- + psub.h \w0, \w0, \w2 /* w0 = z1 = w0 - w2 */
- + pasr.h \w2, \w1, 1 /* w2 = w1/2 */
- + pasr.h \tmp2, \w3, 1 /* tmp2 = w3/2 */
- + psub.h \w3, \w2, \w3 /* w3 = z2 = w1/2 - w3 */
- + padd.h \tmp2, \w1, \tmp2/* tmp2 = z3 = w1 + w3/2 */
- + padd.h \w1, \w0, \w3 /* w1 = x1 = z1 + z2 */
- + psub.h \w2, \w0, \w3 /* w2 = x2 = z1 - z2 */
- + padd.h \w0, \tmp, \tmp2/* w0 = x0 = z0 + z3 */
- + psub.h \w3, \tmp, \tmp2/* w3 = x3 = z0 - z3 */
- + /* Scale down result. */
- + pasr.h \w0, \w0, 6
- + pasr.h \w1, \w1, 6
- + pasr.h \w2, \w2, 6
- + pasr.h \w3, \w3, 6
- + .endm
- +
- +/*void h264_idct_add_avr32(uint8_t *dst, DCTELEM *block, int stride)*/
- +
- +h264_idct_add_avr32:
- +
- + stm --sp,r0-r3,r4-r7, lr
- +
- + /* Setup rounding factor. */
- + mov r0, (1 << 5)
- + lsl r0, 16
- +
- + /* Load block */
- + ldm r11,r2-r9
- + /* r9 = { w00, w01 },
- + r8 = { w02, w03 },
- + r7 = { w10, w11 },
- + r6 = { w12, w13 },
- + r5 = { w20, w21 },
- + r4 = { w22, w23 },
- + r3 = { w30, w31 },
- + r2 = { w32, w33 } */
- +
- +
- + /* Add the rounding factor to w00. */
- + add r9, r0
- +
- + /* Transform rows */
- + transform_row r9, r8, r0, r1
- + transform_row r7, r6, r0, r1
- + transform_row r5, r4, r0, r1
- + transform_row r3, r2, r0, r1
- +
- + /* Transform columns */
- + transform_2columns r9, r7, r5, r3, r0, r1
- + transform_2columns r8, r6, r4, r2, r0, r1
- +
- + /* Load predicted pixels.*/
- + ld.w lr, r12[0]
- + ld.w r11, r12[r10]
- +
- + /* Unpack to halfwords. */
- + punpckub.h r0, lr:t
- + punpckub.h r1, lr:b
- +
- + /* Add with transformed row. */
- + padd.h r0, r0, r9
- + paddx.h r1, r1, r8
- + /* Pack and saturate back to 8-bit pixels. */
- + packsh.ub r0, r0, r1
- +
- + /* Unpack to halfwords. */
- + punpckub.h lr, r11:t
- + punpckub.h r11, r11:b
- +
- + /* Add with transformed row. */
- + padd.h lr, lr, r7
- + paddx.h r11, r11, r6
- + /* Pack and saturate back to 8-bit pixels. */
- + packsh.ub r1, lr, r11
- +
- + /* Store back to frame. */
- + st.w r12[0], r0
- + st.w r12[r10], r1
- +
- + add r12, r12, r10 << 1
- +
- + /* Load predicted pixels.*/
- + ld.w lr, r12[0]
- + ld.w r11, r12[r10]
- +
- + /* Unpack to halfwords. */
- + punpckub.h r0, lr:t
- + punpckub.h r1, lr:b
- +
- + /* Add with transformed row. */
- + padd.h r0, r0, r5
- + paddx.h r1, r1, r4
- + /* Pack and saturate back to 8-bit pixels. */
- + packsh.ub r0, r0, r1
- +
- + /* Unpack to halfwords. */
- + punpckub.h lr, r11:t
- + punpckub.h r11, r11:b
- +
- + /* Add with transformed row. */
- + padd.h lr, lr, r3
- + paddx.h r11, r11, r2
- + /* Pack and saturate back to 8-bit pixels. */
- + packsh.ub r1, lr, r11
- +
- + /* Store back to frame. */
- + st.w r12[0], r0
- + st.w r12[r10], r1
- +
- + ldm sp++,r0-r3,r4-r7, pc
- +
- +
- + .global h264_idct8_add_avr32
- +//void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride){
- +
- +h264_idct8_add_avr32:
- + stm --sp,r0-r3,r4-r7, lr
- +
- + /* Push dst and stride on stack */
- + stm --sp,r10,r12
- +
- +// int i;
- +// DCTELEM (*src)[8] = (DCTELEM(*)[8])block;
- +// uint8_t *cm = cropTbl + MAX_NEG_CROP;
- +
- +// block[0] += 32;
- +
- +
- +// for( i = 0; i < 8; i++ )
- +// {
- + mov lr, 4
- +0:
- + ld.w r7, r11[0*(8*2)]
- + ld.w r6, r11[1*(8*2)]
- + ld.w r5, r11[2*(8*2)]
- + ld.w r4, r11[3*(8*2)]
- + ld.w r3, r11[4*(8*2)]
- + ld.w r2, r11[5*(8*2)]
- + ld.w r1, r11[6*(8*2)]
- + ld.w r0, r11[7*(8*2)]
- +
- +/*
- +
- + const int a0 = src[0][i] + src[4][i];
- + const int a2 = src[0][i] - src[4][i];
- + const int a4 = (src[2][i]>>1) - src[6][i];
- + const int a6 = (src[6][i]>>1) + src[2][i];
- +*/
- + padd.h r8, r7, r3 /* r8 = a0 */
- + psub.h r7, r7, r3 /* r7 = a2 */
- + pasr.h r3, r5, 1 /* r3 = src[2][i] >> 1 */
- + pasr.h r9, r1, 1 /* r9 = src[6][i] >> 1 */
- + psub.h r3, r3, r1 /* r3 = a4 */
- + padd.h r9, r9, r5 /* r9 = a6 */
- +
- +/*
- + const int b0 = a0 + a6;
- + const int b2 = a2 + a4;
- + const int b4 = a2 - a4;
- + const int b6 = a0 - a6;
- +*/
- + padd.h r1, r8, r9 /* r1 = b0 */
- + psub.h r8, r8, r9 /* r8 = b6 */
- + padd.h r5, r7, r3 /* r5 = b2 */
- + psub.h r7, r7, r3 /* r7 = b4 */
- +
- +/*
- + const int a1 = -src[3][i] + src[5][i] - src[7][i] - (src[7][i]>>1);
- + const int a3 = src[1][i] + src[7][i] - src[3][i] - (src[3][i]>>1);
- + const int a5 = -src[1][i] + src[7][i] + src[5][i] + (src[5][i]>>1);
- + const int a7 = src[3][i] + src[5][i] + src[1][i] + (src[1][i]>>1);
- +*/
- + pasr.h r3, r0, 1
- + padd.h r3, r3, r0
- + psub.h r3, r2, r3
- + psub.h r3, r3, r4 /* r3 = a1 */
- +
- + pasr.h r9, r4, 1
- + padd.h r9, r9, r4
- + psub.h r9, r0, r9
- + padd.h r9, r6, r9 /* r9 = a3 */
- +
- + pasr.h r10, r2, 1
- + padd.h r10, r10, r2
- + padd.h r10, r10, r0
- + psub.h r10, r10, r6 /* r10 = a5 */
- +
- + pasr.h r0, r6, 1
- + padd.h r0, r0, r6
- + padd.h r0, r0, r2
- + padd.h r0, r0, r4 /* r0 = a7 */
- +/*
- + const int b1 = (a7>>2) + a1;
- + const int b3 = a3 + (a5>>2);
- + const int b5 = (a3>>2) - a5;
- + const int b7 = a7 - (a1>>2);
- +*/
- + pasr.h r2, r0, 2
- + padd.h r2, r2, r3 /* r2 = b1 */
- + pasr.h r3, r3, 2
- + psub.h r3, r0, r3 /* r3 = b7 */
- +
- + pasr.h r0, r10, 2
- + padd.h r0, r0, r9 /* r0 = b3 */
- + pasr.h r9, r9, 2
- + psub.h r9, r9, r10 /* r9 = b5 */
- +
- +
- +/*
- + src[0][i] = b0 + b7;
- + src[7][i] = b0 - b7;
- + src[1][i] = b2 + b5;
- + src[6][i] = b2 - b5;
- + src[2][i] = b4 + b3;
- + src[5][i] = b4 - b3;
- + src[3][i] = b6 + b1;
- + src[4][i] = b6 - b1; */
- +
- + padd.h r4, r1, r3
- + psub.h r1, r1, r3
- + st.w r11[0*(8*2)], r4
- + st.w r11[7*(8*2)], r1
- +
- + padd.h r3, r5, r9
- + psub.h r5, r5, r9
- + st.w r11[1*(8*2)], r3
- + st.w r11[6*(8*2)], r5
- +
- + padd.h r9, r7, r0
- + psub.h r7, r7, r0
- + st.w r11[2*(8*2)], r9
- + st.w r11[5*(8*2)], r7
- +
- + padd.h r0, r8, r2
- + psub.h r8, r8, r2
- + st.w r11[3*(8*2)], r0
- + st.w r11[4*(8*2)], r8
- +
- + sub r11, -4
- + sub lr, 1
- + brne 0b
- +
- +// }
- +
- + lddsp r12, sp[0] /* r12 = dst */
- + sub r11, 4*4
- + ldm r11++, r4-r7
- + mov lr, 8
- + /* dst (sp[0]) and stride (sp[4]) were pushed on the stack at entry */
- +
- +1:
- +// for( i = 0; i < 8; i++ )
- +// {
- +
- + /* r7 = {src[i][0], src[i][1]}
- + r6 = {src[i][2], src[i][3]}
- + r5 = {src[i][4], src[i][5]}
- + r4 = {src[i][6], src[i][7]} */
- +
- +/*
- + const int a0 = src[i][0] + src[i][4];
- + const int a2 = src[i][0] - src[i][4];
- + const int a4 = (src[i][2]>>1) - src[i][6];
- + const int a6 = (src[i][6]>>1) + src[i][2];
- +*/
- + pasr.h r8, r6, 1
- + pasr.h r9, r4, 1
- + addhh.w r0, r7:t, r5:t /* r0 = a0 */
- + subhh.w r1, r7:t, r5:t /* r1 = a2 */
- + subhh.w r2, r8:t, r4:t /* r2 = a4 */
- + addhh.w r3, r9:t, r6:t /* r3 = a6 */
- +
- +/*
- + const int b0 = a0 + a6;
- + const int b2 = a2 + a4;
- + const int b4 = a2 - a4;
- + const int b6 = a0 - a6;
- +*/
- + add r10, r0, r3 /* r10 = b0 */
- + sub r0, r3 /* r0 = b6 */
- + add r3, r1, r2 /* r3 = b2 */
- + sub r1, r2 /* r1 = b4 */
- +/*
- +
- +
- + const int a7 = src[i][5] + src[i][3] + src[i][1] + (src[i][1]>>1);
- + const int a1 = src[i][5] - src[i][3] - src[i][7] - (src[i][7]>>1);
- + const int a3 = src[i][7] + src[i][1] - src[i][3] - (src[i][3]>>1);
- + const int a5 = src[i][7] - src[i][1] + src[i][5] + (src[i][5]>>1); */
- + addhh.w r8, r8:b, r6:b
- + addhh.w r2, r4:b, r7:b
- + sub r2, r8 /* r2 = a3 */
- +
- + addhh.w r9, r9:b, r4:b
- + subhh.w r8, r5:b, r6:b
- + sub r8, r9 /* r8 = a1 */
- +
- + pasr.h r9, r7, 1
- + addhh.w r9, r9:b, r7:b
- + addhh.w r6, r5:b, r6:b
- + add r6, r9 /* r6 = a7 */
- +
- + pasr.h r9, r5, 1
- + addhh.w r9, r9:b, r5:b
- + subhh.w r5, r4:b, r7:b
- + add r5, r9 /* r5 = a5 */
- +
- +/* const int b1 = (a7>>2) + a1;
- + const int b3 = (a5>>2) + a3;
- + const int b5 = (a3>>2) - a5;
- + const int b7 = -(a1>>2) + a7 ; */
- + asr r4, r6, 2
- + add r4, r8 /* r4 = b1 */
- + asr r8, 2
- + rsub r8, r6 /* r8 = b7 */
- +
- + asr r6, r5, 2
- + add r6, r2 /* r6 = b3 */
- + asr r2, 2
- + sub r2, r5 /* r2 = b5 */
- +
- +/*
- + dst[i*stride + 0] = cm[ dst[i*stride + 0] + ((b0 + b7) >> 6) ];
- + dst[i*stride + 1] = cm[ dst[i*stride + 1] + ((b2 + b5) >> 6) ];
- + dst[i*stride + 2] = cm[ dst[i*stride + 2] + ((b4 + b3) >> 6) ];
- + dst[i*stride + 3] = cm[ dst[i*stride + 3] + ((b6 + b1) >> 6) ];
- + dst[i*stride + 4] = cm[ dst[i*stride + 4] + ((b6 - b1) >> 6) ];
- + dst[i*stride + 5] = cm[ dst[i*stride + 5] + ((b4 - b3) >> 6) ];
- + dst[i*stride + 6] = cm[ dst[i*stride + 6] + ((b2 - b5) >> 6) ];
- + dst[i*stride + 7] = cm[ dst[i*stride + 7] + ((b0 - b7) >> 6) ];
- +*/
- + add r5, r10, r8
- + satrnds r5 >> 6, 0 /* r5 = (b0 + b7) >> 6 */
- + sub r10, r8
- + satrnds r10 >> 6, 0 /* r10 = (b0 - b7) >> 6 */
- + add r8, r3, r2
- + satrnds r8 >> 6, 0 /* r8 = (b2 + b5) >> 6 */
- + sub r3, r2
- + satrnds r3 >> 6, 0 /* r3 = (b2 - b5) >> 6 */
- +
- + add r2, r1, r6
- + satrnds r2 >> 6, 0 /* r2 = (b4 + b3) >> 6 */
- + sub r1, r6
- + satrnds r1 >> 6, 0 /* r1 = (b4 - b3) >> 6 */
- +
- + add r6, r0, r4
- + satrnds r6 >> 6, 0 /* r6 = (b6 + b1) >> 6 */
- + sub r0, r4
- + satrnds r0 >> 6, 0 /* r0 = (b6 - b1) >> 6 */
- +
- + ld.w r4, r12[0]
- +
- + packw.sh r8, r5, r8
- + packw.sh r7, r2, r6
- + ld.w r9, r12[4]
- + packw.sh r6, r0, r1
- + packw.sh r5, r3, r10
- +
- + punpckub.h r10, r4:t
- + punpckub.h r4, r4:b
- + punpckub.h r3, r9:t
- + punpckub.h r9, r9:b
- +
- + padd.h r8, r8, r10
- + padd.h r7, r7, r4
- + padd.h r6, r6, r3
- + padd.h r5, r5, r9
- +
- + lddsp r10, sp[4] /* r10 = stride */
- + packsh.ub r0, r8, r7
- + packsh.ub r1, r6, r5
- +
- + st.w r12[0], r0
- + st.w r12[4], r1
- +
- + ldm r11++, r4-r7
- + add r12, r10 /* dst += stride */
- +
- + sub lr, 1
- + brne 1b
- +
- + sub sp, -8
- + ldm sp++,r0-r3,r4-r7, pc
- +
- +
- +
- +// }
- +//}
- diff --git a/libavcodec/avr32/idct.S b/libavcodec/avr32/idct.S
- new file mode 100644
- index 0000000..e7551ec
- --- /dev/null
- +++ b/libavcodec/avr32/idct.S
- @@ -0,0 +1,829 @@
- +/*
- + * Copyright (c) 2007 Atmel Corporation. All rights reserved.
- + *
- + * Redistribution and use in source and binary forms, with or without
- + * modification, are permitted provided that the following conditions
- + * are met:
- + *
- + * 1. Redistributions of source code must retain the above copyright
- + * notice, this list of conditions and the following disclaimer.
- + *
- + * 2. Redistributions in binary form must reproduce the above
- + * copyright notice, this list of conditions and the following
- + * disclaimer in the documentation and/or other materials provided
- + * with the distribution.
- + *
- + * 3. The name of ATMEL may not be used to endorse or promote products
- + * derived from this software without specific prior written
- + * permission.
- + *
- + * THIS SOFTWARE IS PROVIDED BY ATMEL ``AS IS'' AND ANY EXPRESS OR
- + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- + * ARE EXPRESSLY AND SPECIFICALLY DISCLAIMED. IN NO EVENT SHALL ATMEL
- + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
- + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
- + * DAMAGE.
- + */
- +
- + .global idct_add_avr32
- + .global idct_put_avr32
- + .global idct_avr32
- +
- +
- +#define CONST_BITS 13
- +#define PASS1_BITS 2
- +
- +#define ONE ((INT32) 1)
- +
- +#define CONST_SCALE (ONE << CONST_BITS)
- +
- +#define LINE_SIZE 32
- +
- +#define FIX_0_298631336 (2446) /* FIX(0.298631336) */
- +#define FIX_0_390180644 (3196) /* FIX(0.390180644) */
- +#define FIX_0_541196100 (4433) /* FIX(0.541196100) */
- +#define FIX_0_765366865 (6270) /* FIX(0.765366865) */
- +#define FIX_0_899976223 (7373) /* FIX(0.899976223) */
- +#define FIX_1_175875602 (9633) /* FIX(1.175875602) */
- +#define FIX_1_501321110 (12299)/* FIX(1.501321110) */
- +#define FIX_1_847759065 (15137)/* FIX(1.847759065) */
- +#define FIX_1_961570560 (16069)/* FIX(1.961570560) */
- +#define FIX_2_053119869 (16819)/* FIX(2.053119869) */
- +#define FIX_2_562915447 (20995)/* FIX(2.562915447) */
- +#define FIX_3_072711026 (25172)/* FIX(3.072711026) */
- +
- +
- +#define loop_cnt r11
- +
- + .text
- +
- +idct_add_avr32:
- + pushm r0-r3, r4-r7, lr //Free up registers to use for local variables
- +
- + // Give room for some variables on the stack
- + sub sp, 8
- + stdsp SP[0], r12 // rfp
- + stdsp SP[4], r11 // iinc
- +
- + mov loop_cnt, 8 //Initialize loop counter
- +
- +FOR_ROW:
- +
- + ldm r10, r0, r1, r2, r3 //Load 8 DCT-coefficients from the current row in the DCT-block
- + mov r6, 0
- +#ifdef USE_PREFETCH
- + pref r10[LINE_SIZE] //Prefetch next line
- +#endif
- + or r4, r2, r3 << 16
- + or r4, r1 //Check if all DCT-coefficients except the DC are zero
- + or r4, r0
- + brne AC_ROW //If there are non-zero AC coefficients perform row-transform
- +
- + paddsub.h r5, r3:t, r6:b //Copy the DC-coeff from r3:t into both halves of r5 (r6 is zero)
- + plsl.h r5, r5, PASS1_BITS
- + mov r4, r5
- + st.d r10++, r4
- + st.d r10++, r4
- +
- + sub loop_cnt, 1 //Decrement loop counter
- + brne FOR_ROW //Perform loop one more time if loop_cnt is not zero
- +
- + bral COLOUMN_TRANSFORM //Perform column transform after row transform is computed
- +
- +
- +AC_ROW:
- +
- +
- + ld.w r12, pc[coef_table - .]
- + ld.w r9, pc[coef_table - . + 4]
- +
- + padd.h r4, r2, r0 // r4:t = dataptr[2] + dataptr[6],r4:b = dataptr[3] + dataptr[7]
- + mulhh.w r5, r4:t, r12:t
- + mulhh.w r6, r0:t, r12:b
- + ld.w r12, pc[coef_table - . + 8]
- + mulhh.w r7, r2:t, r9:t
- + add r6, r5 // tmp2
- + satrnds r6 >> (CONST_BITS - PASS1_BITS), 31
- + add r7, r5 // tmp3
- + satrnds r7 >> (CONST_BITS - PASS1_BITS), 31
- +
- + paddsub.h r5, r3:t, r1:t
- + plsl.h r5, r5, PASS1_BITS // r5:t = tmp0, r5:b = tmp1
- +
- + paddsub.h r7, r5:t, r7:b // r7:t = tmp10, r7:b = tmp13
- + paddsub.h r6, r5:b, r6:b // r6:t = tmp11, r6:b = tmp12
- +
- +
- + addhh.w lr, r3:b, r1:b // lr = z4
- + addhh.w r5, r4:b, lr:b
- + mulhh.w r5, r5:b, r9:b // r5 = z5
- +
- + ld.w r9, pc[coef_table - . + 12]
- + mulhh.w r4, r4:b, r12:t // r4 = z3
- + mulhh.w lr, lr:b, r12:b // lr = z4
- +
- + add r4, r5
- + add lr, r5
- +
- + addhh.w r5, r2:b, r1:b // r5 = z2
- + addhh.w r8, r3:b, r0:b // r8 = z1
- +
- +
- + mulhh.w r0, r0:b, r9:t // r0 = tmp0
- + ld.w r12, pc[coef_table - . + 16]
- + mulhh.w r1, r1:b, r9:b // r1 = tmp1
- + ld.w r9, pc[coef_table - . + 20]
- + mulhh.w r2, r2:b, r12:t // r2 = tmp2
- + mulhh.w r3, r3:b, r12:b // r3 = tmp3
- + mulhh.w r8, r8:b, r9:t // r8 = z1
- + mulhh.w r5, r5:b, r9:b // r5 = z2
- +
- +
- + add r0, r8
- + add r0, r4
- + add r1, r5
- + add r1, lr
- + add r2, r5
- + add r2, r4
- + add r3, r8
- + add r3, lr
- +
- + satrnds r0 >> (CONST_BITS - PASS1_BITS), 31
- + satrnds r1 >> (CONST_BITS - PASS1_BITS), 31
- + satrnds r2 >> (CONST_BITS - PASS1_BITS), 31
- + satrnds r3 >> (CONST_BITS - PASS1_BITS), 31
- +
- + paddsub.h r5, r6:t, r2:b // r5:t = dataptr[1], r5:b = dataptr[6]
- + paddsub.h r4, r7:t, r3:b // r4:t = dataptr[0], r4:b = dataptr[7]
- + paddsub.h r3, r6:b, r1:b // r3:t = dataptr[2], r3:b = dataptr[5]
- + paddsub.h r2, r7:b, r0:b // r2:t = dataptr[3], r2:b = dataptr[4]
- +
- + sthh.w r10[0], r4:t, r5:t
- + sthh.w r10[4], r3:t, r2:t
- + sthh.w r10[8], r2:b, r3:b
- + sthh.w r10[12], r5:b, r4:b
- +
- +
- +
- + sub r10, -16
- + sub loop_cnt, 1
- + brne FOR_ROW, e
- +
- +COLOUMN_TRANSFORM:
- +
- + sub r10, 128 //Set pointer to start of DCT block
- +
- +
- + mov loop_cnt, 8
- +FOR_COLOUMN:
- + ldins.h r3:t,r10[0] // r3:t = dataptr[0]
- + ldins.h r1:t,r10[1*8*2]// r1:t = dataptr[1]
- + ldins.h r2:t,r10[2*8*2]// r2:t = dataptr[2]
- + ldins.h r0:t,r10[5*8*2]// r0:t = dataptr[5]
- + ldins.h r3:b,r10[4*8*2]// r3:b = dataptr[4]
- + ldins.h r1:b,r10[3*8*2]// r1:b = dataptr[3]
- + ldins.h r2:b,r10[6*8*2]// r2:b = dataptr[6]
- + ldins.h r0:b,r10[7*8*2]// r0:b = dataptr[7]
- +
- + or r4, r1, r3 << 16
- + or r4, r2
- + or r4, r0
- + brne AC_COLOUMN //If there are non-zero AC coefficients perform column-transform
- +
- + lddsp r12, SP[0] // rfp
- + lddsp r9, SP[4] // iinc
- + satrnds r3 >> ( PASS1_BITS + 3 + 16 ), 9
- + ld.d r0, r12[0]
- + sub r10, -2 // Increment the dataptr
- + bfins r3, r3, 16, 16
- + punpckub.h r2, r1:t
- + padd.h r2, r2, r3
- + punpckub.h r1, r1:b
- + padd.h r1, r1, r3
- + packsh.ub r1, r2, r1
- + punpckub.h r2, r0:t
- + padd.h r2, r2, r3
- + punpckub.h r0, r0:b
- + padd.h r0, r0, r3
- + packsh.ub r0, r2, r0
- + st.d r12[0], r0
- + add r12, r9 // increment rfp
- + stdsp SP[0], r12
- +
- + sub loop_cnt, 1//Decrement loop counter
- + brne FOR_COLOUMN//Perform loop one more time if loop_cnt is not zero
- +
- + sub sp, -8
- + popm r0-r3, r4-r7, pc//Pop back registers and PC
- +
- +AC_COLOUMN:
- +
- + ld.w r12, pc[coef_table - .]
- + ld.w r9, pc[coef_table - . + 4]
- +
- + addhh.w r4, r2:t, r2:b
- + mulhh.w r4, r4:b, r12:t // r4 = z1
- + mulhh.w r5, r2:b, r12:b
- + ld.w r12, pc[coef_table - . + 8]
- + mulhh.w r6, r2:t, r9:t
- + add r5, r4 // r5 = tmp2
- + add r6, r4 // r6 = tmp3
- +
- + addhh.w r7, r3:t, r3:b
- + subhh.w r8, r3:t, r3:b
- +
- + lsl r7, CONST_BITS
- + lsl r8, CONST_BITS
- +
- + add r2, r7, r6 // r2 = tmp10
- + sub r3, r7, r6 // r3 = tmp13
- + add r4, r8, r5 // r4 = tmp11
- + sub r5, r8, r5 // r5 = tmp12
- +
- + padd.h r6, r0, r1 // r6:t = z4, r6:b = z3
- + addhh.w r7, r6:t, r6:b
- + mulhh.w r7, r7:b, r9:b // r7 = z5
- +
- + ld.w r9, pc[coef_table - . + 12]
- + mulhh.w r8, r6:b, r12:t // r8 = z3
- + mulhh.w r6, r6:t, r12:b // r6 = z4
- +
- + add r8, r7
- + add r6, r7
- +
- + paddx.h r7, r0, r1 // r7:t = z2, r7:b = z1
- +
- + mulhh.w r12, r0:b, r9:t // r12 = tmp0
- + mulhh.w r0, r0:t, r9:b // r0 = tmp1
- + ld.w r9, pc[coef_table - . + 16]
- + add r12, r8
- + add r0, r6
- +
- + ld.w lr, pc[coef_table - . + 20]
- + machh.w r8, r1:b, r9:t // r8 = tmp2
- + machh.w r6, r1:t, r9:b // r6 = tmp3
- + mulhh.w r9, r7:b, lr:t // r9 = z1
- + mulhh.w r7, r7:t, lr:b // r7 = z2
- +
- +
- + add r12, r9
- + add r0, r7
- + add r8, r7
- + add r6, r9
- +
- + add r1, r2, r6 // r1 = dataptr[DCTSIZE*0]
- + sub r2, r2, r6 // r2 = dataptr[DCTSIZE*7]
- + add r6, r4, r8 // r6 = dataptr[DCTSIZE*1]
- + sub r4, r4, r8 // r4 = dataptr[DCTSIZE*6]
- + add r8, r5, r0 // r8 = dataptr[DCTSIZE*2]
- + sub r5, r5, r0 // r5 = dataptr[DCTSIZE*5]
- + add r0, r3, r12 // r0 = dataptr[DCTSIZE*3]
- + sub r3, r3, r12 // r3 = dataptr[DCTSIZE*4]
- +
- + satrnds r1 >> (CONST_BITS+PASS1_BITS+3), 9
- + satrnds r2 >> (CONST_BITS+PASS1_BITS+3), 9
- + satrnds r6 >> (CONST_BITS+PASS1_BITS+3), 9
- + satrnds r4 >> (CONST_BITS+PASS1_BITS+3), 9
- + satrnds r8 >> (CONST_BITS+PASS1_BITS+3), 9
- + satrnds r5 >> (CONST_BITS+PASS1_BITS+3), 9
- + satrnds r0 >> (CONST_BITS+PASS1_BITS+3), 9
- + satrnds r3 >> (CONST_BITS+PASS1_BITS+3), 9
- +
- + packw.sh r1, r1, r6
- + packw.sh r8, r8, r0
- + packw.sh r3, r3, r5
- + packw.sh r4, r4, r2
- +
- + lddsp r12, SP[0] // rfp
- + lddsp r9, SP[4] // iinc
- + ld.d r6, r12[0]
- + sub r10, -2 // Increment the dataptr
- + punpckub.h r0, r7:t
- + padd.h r1, r1, r0
- + punpckub.h r0, r7:b
- + padd.h r8, r8, r0
- + packsh.ub r7, r1, r8
- + punpckub.h r0, r6:t
- + padd.h r3, r3, r0
- + punpckub.h r0, r6:b
- + padd.h r4, r4, r0
- + packsh.ub r6, r3, r4
- + st.d r12[0], r6
- + add r12, r9 // increment rfp
- + stdsp SP[0], r12
- +
- + sub loop_cnt, 1 //Decrement loop counter
- + brne FOR_COLOUMN //Perform loop one more time if loop_cnt is not zero
- +
- + sub sp, -8
- + popm r0-r3, r4-r7, pc //Pop back registers and PC
- +
- +
- +
- +// Coefficient table: twelve 16-bit FIX_* constants, fetched two at a time with pc-relative ld.w
- + .align 2
- +coef_table:
- + .short FIX_0_541196100, -FIX_1_847759065, FIX_0_765366865, FIX_1_175875602
- + .short - FIX_1_961570560, - FIX_0_390180644, FIX_0_298631336, FIX_2_053119869
- + .short FIX_3_072711026, FIX_1_501321110, - FIX_0_899976223, - FIX_2_562915447
- +// idct_put_avr32: 8x8 inverse DCT in two passes; the row pass works in place on the block at r10,
- +// the column pass stores 8 unsigned bytes per column at rfp (r12 on entry) and then adds iinc (r11 on entry).
- +idct_put_avr32:
- + pushm r0-r3, r4-r7, lr //Free up registers to use for local variables
- +
- + //; Give room for some variables on the stack
- + sub sp, 8
- + stdsp SP[0], r12 // rfp
- + stdsp SP[4], r11 // iinc
- +
- + mov loop_cnt, 8 //Initialize loop counter
- +
- +0:
- + // Row pass: one row of 8 coefficients (16 bytes at r10) per iteration
- + ldm r10, r0, r1, r2, r3 //Load 8 DCT-coefficients from the current row in the DCT-block
- + mov r6, 0
- +#ifdef USE_PREFETCH
- + pref r10[LINE_SIZE] //Prefetch next line
- +#endif
- + or r4, r2, r3 << 16
- + or r4, r1 //Check if all DCT-coefficients except the DC are zero
- + or r4, r0
- + brne 1f //If there are non-zero AC coefficients perform row-transform
- +
- + paddsub.h r5, r3:t, r6:b //r6 is zero, so both halves of r5 receive the DC-coeff from r3:t
- + plsl.h r5, r5, PASS1_BITS
- + mov r4, r5
- + st.d r10++, r4
- + st.d r10++, r4
- +
- + sub loop_cnt, 1 //Decrement loop counter
- + brne 0b //Perform loop one more time if loop_cnt is not zero
- +
- + bral 2f //Perform column transform after row transform is computed
- +
- +1:
- + // Full row transform for a row with non-zero AC coefficients
- + ld.w r12, pc[coef_table_copy - .]
- + ld.w r9, pc[coef_table_copy - . + 4]
- +
- + padd.h r4, r2, r0 // r4:t = dataptr[2] + dataptr[6],r4:b = dataptr[3] + dataptr[7]
- + mulhh.w r5, r4:t, r12:t
- + mulhh.w r6, r0:t, r12:b
- + ld.w r12, pc[coef_table_copy - . + 8]
- + mulhh.w r7, r2:t, r9:t
- + add r6, r5 // tmp2
- + satrnds r6 >> (CONST_BITS - PASS1_BITS), 31
- + add r7, r5 // tmp3
- + satrnds r7 >> (CONST_BITS - PASS1_BITS), 31
- +
- + paddsub.h r5, r3:t, r1:t
- + plsl.h r5, r5, PASS1_BITS // r5:t = tmp0, r5:b = tmp1
- +
- + paddsub.h r7, r5:t, r7:b // r7:t = tmp10, r7:b = tmp13
- + paddsub.h r6, r5:b, r6:b // r6:t = tmp11, r6:b = tmp12
- +
- +
- +
- + addhh.w lr, r3:b, r1:b // lr = z4
- + addhh.w r5, r4:b, lr:b
- + mulhh.w r5, r5:b, r9:b // r5 = z5
- +
- + ld.w r9, pc[coef_table_copy - . + 12]
- + mulhh.w r4, r4:b, r12:t // r4 = z3
- + mulhh.w lr, lr:b, r12:b // lr = z4
- +
- + add r4, r5
- + add lr, r5
- +
- + addhh.w r5, r2:b, r1:b // r5 = z2
- + addhh.w r8, r3:b, r0:b // r8 = z1
- +
- +
- + mulhh.w r0, r0:b, r9:t // r0 = tmp0
- + ld.w r12, pc[coef_table_copy - . + 16]
- + mulhh.w r1, r1:b, r9:b // r1 = tmp1
- + ld.w r9, pc[coef_table_copy - . + 20]
- + mulhh.w r2, r2:b, r12:t // r2 = tmp2
- + mulhh.w r3, r3:b, r12:b // r3 = tmp3
- + mulhh.w r8, r8:b, r9:t // r8 = z1
- + mulhh.w r5, r5:b, r9:b // r5 = z2
- +
- +
- + add r0, r8
- + add r0, r4
- + add r1, r5
- + add r1, lr
- + add r2, r5
- + add r2, r4
- + add r3, r8
- + add r3, lr
- +
- + satrnds r0 >> (CONST_BITS - PASS1_BITS), 31
- + satrnds r1 >> (CONST_BITS - PASS1_BITS), 31
- + satrnds r2 >> (CONST_BITS - PASS1_BITS), 31
- + satrnds r3 >> (CONST_BITS - PASS1_BITS), 31
- +
- + paddsub.h r5, r6:t, r2:b // r5:t = dataptr[1], r5:b = dataptr[6]
- + paddsub.h r4, r7:t, r3:b // r4:t = dataptr[0], r4:b = dataptr[7]
- + paddsub.h r3, r6:b, r1:b // r3:t = dataptr[2], r3:b = dataptr[5]
- + paddsub.h r2, r7:b, r0:b // r2:t = dataptr[3], r2:b = dataptr[4]
- +
- + sthh.w r10[0], r4:t, r5:t
- + sthh.w r10[4], r3:t, r2:t
- + sthh.w r10[8], r2:b, r3:b
- + sthh.w r10[12], r5:b, r4:b
- +
- +
- +
- + sub r10, -16
- + sub loop_cnt, 1
- + brne 0b
- +
- +2:
- +
- + sub r10, 128 //Set pointer to start of DCT block
- +
- + mov loop_cnt, 8
- +
- +0:
- + ldins.h r3:t,r10[0] // r3:t = dataptr[0]
- + ldins.h r1:t,r10[1*8*2]// r1:t = dataptr[1]
- + ldins.h r2:t,r10[2*8*2]// r2:t = dataptr[2]
- + ldins.h r0:t,r10[5*8*2]// r0:t = dataptr[5]
- + ldins.h r3:b,r10[4*8*2]// r3:b = dataptr[4]
- + ldins.h r1:b,r10[3*8*2]// r1:b = dataptr[3]
- + ldins.h r2:b,r10[6*8*2]// r2:b = dataptr[6]
- + ldins.h r0:b,r10[7*8*2]// r0:b = dataptr[7]
- +
- + or r4, r1, r3 << 16
- + or r4, r2
- + or r4, r0
- + brne 1f //If there are non-zero AC coefficients perform the full column transform
- +
- + lddsp r12, SP[0] // rfp
- + lddsp r9, SP[4] // iinc
- + satrnds r3 >> ( PASS1_BITS + 3 + 16 ), 31
- + packw.sh r3, r3, r3
- + packsh.ub r3, r3, r3
- + mov r2, r3
- + st.d r12[0], r2
- + add r12, r9 // increment rfp
- + sub r10, -2 // Increment the dataptr
- + stdsp SP[0], r12
- +
- + sub loop_cnt, 1//Decrement loop counter
- + brne 0b //Perform loop one more time if loop_cnt is not zero
- +
- + sub sp, -8
- + popm r0-r3, r4-r7, pc//Pop back registers and PC
- +
- +1:
- + // Full column transform for a column with non-zero AC coefficients
- + ld.w r12, pc[coef_table_copy - .]
- + ld.w r9, pc[coef_table_copy - . + 4]
- +
- + addhh.w r4, r2:t, r2:b
- + mulhh.w r4, r4:b, r12:t // r4 = z1
- + mulhh.w r5, r2:b, r12:b
- + ld.w r12, pc[coef_table_copy - . + 8]
- + mulhh.w r6, r2:t, r9:t
- + add r5, r4 // r5 = tmp2
- + add r6, r4 // r6 = tmp3
- +
- + addhh.w r7, r3:t, r3:b
- + subhh.w r8, r3:t, r3:b
- +
- + lsl r7, CONST_BITS
- + lsl r8, CONST_BITS
- +
- + add r2, r7, r6 // r2 = tmp10
- + sub r3, r7, r6 // r3 = tmp13
- + add r4, r8, r5 // r4 = tmp11
- + sub r5, r8, r5 // r5 = tmp12
- +
- +
- + padd.h r6, r0, r1 // r6:t = z4, r6:b = z3
- + addhh.w r7, r6:t, r6:b
- + mulhh.w r7, r7:b, r9:b // r7 = z5
- +
- + ld.w r9, pc[coef_table_copy - . + 12]
- + mulhh.w r8, r6:b, r12:t // r8 = z3
- + mulhh.w r6, r6:t, r12:b // r6 = z4
- +
- + add r8, r7
- + add r6, r7
- +
- + paddx.h r7, r0, r1 // r7:t = z2, r7:b = z1
- +
- + mulhh.w r12, r0:b, r9:t // r12 = tmp0
- + mulhh.w r0, r0:t, r9:b // r0 = tmp1
- + ld.w r9, pc[coef_table_copy - . + 16]
- + add r12, r8
- + add r0, r6
- +
- + ld.w lr, pc[coef_table_copy - . + 20]
- + machh.w r8, r1:b, r9:t // r8 = tmp2
- + machh.w r6, r1:t, r9:b // r6 = tmp3
- + mulhh.w r9, r7:b, lr:t // r9 = z1
- + mulhh.w r7, r7:t, lr:b // r7 = z2
- +
- +
- + add r12, r9
- + add r0, r7
- + add r8, r7
- + add r6, r9
- +
- + add r1, r2, r6 // r1 = dataptr[DCTSIZE*0]
- + sub r2, r2, r6 // r2 = dataptr[DCTSIZE*7]
- + add r6, r4, r8 // r6 = dataptr[DCTSIZE*1]
- + sub r4, r4, r8 // r4 = dataptr[DCTSIZE*6]
- + add r8, r5, r0 // r8 = dataptr[DCTSIZE*2]
- + sub r5, r5, r0 // r5 = dataptr[DCTSIZE*5]
- + add r0, r3, r12 // r0 = dataptr[DCTSIZE*3]
- + sub r3, r3, r12 // r3 = dataptr[DCTSIZE*4]
- +
- + satrnds r1 >> (CONST_BITS+PASS1_BITS+3), 9
- + satrnds r2 >> (CONST_BITS+PASS1_BITS+3), 9
- + satrnds r6 >> (CONST_BITS+PASS1_BITS+3), 9
- + satrnds r4 >> (CONST_BITS+PASS1_BITS+3), 9
- + satrnds r8 >> (CONST_BITS+PASS1_BITS+3), 9
- + satrnds r5 >> (CONST_BITS+PASS1_BITS+3), 9
- + satrnds r0 >> (CONST_BITS+PASS1_BITS+3), 9
- + satrnds r3 >> (CONST_BITS+PASS1_BITS+3), 9
- +
- + packw.sh r1, r1, r6
- + packw.sh r8, r8, r0
- + packw.sh r3, r3, r5
- + packw.sh r4, r4, r2
- +
- + packsh.ub r1, r1, r8
- + packsh.ub r0, r3, r4
- + lddsp r12, SP[0] // rfp
- + lddsp r9, SP[4] // iinc
- + st.d r12[0], r0
- + sub r10, -2 // Increment the dataptr
- + add r12, r9 // increment rfp
- + stdsp SP[0], r12
- +
- + sub loop_cnt, 1 //Decrement loop counter
- + brne 0b //Perform loop one more time if loop_cnt is not zero
- +
- + sub sp, -8
- + popm r0-r3, r4-r7, pc //Pop back registers and PC
- +
- +
- +
- + .align 2
- +coef_table_copy:
- + .short FIX_0_541196100, -FIX_1_847759065, FIX_0_765366865, FIX_1_175875602
- + .short - FIX_1_961570560, - FIX_0_390180644, FIX_0_298631336, FIX_2_053119869
- + .short FIX_3_072711026, FIX_1_501321110, - FIX_0_899976223, - FIX_2_562915447
- +// idct_avr32: 8x8 inverse DCT; the row pass writes to a 128-byte temporary block on the stack,
- +// the column pass reads it back and stores 16-bit results through r12 (pointer to the coefficient block on entry).
- +idct_avr32:
- + pushm r0-r3, r4-r7, lr //Free up registers to use for local variables
- +
- + //; Give room for a temporary block on the stack
- + sub sp, 8*8*2
- +
- + mov loop_cnt, 8 //Initialize loop counter
- +
- +0:
- + // Row pass: one row of 8 coefficients per iteration, result stored at sp (sp advances 16 bytes per row)
- + ldm r12++, r0, r1, r2, r3 //Load 8 DCT-coefficients from the current row in the DCT-block
- + mov r6, 0
- +#ifdef USE_PREFETCH
- + pref r12[LINE_SIZE] //Prefetch next line
- +#endif
- + or r4, r2, r3 << 16
- + or r4, r1 //Check if all DCT-coefficients except the DC are zero
- + or r4, r0
- + brne 1f //If there are non-zero AC coefficients perform row-transform
- +
- + paddsub.h r5, r3:t, r6:b //r6 is zero, so both halves of r5 receive the DC-coeff from r3:t
- + plsl.h r5, r5, PASS1_BITS
- + mov r4, r5
- + st.d sp++, r4
- + st.d sp++, r4
- +
- + sub loop_cnt, 1 //Decrement loop counter
- + brne 0b //Perform loop one more time if loop_cnt is not zero
- +
- + bral 2f //Perform column transform after row transform is computed
- +
- +1:
- + // Full row transform for a row with non-zero AC coefficients
- + ld.w r10, pc[coef_table_idct - .]
- + ld.w r9, pc[coef_table_idct - . + 4]
- +
- + padd.h r4, r2, r0 // r4:t = dataptr[2] + dataptr[6],r4:b = dataptr[3] + dataptr[7]
- + mulhh.w r5, r4:t, r10:t
- + mulhh.w r6, r0:t, r10:b
- + ld.w r10, pc[coef_table_idct - . + 8]
- + mulhh.w r7, r2:t, r9:t
- + add r6, r5 // tmp2
- + satrnds r6 >> (CONST_BITS - PASS1_BITS), 31
- + add r7, r5 // tmp3
- + satrnds r7 >> (CONST_BITS - PASS1_BITS), 31
- +
- + paddsub.h r5, r3:t, r1:t
- + plsl.h r5, r5, PASS1_BITS // r5:t = tmp0, r5:b = tmp1
- +
- + paddsub.h r7, r5:t, r7:b // r7:t = tmp10, r7:b = tmp13
- + paddsub.h r6, r5:b, r6:b // r6:t = tmp11, r6:b = tmp12
- +
- +
- +
- + addhh.w lr, r3:b, r1:b // lr = z4
- + addhh.w r5, r4:b, lr:b
- + mulhh.w r5, r5:b, r9:b // r5 = z5
- +
- + ld.w r9, pc[coef_table_idct - . + 12]
- + mulhh.w r4, r4:b, r10:t // r4 = z3
- + mulhh.w lr, lr:b, r10:b // lr = z4
- +
- + add r4, r5
- + add lr, r5
- +
- + addhh.w r5, r2:b, r1:b // r5 = z2
- + addhh.w r8, r3:b, r0:b // r8 = z1
- +
- +
- + mulhh.w r0, r0:b, r9:t // r0 = tmp0
- + ld.w r10, pc[coef_table_idct - . + 16]
- + mulhh.w r1, r1:b, r9:b // r1 = tmp1
- + ld.w r9, pc[coef_table_idct - . + 20]
- + mulhh.w r2, r2:b, r10:t // r2 = tmp2
- + mulhh.w r3, r3:b, r10:b // r3 = tmp3
- + mulhh.w r8, r8:b, r9:t // r8 = z1
- + mulhh.w r5, r5:b, r9:b // r5 = z2
- +
- +
- + add r0, r8
- + add r0, r4
- + add r1, r5
- + add r1, lr
- + add r2, r5
- + add r2, r4
- + add r3, r8
- + add r3, lr
- +
- + satrnds r0 >> (CONST_BITS - PASS1_BITS), 31
- + satrnds r1 >> (CONST_BITS - PASS1_BITS), 31
- + satrnds r2 >> (CONST_BITS - PASS1_BITS), 31
- + satrnds r3 >> (CONST_BITS - PASS1_BITS), 31
- +
- + paddsub.h r5, r6:t, r2:b // r5:t = dataptr[1], r5:b = dataptr[6]
- + paddsub.h r4, r7:t, r3:b // r4:t = dataptr[0], r4:b = dataptr[7]
- + paddsub.h r3, r6:b, r1:b // r3:t = dataptr[2], r3:b = dataptr[5]
- + paddsub.h r2, r7:b, r0:b // r2:t = dataptr[3], r2:b = dataptr[4]
- +
- + sthh.w sp[0], r4:t, r5:t
- + sthh.w sp[4], r3:t, r2:t
- + sthh.w sp[8], r2:b, r3:b
- + sthh.w sp[12], r5:b, r4:b
- +
- +
- +
- + sub sp, -16
- + sub loop_cnt, 1
- + brne 0b
- +
- +2:
- +
- + sub sp, 8*8*2 //Rewind sp to the start of the temporary block
- + sub r12, 8*8*2 //Set pointer to start of DCT block
- +
- + mov loop_cnt, 8
- +
- +0:
- + ldins.h r3:t,sp[0] // r3:t = dataptr[0]
- + ldins.h r1:t,sp[1*8*2]// r1:t = dataptr[1]
- + ldins.h r2:t,sp[2*8*2]// r2:t = dataptr[2]
- + ldins.h r0:t,sp[5*8*2]// r0:t = dataptr[5]
- + ldins.h r3:b,sp[4*8*2]// r3:b = dataptr[4]
- + ldins.h r1:b,sp[3*8*2]// r1:b = dataptr[3]
- + ldins.h r2:b,sp[6*8*2]// r2:b = dataptr[6]
- + ldins.h r0:b,sp[7*8*2]// r0:b = dataptr[7]
- +
- + or r4, r1, r3 << 16
- + or r4, r2
- + or r4, r0
- + brne 1f //If there are non-zero AC coefficients perform the full column transform
- +
- + satrnds r3 >> ( PASS1_BITS + 3 + 16 ), 31
- + packw.sh r3, r3, r3
- + mov r2, r3
- + st.d r12++, r2
- + st.d r12++, r2
- + sub sp, -2 // Increment the dataptr
- +
- + sub loop_cnt, 1//Decrement loop counter
- + brne 0b //Perform loop one more time if loop_cnt is not zero
- +
- + sub sp, -(8*8*2 - 8*2) //Release the temporary block; the column loop already advanced sp by 8*2 bytes
- + popm r0-r3, r4-r7, pc//Pop back registers and PC
- +
- +1:
- + // Full column transform for a column with non-zero AC coefficients
- + ld.w r10, pc[coef_table_idct - .]
- + ld.w r9, pc[coef_table_idct - . + 4]
- +
- + addhh.w r4, r2:t, r2:b
- + mulhh.w r4, r4:b, r10:t // r4 = z1
- + mulhh.w r5, r2:b, r10:b
- + ld.w r10, pc[coef_table_idct - . + 8]
- + mulhh.w r6, r2:t, r9:t
- + add r5, r4 // r5 = tmp2
- + add r6, r4 // r6 = tmp3
- +
- + addhh.w r7, r3:t, r3:b
- + subhh.w r8, r3:t, r3:b
- +
- + lsl r7, CONST_BITS
- + lsl r8, CONST_BITS
- +
- + add r2, r7, r6 // r2 = tmp10
- + sub r3, r7, r6 // r3 = tmp13
- + add r4, r8, r5 // r4 = tmp11
- + sub r5, r8, r5 // r5 = tmp12
- +
- +
- + padd.h r6, r0, r1 // r6:t = z4, r6:b = z3
- + addhh.w r7, r6:t, r6:b
- + mulhh.w r7, r7:b, r9:b // r7 = z5
- +
- + ld.w r9, pc[coef_table_idct - . + 12]
- + mulhh.w r8, r6:b, r10:t // r8 = z3
- + mulhh.w r6, r6:t, r10:b // r6 = z4
- +
- + add r8, r7
- + add r6, r7
- +
- + paddx.h r7, r0, r1 // r7:t = z2, r7:b = z1
- +
- + mulhh.w r10, r0:b, r9:t // r10 = tmp0
- + mulhh.w r0, r0:t, r9:b // r0 = tmp1
- + ld.w r9, pc[coef_table_idct - . + 16]
- + add r10, r8
- + add r0, r6
- +
- + ld.w lr, pc[coef_table_idct - . + 20]
- + machh.w r8, r1:b, r9:t // r8 = tmp2
- + machh.w r6, r1:t, r9:b // r6 = tmp3
- + mulhh.w r9, r7:b, lr:t // r9 = z1
- + mulhh.w r7, r7:t, lr:b // r7 = z2
- +
- +
- + add r10, r9
- + add r0, r7
- + add r8, r7
- + add r6, r9
- +
- + add r1, r2, r6 // r1 = dataptr[DCTSIZE*0]
- + sub r2, r2, r6 // r2 = dataptr[DCTSIZE*7]
- + add r6, r4, r8 // r6 = dataptr[DCTSIZE*1]
- + sub r4, r4, r8 // r4 = dataptr[DCTSIZE*6]
- + add r8, r5, r0 // r8 = dataptr[DCTSIZE*2]
- + sub r5, r5, r0 // r5 = dataptr[DCTSIZE*5]
- + add r0, r3, r10 // r0 = dataptr[DCTSIZE*3]
- + sub r3, r3, r10 // r3 = dataptr[DCTSIZE*4]
- +
- + satrnds r1 >> (CONST_BITS+PASS1_BITS+3), 9
- + satrnds r2 >> (CONST_BITS+PASS1_BITS+3), 9
- + satrnds r6 >> (CONST_BITS+PASS1_BITS+3), 9
- + satrnds r4 >> (CONST_BITS+PASS1_BITS+3), 9
- + satrnds r8 >> (CONST_BITS+PASS1_BITS+3), 9
- + satrnds r5 >> (CONST_BITS+PASS1_BITS+3), 9
- + satrnds r0 >> (CONST_BITS+PASS1_BITS+3), 9
- + satrnds r3 >> (CONST_BITS+PASS1_BITS+3), 9
- +
- + packw.sh r7, r1, r6
- + packw.sh r6, r8, r0
- + packw.sh r5, r3, r5
- + packw.sh r4, r4, r2
- +
- + stm r12, r4-r7
- + sub sp, -2 // Increment the dataptr
- + sub r12, -16
- +
- + sub loop_cnt, 1 //Decrement loop counter
- + brne 0b //Perform loop one more time if loop_cnt is not zero
- +
- + sub sp, -(8*8*2 - 8*2) //Release the temporary block; the column loop already advanced sp by 8*2 bytes
- + popm r0-r3, r4-r7, pc //Pop back registers and PC
- +
- +
- +
- + .align 2
- +coef_table_idct:
- + .short FIX_0_541196100, -FIX_1_847759065, FIX_0_765366865, FIX_1_175875602
- + .short - FIX_1_961570560, - FIX_0_390180644, FIX_0_298631336, FIX_2_053119869
- + .short FIX_3_072711026, FIX_1_501321110, - FIX_0_899976223, - FIX_2_562915447
- +
- diff --git a/libavcodec/avr32/mc.S b/libavcodec/avr32/mc.S
- new file mode 100644
- index 0000000..07a002d
- --- /dev/null
- +++ b/libavcodec/avr32/mc.S
- @@ -0,0 +1,434 @@
- +/*
- + * Copyright (c) 2007 Atmel Corporation. All rights reserved.
- + *
- + * Redistribution and use in source and binary forms, with or without
- + * modification, are permitted provided that the following conditions
- + * are met:
- + *
- + * 1. Redistributions of source code must retain the above copyright
- + * notice, this list of conditions and the following disclaimer.
- + *
- + * 2. Redistributions in binary form must reproduce the above
- + * copyright notice, this list of conditions and the following
- + * disclaimer in the documentation and/or other materials provided
- + * with the distribution.
- + *
- + * 3. The name of ATMEL may not be used to endorse or promote products
- + * derived from this software without specific prior written
- + * permission.
- + *
- + * THIS SOFTWARE IS PROVIDED BY ATMEL ``AS IS'' AND ANY EXPRESS OR
- + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- + * ARE EXPRESSLY AND SPECIFICALLY DISCLAIMED. IN NO EVENT SHALL ATMEL
- + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
- + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
- + * DAMAGE.
- + */
- +
- +
- + /* packedmask1 reg, round: AND \reg with r8 (round == 0) or with r8 >> 1 (round != 0).
- + pixels8_hv loads r8 with 0x01010101 or 0x02020202 respectively, so both cases mask with 0x01010101 */
- + .macro packedmask1 reg, round
- + .if \round
- + and \reg, \reg, r8 >> 1
- + .else
- + and \reg, r8
- + .endif
- + .endm
- +
- + /* Macro for 8 pixel wide horizontal and vertical interpolation functions */
- + .macro pixels8_hv round, put
- + /* The block is handled as two 4-pixel-wide columns (r7 counts them); the inner loop stores two rows per pass */
- +
- + pushm r0-r7, lr
- +
- + /* R12 = uint8_t *block, R11 = uint8_t pixels, R10 = int line_size, R9 = int h */
- +
- + /* Rounding immediate */
- + .if \round
- + mov r8, lo(0x02020202)
- + orh r8, hi(0x02020202)
- + .else
- + mov r8, lo(0x01010101)
- + orh r8, hi(0x01010101)
- + .endif
- + mov r7, 2
- +
- + /* Pixel naming convention :
- +
- + |-----------------------------------------------------|
- + | s00 | s01 | s02 | s03 | s04 | s05 | s06 | s07 | s08 |
- + |----d00---d01---d02---d03---d04---d05---d06---d07----|
- + | s10 | s11 | s12 | s13 | s14 | s15 | s16 | s17 | s18 |
- + |-----------------------------------------------------|
- + */
- +1:
- + ld.w r0, r11[0] // r0 = { s00, s01, s02, s03 }
- + ld.w r1, r11[1] // r1 = { s01, s02, s03, s04 }
- + mov lr, r9 // lr = row counter for the inner loop
- + eor r2, r0, r1
- + packedmask1 r2, \round
- + add r2, r8
- +
- + paddh.ub r0, r0, r1 // r0 = {(s00+s01)/2,(s01+s02)/2,(s02+s03)/2,(s03+s04)/2}
- +
- + add r11, r10 // pixels += line_size
- + ld.w r1, r11[0] // r1 = { s10, s11, s12, s13 }
- + ld.w r3, r11[1] // r3 = { s11, s12, s13, s14 }
- +0:
- + eor r5, r1, r3
- + packedmask1 r5, \round
- + add r2, r5
- +
- + paddh.ub r1, r1, r3 // r1 = {(s10+s11)/2,(s11+s12)/2,(s12+s13)/2,(s13+s14)/2}
- + eor r6, r0, r1
- + packedmask1 r6, \round
- + add r2, r2, r6 << 1
- +
- + ld.w r3, r11[r10] // r3 = { s00, s01, s02, s03 } of the following row
- + add r11, r10 // pixels += line_size
- + ld.w r4, r11[1] // r4 = { s01, s02, s03, s04 } of the following row
- +
- + paddh.ub r0, r0, r1
- + plsr.b r2, r2, 2
- + padd.b r0, r0, r2 // r0 = { d00, d01, d02, d03 }
- +
- + /* Next row */
- + .if \put
- + eor r2, r3, r4
- + packedmask1 r2, \round
- + add r2, r8
- + .else
- + ld.w r6, r12[0]
- + eor r2, r3, r4
- + packedmask1 r2, \round
- + add r2, r8
- + pavg.ub r0, r0, r6
- + .endif
- + st.w r12[0], r0 // Put data into the block
- +
- + add r5, r2
- + paddh.ub r0, r3, r4 // r0 = {(s00+s01)/2,(s01+s02)/2,(s02+s03)/2,(s03+s04)/2}
- +
- + eor r6, r0, r1
- + packedmask1 r6, \round
- + add r5, r5, r6 << 1
- +
- + .if \put
- + paddh.ub r1, r0, r1
- + plsr.b r5, r5, 2
- + padd.b r1, r1, r5 // r1 = { d10, d11, d12, d13 }
- + .else
- + ld.w r3, r12[r10]
- + paddh.ub r1, r0, r1
- + plsr.b r5, r5, 2
- + padd.b r1, r1, r5 // r1 = { d10, d11, d12, d13 }
- + pavg.ub r1, r1, r3
- + .endif
- +
- + st.w r12[r10], r1 // Put data into the block
- +
- +
- + ld.w r1, r11[r10] // r1 = { s10, s11, s12, s13 }
- + add r11, r10 // pixels += line_size
- + ld.w r3, r11[1] // r3 = { s11, s12, s13, s14 }
- + add r12, r12, r10 << 1 // block += 2*line_size
- + sub lr, 2
- + brne 0b
- +
- + mul r0, r10, r9 // r0 = line_size * h
- + rsub r0, r0, 4 // r0 = 4 - (line_size * h)
- + add r11, r0
- + sub r11, r10 // pixels += 4 - (line_size * (h+1))
- + add r12, r0 // block += 4 - (line_size * h)
- + sub r7, 1
- + brne 1b
- +
- + popm r0-r7, pc
- + .endm
- +
- +
- + /* Macro for 8 pixel wide vertical interpolation functions: averages each source row with the row below it (and with the existing block when put == 0), two rows per loop pass */
- +
- + .macro pixels8_v round, put
- + pushm r4-r7,lr
- + /* R12 = uint8_t *block, R11 = uint8_t pixels, R10 = int line_size, R9 = int h */
- +
- + /*
- + Pixel Naming Convention :
- + |-----------------------------------------------|
- + | s00 | s01 | s02 | s03 | s04 | s05 | s06 | s07 |
- + |-d00---d01---d02---d03---d04---d05---d06---d07-|
- + | s10 | s11 | s12 | s13 | s14 | s15 | s16 | s17 |
- + |-----------------------------------------------|
- + */
- + ld.w r8, r11[r10] // r8 = { s10, s11, s12, s13 }
- + ld.w lr, r11++ // lr = { s00, s01, s02, s03 }, src += 4
- + ld.w r7, r11[0] // r7 = { s04, s05, s06, s07 }
- + ld.w r6, r11[r10] // r6 = { s14, s15, s16, s17 }
- + sub r10, 4 // stride -= 4
- + add r11, r11, r10 << 1 // src += 2*stride
- + sub r11, -4 // src += 4
- +
- +0:
- + .if \round
- + pavg.ub r5, r8, lr // r5 = {(s10+s00)/2,(s11+s01)/2,(s12+s02)/2,(s13+s03)/2}
- + pavg.ub r4, r6, r7 // r4 = {(s14+s04)/2,(s15+s05)/2,(s16+s06)/2,(s17+s07)/2}
- + .else
- + paddh.ub r5, r8, lr // r5 = {(s10+s00)/2,(s11+s01)/2,(s12+s02)/2,(s13+s03)/2}
- + paddh.ub r4, r6, r7 // r4 = {(s14+s04)/2,(s15+s05)/2,(s16+s06)/2,(s17+s07)/2}
- + .endif
- +
- + .if \put
- + st.w r12++, r5 // *dst++ = { d00, d01, d02, d03 }
- + ld.w lr, r11++ // lr = { s10, s11, s12, s13 }, src += 4
- + st.w r12[0], r4 // *dst = { d04, d05, d06, d07 }
- + ld.w r7, r11[0] // r7 = { s14, s15, s16, s17 }
- + .else
- + ld.w lr, r12[0] // lr, r7 = current block contents (scratch use)
- + ld.w r7, r12[4]
- + pavg.ub r5, r5, lr
- + pavg.ub r4, r4, r7
- + st.w r12++, r5 // *dst++ = { d00, d01, d02, d03 }
- + ld.w lr, r11++ // lr = { s10, s11, s12, s13 }, src += 4
- + st.w r12[0], r4 // *dst = { d04, d05, d06, d07 }
- + ld.w r7, r11[0] // r7 = { s14, s15, s16, s17 }
- + .endif
- + add r11, r10 // src += stride
- +#ifdef USE_PREFETCH
- + pref r11[0]
- +#endif
- + add r12, r10 // dst += stride (r10 holds stride - 4; r12 was already post-incremented by 4)
- +
- + .if \round
- + pavg.ub r5, r8, lr // r5 = {(s10+s00)/2,(s11+s01)/2,(s12+s02)/2,(s13+s03)/2}
- + pavg.ub r4, r6, r7 // r4 = {(s14+s04)/2,(s15+s05)/2,(s16+s06)/2,(s17+s07)/2}
- + .else
- + paddh.ub r5, r8, lr // r5 = {(s10+s00)/2,(s11+s01)/2,(s12+s02)/2,(s13+s03)/2}
- + paddh.ub r4, r6, r7 // r4 = {(s14+s04)/2,(s15+s05)/2,(s16+s06)/2,(s17+s07)/2}
- + .endif
- + .if \put
- + st.w r12++, r5 // *dst++ = { d00, d01, d02, d03 }
- + ld.w r8, r11++ // r8 = { s10, s11, s12, s13 }, src += 4
- + st.w r12[0], r4 // *dst = { d04, d05, d06, d07 }
- + ld.w r6, r11[0] // r6 = { s14, s15, s16, s17 }
- + .else
- + ld.w r8, r12[0] // r8, r6 = current block contents (scratch use)
- + ld.w r6, r12[4]
- + pavg.ub r5, r5, r8
- + pavg.ub r4, r4, r6
- + st.w r12++, r5 // *dst++ = { d00, d01, d02, d03 }
- + ld.w r8, r11++ // r8 = { s10, s11, s12, s13 }, src += 4
- + st.w r12[0], r4 // *dst = { d04, d05, d06, d07 }
- + ld.w r6, r11[0] // r6 = { s14, s15, s16, s17 }
- + .endif
- +
- + add r11, r10 // src += stride
- +#ifdef USE_PREFETCH
- + pref r11[0]
- +#endif
- + add r12, r10 // dst += stride
- + sub r9, 2
- + brne 0b
- +
- + popm r4-r7,pc
- + .endm
- +
- + /* Macro for 8 pixel wide horizontal interpolation functions: averages each source byte with the byte at offset +1 (and with the existing block when put == 0), two rows per loop pass */
- +
- + .macro pixels8_h round, put
- + pushm r4-r7, lr
- +
- + /* R12 = uint8_t *block, R11 = uint8_t pixels, R10 = int line_size, R9 = int h */
- + /*
- + Pixel Naming Convention:
- + |--------------------------------------------------------------------|
- + | s00 d00 s01 d01 s02 d02 s03 d03 s04 d04 s05 d05 s06 d06 s07 d07 s08|
- + |------|-------|-------|-------|-------|-------|-------|-------|-----|
- + | s10 d10 s11 d11 s12 d12 s13 d13 s14 d14 s15 d15 s16 d16 s17 d17 s18|
- + |--------------------------------------------------------------------|
- + */
- +
- + ld.w lr, r11[0] // lr = { s00, s01, s02, s03 }
- + ld.w r8, r11[1] // r8 = { s01, s02, s03, s04 }
- + ld.w r7, r11[4] // r7 = { s04, s05, s06, s07 }
- + ld.w r6, r11[5] // r6 = { s05, s06, s07, s08 }
- + add r11, r10 // src += stride
- +
- +0:
- + .if \round
- + pavg.ub lr, r8, lr // lr = {(s00+s01)/2,(s01+s02)/2,(s02+s03)/2,(s03+s04)/2}
- + pavg.ub r7, r6, r7 // r7 = {(s04+s05)/2,(s05+s06)/2,(s06+s07)/2,(s07+s08)/2}
- + .else
- + paddh.ub lr, r8, lr // lr = {(s00+s01)/2,(s01+s02)/2,(s02+s03)/2,(s03+s04)/2}
- + paddh.ub r7, r6, r7 // r7 = {(s04+s05)/2,(s05+s06)/2,(s06+s07)/2,(s07+s08)/2}
- + .endif
- + .if \put
- + ld.w r5, r11[0] // r5 = { s00, s01, s02, s03 }
- + ld.w r4, r11[1] // r4 = { s01, s02, s03, s04 }
- + .else
- + ld.w r8, r12[0] // r8, r6 = current block contents (scratch use)
- + ld.w r6, r12[4]
- + ld.w r5, r11[0] // r5 = { s00, s01, s02, s03 }
- + ld.w r4, r11[1] // r4 = { s01, s02, s03, s04 }
- + pavg.ub lr, lr, r8
- + pavg.ub r7, r7, r6
- + .endif
- + st.w r12[0], lr // dst = { d00, d01, d02, d03 }
- + st.w r12[4], r7 // dst = { d04, d05, d06, d07 }
- + ld.w r8, r11[4] // r8 = { s04, s05, s06, s07 }
- + ld.w r6, r11[5] // r6 = { s05, s06, s07, s08 }
- + add r11, r10 // src += stride
- +#ifdef USE_PREFETCH
- + pref r11[0]
- +#endif
- + add r12, r10 // dst += stride
- +
- + .if \round
- + pavg.ub r5, r4, r5 // r5 = {(s00+s01)/2,(s01+s02)/2,(s02+s03)/2,(s03+s04)/2}
- + pavg.ub r4, r6, r8 // r4 = {(s04+s05)/2,(s05+s06)/2,(s06+s07)/2,(s07+s08)/2}
- + .else
- + paddh.ub r5, r4, r5 // r5 = {(s00+s01)/2,(s01+s02)/2,(s02+s03)/2,(s03+s04)/2}
- + paddh.ub r4, r6, r8 // r4 = {(s04+s05)/2,(s05+s06)/2,(s06+s07)/2,(s07+s08)/2}
- + .endif
- + .if \put
- + ld.w lr, r11[0] // lr = { s00, s01, s02, s03 }
- + ld.w r8, r11[1] // r8 = { s01, s02, s03, s04 }
- + .else
- + ld.w r7, r12[0] // r7, r6 = current block contents (scratch use)
- + ld.w r6, r12[4]
- + ld.w lr, r11[0] // lr = { s00, s01, s02, s03 }
- + ld.w r8, r11[1] // r8 = { s01, s02, s03, s04 }
- + pavg.ub r5, r5, r7
- + pavg.ub r4, r4, r6
- + .endif
- + st.w r12[0], r5 // dst = { d00, d01, d02, d03 }
- + st.w r12[4], r4 // dst = { d04, d05, d06, d07 }
- + ld.w r7, r11[4] // r7 = { s04, s05, s06, s07 }
- + ld.w r6, r11[5] // r6 = { s05, s06, s07, s08 }
- + add r11, r10 // src += stride
- +#ifdef USE_PREFETCH
- + pref r11[0]
- +#endif
- + add r12, r10 // dst += stride
- + sub r9, 2
- + brne 0b
- +
- + popm r4-r7, pc
- + .endm
- +
- + /* Macro for 8 pixel wide copy functions: per loop pass, two rows of 8 pixels are copied (put != 0) or averaged with the existing block (put == 0) */
- + .macro pixels8 put
- + stm --sp, r3-r7,lr
- + /* R12 = uint8_t *block, R11 = uint8_t pixels, R10 = int line_size, R9 = int h */
- + mov lr, r9 // lr = row counter; r9 is reused for pixel data below
- + sub r3, r10, 2 // stride2 = stride - 2
- +0:
- + .if \put
- + ld.w r9, r11[r10] // r9 = { s10, s11, s12, s13 }
- + ld.w r7, r11++ // r7 = { s00, s01, s02, s03 }, src += 4
- + ld.w r6, r11[0] // r6 = { s04, s05, s06, s07 }
- + ld.w r8, r11[r10] // r8 = { s14, s15, s16, s17 }
- + .else
- + ld.w r9, r11[r10] // r9 = { s10, s11, s12, s13 }
- + ld.d r4, r12[0] // r5:r4 = current first row of the block
- + ld.w r7, r11++ // r7 = { s00, s01, s02, s03 }, src += 4
- + ld.w r6, r11[0] // r6 = { s04, s05, s06, s07 }
- + ld.w r8, r11[r10] // r8 = { s14, s15, s16, s17 }
- + pavg.ub r6, r6, r4
- + pavg.ub r7, r7, r5
- + ld.d r4, r12[r10] // r5:r4 = current second row of the block
- + .endif
- + st.d r12, r6 // *dst = { s00, s01, s02, s03, s04, s05, s06, s07 }
- + add r11, r11, r3 << 1 // src += stride2 * 2 (together with the earlier src += 4 this is 2*stride)
- + .ifeq \put
- + pavg.ub r8, r8, r4
- + pavg.ub r9, r9, r5
- + .endif
- + st.d r12[r10 << 0], r8 // *(dst + stride) = { s10, s11, s12, s13, s14, s15, s16, s17 }
- + add r12, r12, r10 << 1 // dst += 2*stride
- + sub lr, 2
- + brne 0b
- + ldm sp++, r3-r7,pc
- +
- + .endm
- + /* Exported entry points: each label expands one macro; arguments are (round, put) for pixels8_hv/_v/_h and (put) for pixels8 */
- + .global put_no_rnd_pixels8_hv_avr32
- + .text
- +put_no_rnd_pixels8_hv_avr32:
- + pixels8_hv 0, 1
- +
- + .global put_pixels8_hv_avr32
- + .text
- +put_pixels8_hv_avr32:
- + pixels8_hv 1, 1
- +
- + .global avg_no_rnd_pixels8_hv_avr32
- + .text
- +avg_no_rnd_pixels8_hv_avr32:
- + pixels8_hv 0, 0
- +
- + .global avg_pixels8_hv_avr32
- + .text
- +avg_pixels8_hv_avr32:
- + pixels8_hv 1, 0
- +
- + .global put_no_rnd_pixels8_v_avr32
- + .text
- +put_no_rnd_pixels8_v_avr32:
- + pixels8_v 0, 1
- +
- + .global put_pixels8_v_avr32
- + .text
- +put_pixels8_v_avr32:
- + pixels8_v 1, 1
- +
- + .global avg_no_rnd_pixels8_v_avr32
- + .text
- +avg_no_rnd_pixels8_v_avr32:
- + pixels8_v 0, 0
- +
- + .global avg_pixels8_v_avr32
- + .text
- +avg_pixels8_v_avr32:
- + pixels8_v 1, 0
- +
- + .global put_no_rnd_pixels8_h_avr32
- + .text
- +put_no_rnd_pixels8_h_avr32:
- + pixels8_h 0, 1
- +
- + .global put_pixels8_h_avr32
- + .text
- +put_pixels8_h_avr32:
- + pixels8_h 1, 1
- +
- + .global avg_no_rnd_pixels8_h_avr32
- + .text
- +avg_no_rnd_pixels8_h_avr32:
- + pixels8_h 0, 0
- +
- + .global avg_pixels8_h_avr32
- + .text
- +avg_pixels8_h_avr32:
- + pixels8_h 1, 0
- +
- + .global put_pixels8_avr32
- + .global put_no_rnd_pixels8_avr32
- + .text
- +put_pixels8_avr32:
- +put_no_rnd_pixels8_avr32:
- + pixels8 1
- +
- + .global avg_no_rnd_pixels8_avr32
- + .global avg_pixels8_avr32
- + .text
- +avg_pixels8_avr32:
- +avg_no_rnd_pixels8_avr32:
- + pixels8 0
- diff --git a/libavcodec/avr32/pico.h b/libavcodec/avr32/pico.h
- new file mode 100644
- index 0000000..32201ba
- --- /dev/null
- +++ b/libavcodec/avr32/pico.h
- @@ -0,0 +1,260 @@
- +/*
- + * Copyright (c) 2007 Atmel Corporation. All rights reserved.
- + *
- + * Redistribution and use in source and binary forms, with or without
- + * modification, are permitted provided that the following conditions
- + * are met:
- + *
- + * 1. Redistributions of source code must retain the above copyright
- + * notice, this list of conditions and the following disclaimer.
- + *
- + * 2. Redistributions in binary form must reproduce the above
- + * copyright notice, this list of conditions and the following
- + * disclaimer in the documentation and/or other materials provided
- + * with the distribution.
- + *
- + * 3. The name of ATMEL may not be used to endorse or promote products
- + * derived from this software without specific prior written
- + * permission.
- + *
- + * THIS SOFTWARE IS PROVIDED BY ATMEL ``AS IS'' AND ANY EXPRESS OR
- + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- + * ARE EXPRESSLY AND SPECIFICALLY DISCLAIMED. IN NO EVENT SHALL ATMEL
- + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
- + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
- + * DAMAGE.
- + */
- +#ifndef __PICO_H__
- +#define __PICO_H__
- +
- +
- +
- +/* Coprocessor Number */
- +#define PICO_CPNO 1
- +
- +/* Pixel Coprocessor Register file: coprocessor register names (cr0..cr15) */
- +#define PICO_REGVECT_INPIX2 cr0
- +#define PICO_REGVECT_INPIX1 cr1
- +#define PICO_REGVECT_INPIX0 cr2
- +#define PICO_REGVECT_OUTPIX2 cr3
- +#define PICO_REGVECT_OUTPIX1 cr4
- +#define PICO_REGVECT_OUTPIX0 cr5
- +#define PICO_REGVECT_COEFF0_A cr6
- +#define PICO_REGVECT_COEFF0_B cr7
- +#define PICO_REGVECT_COEFF1_A cr8
- +#define PICO_REGVECT_COEFF1_B cr9
- +#define PICO_REGVECT_COEFF2_A cr10
- +#define PICO_REGVECT_COEFF2_B cr11
- +#define PICO_REGVECT_VMU0_OUT cr12
- +#define PICO_REGVECT_VMU1_OUT cr13
- +#define PICO_REGVECT_VMU2_OUT cr14
- +#define PICO_REGVECT_CONFIG cr15
- +/* The same registers as plain numbers (each value equals the crN index above) */
- +#define PICO_INPIX2 0
- +#define PICO_INPIX1 1
- +#define PICO_INPIX0 2
- +#define PICO_OUTPIX2 3
- +#define PICO_OUTPIX1 4
- +#define PICO_OUTPIX0 5
- +#define PICO_COEFF0_A 6
- +#define PICO_COEFF0_B 7
- +#define PICO_COEFF1_A 8
- +#define PICO_COEFF1_B 9
- +#define PICO_COEFF2_A 10
- +#define PICO_COEFF2_B 11
- +#define PICO_VMU0_OUT 12
- +#define PICO_VMU1_OUT 13
- +#define PICO_VMU2_OUT 14
- +#define PICO_CONFIG 15
- +
- +/* Config Register */
- +#define PICO_COEFF_FRAC_BITS_OFFSET 0
- +#define PICO_COEFF_FRAC_BITS_SIZE 4
- +#define PICO_OFFSET_FRAC_BITS_OFFSET 4
- +#define PICO_OFFSET_FRAC_BITS_SIZE 4
- +#define PICO_INPUT_MODE_OFFSET 8
- +#define PICO_INPUT_MODE_SIZE 2
- +#define PICO_OUTPUT_MODE_OFFSET 10
- +#define PICO_OUTPUT_MODE_SIZE 1
- +
- +struct pico_config_t { /* memory image of cr6..cr15 as moved by set_pico_config()/get_pico_config() */
- + unsigned int : 32 - PICO_OUTPUT_MODE_OFFSET - PICO_OUTPUT_MODE_SIZE; /* unnamed padding above the output-mode field (32 - 10 - 1 = 21 bits) */
- + unsigned int output_mode : PICO_OUTPUT_MODE_SIZE; /* compared against enum pico_output_mode in dump_pico_config() */
- + unsigned int input_mode : PICO_INPUT_MODE_SIZE; /* compared against enum pico_input_mode in dump_pico_config() */
- + unsigned int offset_frac_bits : PICO_OFFSET_FRAC_BITS_SIZE; /* scale dump_pico_config() applies to the coeffN_3 entries */
- + unsigned int coeff_frac_bits : PICO_COEFF_FRAC_BITS_SIZE; /* scale dump_pico_config() applies to coeffN_0..coeffN_2 */
- + int vmu2_out; /* NOTE(review): member order presumably mirrors how ldcm.d/stcm.d lay cr15..cr6 out in memory -- confirm */
- + int vmu1_out;
- + int vmu0_out;
- + short coeff2_2;
- + short coeff2_3;
- + short coeff2_0;
- + short coeff2_1;
- + short coeff1_2;
- + short coeff1_3;
- + short coeff1_0;
- + short coeff1_1;
- + short coeff0_2;
- + short coeff0_3;
- + short coeff0_0;
- + short coeff0_1;
- +};
- +
- +/* Field builders: shift a value to its position in the CONFIG register. The argument is parenthesized so that expressions such as (a | b) expand correctly. */
- +#define PICO_COEFF_FRAC_BITS(x) ((x) << PICO_COEFF_FRAC_BITS_OFFSET)
- +#define PICO_OFFSET_FRAC_BITS(x) ((x) << PICO_OFFSET_FRAC_BITS_OFFSET)
- +#define PICO_INPUT_MODE(x) ((x) << PICO_INPUT_MODE_OFFSET)
- +#define PICO_OUTPUT_MODE(x) ((x) << PICO_OUTPUT_MODE_OFFSET)
- +/* Field extractors: shift the field down to bit 0 and mask it to its width (the *_SIZE constants). */
- +#define GET_PICO_COEFF_FRAC_BITS(x) (((x) >> PICO_COEFF_FRAC_BITS_OFFSET) & ((1 << PICO_COEFF_FRAC_BITS_SIZE) - 1))
- +#define GET_PICO_OFFSET_FRAC_BITS(x) (((x) >> PICO_OFFSET_FRAC_BITS_OFFSET) & ((1 << PICO_OFFSET_FRAC_BITS_SIZE) - 1))
- +#define GET_PICO_INPUT_MODE(x) (((x) >> PICO_INPUT_MODE_OFFSET) & ((1 << PICO_INPUT_MODE_SIZE) - 1))
- +#define GET_PICO_OUTPUT_MODE(x) (((x) >> PICO_OUTPUT_MODE_OFFSET) & ((1 << PICO_OUTPUT_MODE_SIZE) - 1))
- +
- +enum pico_input_mode { PICO_TRANSFORMATION_MODE,
- + PICO_HOR_FILTER_MODE,
- + PICO_VERT_FILTER_MODE };
- +
- +enum pico_output_mode { PICO_PACKED_MODE,
- + PICO_PLANAR_MODE };
- +
- +/* Bits in coefficients */
- +#define PICO_COEFF_BITS 12
- +
- +/* Operation bits */
- +#define PICO_MATRIX (0)
- +#define PICO_USE_ACC (1 << 2)
- +#define PICO_SINGLE_VECTOR (1 << 3)
- +
- +
- +#define __str(x...) #x
- +#define __xstr(x...) __str(x)
- +
- +#define PICO_PUT_W(pico_reg, x) \
- + __builtin_mvrc_w(PICO_CPNO, pico_reg, x);
- +#define PICO_GET_W(pico_reg) \
- + __builtin_mvcr_w(PICO_CPNO, pico_reg)
- +
- +#define PICO_MVCR_W(x, pico_reg) \
- + asm ("mvcr.w\tcp" __xstr(PICO_CPNO) ", %0, cr" __xstr(pico_reg) : "=r"(x));
- +
- +#define PICO_MVRC_W(pico_reg, x) \
- + asm ("mvrc.w\tcp" __xstr(PICO_CPNO) ", cr" __xstr(pico_reg) ", %0" :: "r"(x));
- +
- +#define PICO_PUT_D(pico_reg, x) \
- + __builtin_mvrc_d(PICO_CPNO, pico_reg, x);
- +#define PICO_GET_D(pico_reg) \
- + __builtin_mvcr_d(PICO_CPNO, pico_reg)
- +
- +#define PICO_MVCR_D(x, pico_reg) \
- + asm volatile ("mvcr.d\tcp" __xstr(PICO_CPNO) ", %0, cr" __xstr(pico_reg) : "=r"(x));
- +#define PICO_MVRC_D(pico_reg, x) \
- + asm volatile ("mvrc.d\tcp" __xstr(PICO_CPNO) ", cr" __xstr(pico_reg) ", %0" :: "r"(x));
- +/* Multi-register transfers between the coprocessor and memory at ptr. The asm touches memory that is not an operand, hence the "memory" clobber. */
- +#define PICO_STCM_W(ptr, pico_regs...) \
- +    asm volatile ("stcm.w\tcp" __xstr(PICO_CPNO) ", %0," __xstr(pico_regs) :: "r"(ptr) : "memory");
- +#define PICO_STCM_D(ptr, pico_regs...) \
- +    asm volatile ("stcm.d\tcp" __xstr(PICO_CPNO) ", %0," __xstr(pico_regs) :: "r"(ptr) : "memory");
- +/* Store variants using the "--ptr" addressing form; ptr is a read-write operand and is updated by the instruction. */
- +#define PICO_STCM_W_DEC(ptr, pico_regs...) \
- +    asm volatile ("stcm.w\tcp" __xstr(PICO_CPNO) ", --%0," __xstr(pico_regs) : "+r"(ptr) :: "memory");
- +#define PICO_STCM_D_DEC(ptr, pico_regs...) \
- +    asm volatile ("stcm.d\tcp" __xstr(PICO_CPNO) ", --%0," __xstr(pico_regs) : "+r"(ptr) :: "memory");
- +/* Load variants: the clobber makes the compiler complete pending stores to the buffer before the asm reads it. */
- +#define PICO_LDCM_W(ptr, pico_regs...) \
- +    asm volatile ("ldcm.w\tcp" __xstr(PICO_CPNO) ", %0," __xstr(pico_regs) :: "r"(ptr) : "memory");
- +#define PICO_LDCM_D(ptr, pico_regs...) \
- +    asm volatile ("ldcm.d\tcp" __xstr(PICO_CPNO) ", %0," __xstr(pico_regs) :: "r"(ptr) : "memory");
- +/* Load variants using the "ptr++" addressing form; ptr is a read-write operand and is updated by the instruction. */
- +#define PICO_LDCM_W_INC(ptr, pico_regs...) \
- +    asm volatile ("ldcm.w\tcp" __xstr(PICO_CPNO) ", %0++," __xstr(pico_regs) : "+r"(ptr) :: "memory");
- +#define PICO_LDCM_D_INC(ptr, pico_regs...) \
- +    asm volatile ("ldcm.d\tcp" __xstr(PICO_CPNO) ", %0++," __xstr(pico_regs) : "+r"(ptr) :: "memory");
- +
- +#define PICO_OP(op, dst_addr, addr0, addr1, addr2) \
- + __builtin_cop(PICO_CPNO, addr0, addr1, addr2, op | dst_addr);
- +
- +static inline void set_pico_config(struct pico_config_t *config){ /* load cr6..cr15 (COEFF0_A..CONFIG) from *config with a single ldcm.d */
- + PICO_LDCM_D(config,
- + PICO_REGVECT_COEFF0_A, PICO_REGVECT_COEFF0_B,
- + PICO_REGVECT_COEFF1_A, PICO_REGVECT_COEFF1_B,
- + PICO_REGVECT_COEFF2_A, PICO_REGVECT_COEFF2_B,
- + PICO_REGVECT_VMU0_OUT, PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT, PICO_REGVECT_CONFIG);
- +}
- +
- +static inline void get_pico_config(struct pico_config_t *config){ /* store cr6..cr15 (COEFF0_A..CONFIG) into *config with a single stcm.d */
- + PICO_STCM_D(config, /* NOTE(review): callers read *config right after this; confirm the asm behind PICO_STCM_D declares a "memory" clobber */
- + PICO_REGVECT_COEFF0_A, PICO_REGVECT_COEFF0_B,
- + PICO_REGVECT_COEFF1_A, PICO_REGVECT_COEFF1_B,
- + PICO_REGVECT_COEFF2_A, PICO_REGVECT_COEFF2_B,
- + PICO_REGVECT_VMU0_OUT, PICO_REGVECT_VMU1_OUT,
- + PICO_REGVECT_VMU2_OUT, PICO_REGVECT_CONFIG);
- +}
- +/* Debug helper: read the PICO configuration back with get_pico_config() and print it through av_log() at AV_LOG_INFO level. */
- +static inline void dump_pico_config(void){
- +    struct pico_config_t pico_config;
- +    const char *input_mode, *output_mode; /* point at string literals, hence const */
- +    get_pico_config(&pico_config);
- +
- +
- +    av_log(NULL, AV_LOG_INFO, "Dumping pico configuration:\n\n");
- +    av_log(NULL, AV_LOG_INFO, "\tcoeff_frac_bits = %d\n", pico_config.coeff_frac_bits);
- +    av_log(NULL, AV_LOG_INFO, "\toffset_frac_bits = %d\n", pico_config.offset_frac_bits);
- +
- +    switch ( pico_config.input_mode ){
- +    case PICO_TRANSFORMATION_MODE:
- +        input_mode = "Transformation Mode";
- +        break;
- +    case PICO_HOR_FILTER_MODE:
- +        input_mode = "Horisontal Filter Mode";
- +        break;
- +    case PICO_VERT_FILTER_MODE:
- +        input_mode = "Vertical Filter Mode";
- +        break;
- +    default:
- +        input_mode = "Unknown Mode!!";
- +        break;
- +    }
- +    av_log(NULL, AV_LOG_INFO, "\tinput_mode = %s\n", input_mode);
- +
- +    switch ( pico_config.output_mode ){
- +    case PICO_PLANAR_MODE:
- +        output_mode = "Planar Mode";
- +        break;
- +    case PICO_PACKED_MODE:
- +        output_mode = "Packed Mode";
- +        break;
- +    default:
- +        output_mode = "Unknown Mode!!";
- +        break;
- +    }
- +
- +    av_log(NULL, AV_LOG_INFO, "\toutput_mode = %s\n", output_mode);
- +    /* Coefficients are fixed point: columns 0..2 are divided by 2^coeff_frac_bits, column 3 by 2^offset_frac_bits. */
- +    av_log(NULL, AV_LOG_INFO, "\tCoeff0_0 = %f\n", (float)pico_config.coeff0_0/(float)(1 << pico_config.coeff_frac_bits));
- +    av_log(NULL, AV_LOG_INFO, "\tCoeff0_1 = %f\n", (float)pico_config.coeff0_1/(float)(1 << pico_config.coeff_frac_bits));
- +    av_log(NULL, AV_LOG_INFO, "\tCoeff0_2 = %f\n", (float)pico_config.coeff0_2/(float)(1 << pico_config.coeff_frac_bits));
- +    av_log(NULL, AV_LOG_INFO, "\tCoeff0_3 = %f\n", (float)pico_config.coeff0_3/(float)(1 << pico_config.offset_frac_bits));
- +
- +    av_log(NULL, AV_LOG_INFO, "\tCoeff1_0 = %f\n", (float)pico_config.coeff1_0/(float)(1 << pico_config.coeff_frac_bits));
- +    av_log(NULL, AV_LOG_INFO, "\tCoeff1_1 = %f\n", (float)pico_config.coeff1_1/(float)(1 << pico_config.coeff_frac_bits));
- +    av_log(NULL, AV_LOG_INFO, "\tCoeff1_2 = %f\n", (float)pico_config.coeff1_2/(float)(1 << pico_config.coeff_frac_bits));
- +    av_log(NULL, AV_LOG_INFO, "\tCoeff1_3 = %f\n", (float)pico_config.coeff1_3/(float)(1 << pico_config.offset_frac_bits));
- +
- +    av_log(NULL, AV_LOG_INFO, "\tCoeff2_0 = %f\n", (float)pico_config.coeff2_0/(float)(1 << pico_config.coeff_frac_bits));
- +    av_log(NULL, AV_LOG_INFO, "\tCoeff2_1 = %f\n", (float)pico_config.coeff2_1/(float)(1 << pico_config.coeff_frac_bits));
- +    av_log(NULL, AV_LOG_INFO, "\tCoeff2_2 = %f\n", (float)pico_config.coeff2_2/(float)(1 << pico_config.coeff_frac_bits));
- +    av_log(NULL, AV_LOG_INFO, "\tCoeff2_3 = %f\n", (float)pico_config.coeff2_3/(float)(1 << pico_config.offset_frac_bits));
- +}
- +
- +
- +
- +#endif
- +
- diff --git a/libavcodec/bitstream.h b/libavcodec/bitstream.h
- index 26b4f8d..1f8fabf 100644
- --- a/libavcodec/bitstream.h
- +++ b/libavcodec/bitstream.h
- @@ -171,7 +171,7 @@ typedef struct RL_VLC_ELEM {
- #endif
-
- /* used to avoid missaligned exceptions on some archs (alpha, ...) */
- -#if defined(ARCH_X86) || defined(ARCH_X86_64)
- +#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_AVR32)
- # define unaligned16(a) (*(const uint16_t*)(a))
- # define unaligned32(a) (*(const uint32_t*)(a))
- # define unaligned64(a) (*(const uint64_t*)(a))
- @@ -813,6 +813,44 @@ void free_vlc(VLC *vlc);
- * if the vlc code is invalid and max_depth>1 than the number of bits removed
- * is undefined
- */
- +
- +#if defined(ARCH_AVR32)
- +#define GET_VLC(code, name, gb, table, bits, max_depth)\
- +{\
- + int n, index, nb_bits;\
- + union { VLC_TYPE vlc[2];\
- + uint32_t u32; } table_elem;\
- + /* AVR32 variant: fetch a table entry with one 32-bit load and unpack {code, length} through the union. NOTE(review): assumes two VLC_TYPE elements fill exactly 32 bits -- confirm. */\
- + index= SHOW_UBITS(name, gb, bits);\
- + table_elem.u32 = unaligned32(&table[index]); \
- + code = table_elem.vlc[0];\
- + n = table_elem.vlc[1];\
- + /* n < 0 marks a link to a sub-table: code is then the sub-table base index and -n the number of bits that index it. */\
- + if(max_depth > 1 && n < 0 ){\
- + LAST_SKIP_BITS(name, gb, bits)\
- + UPDATE_CACHE(name, gb)\
- +\
- + nb_bits = -n;\
- +\
- + index= SHOW_UBITS(name, gb, nb_bits) + code;\
- + table_elem.u32 = unaligned32(&table[index]); \
- + code = table_elem.vlc[0];\
- + n = table_elem.vlc[1];\
- + if(max_depth > 2 && n < 0){\
- + LAST_SKIP_BITS(name, gb, nb_bits)\
- + UPDATE_CACHE(name, gb)\
- + /* The third level reads the entry with plain array accesses instead of the union load. */\
- + nb_bits = -n;\
- +\
- + index= SHOW_UBITS(name, gb, nb_bits) + code;\
- + code = table[index][0];\
- + n = table[index][1];\
- + }\
- + }\
- + SKIP_BITS(name, gb, n)\
- +}
- +
- +#else
- #define GET_VLC(code, name, gb, table, bits, max_depth)\
- {\
- int n, index, nb_bits;\
- @@ -821,7 +859,7 @@ void free_vlc(VLC *vlc);
- code = table[index][0];\
- n = table[index][1];\
- \
- - if(max_depth > 1 && n < 0){\
- + if(max_depth > 1 && n < 0 ){\
- LAST_SKIP_BITS(name, gb, bits)\
- UPDATE_CACHE(name, gb)\
- \
- @@ -843,7 +881,38 @@ void free_vlc(VLC *vlc);
- }\
- SKIP_BITS(name, gb, n)\
- }
- +#endif
-
- +#if defined(ARCH_AVR32)
- +#define GET_RL_VLC(level, run, name, gb, table, bits, max_depth, need_update)\
- +{\
- + int n, index, nb_bits;\
- + union { RL_VLC_ELEM vlc;\
- + uint32_t u32; } table_elem;\
- + /* AVR32 variant: fetch a whole RL_VLC_ELEM with one 32-bit load. NOTE(review): assumes sizeof(RL_VLC_ELEM) == 4 -- confirm. */\
- + index= SHOW_UBITS(name, gb, bits);\
- + table_elem.u32 = unaligned32(&table[index]); \
- + level = table_elem.vlc.level;\
- + n = table_elem.vlc.len;\
- + /* len < 0 marks a link to a sub-table: level is then the sub-table base index and -len the number of bits that index it. */\
- + if(max_depth > 1 && n < 0 ){\
- + SKIP_BITS(name, gb, bits)\
- + if(need_update){\
- + UPDATE_CACHE(name, gb)\
- + }\
- +\
- + nb_bits = -n;\
- +\
- + index= SHOW_UBITS(name, gb, nb_bits) + level;\
- + table_elem.u32 = unaligned32(&table[index]); \
- + level = table_elem.vlc.level;\
- + n = table_elem.vlc.len;\
- + }\
- + run= table_elem.vlc.run; /* run comes from whichever element was loaded last */\
- + SKIP_BITS(name, gb, n)\
- +}
- +
- +#else
- #define GET_RL_VLC(level, run, name, gb, table, bits, max_depth, need_update)\
- {\
- int n, index, nb_bits;\
- @@ -852,7 +921,7 @@ void free_vlc(VLC *vlc);
- level = table[index].level;\
- n = table[index].len;\
- \
- - if(max_depth > 1 && n < 0){\
- + if(max_depth > 1 && n < 0 ){\
- SKIP_BITS(name, gb, bits)\
- if(need_update){\
- UPDATE_CACHE(name, gb)\
- @@ -867,7 +936,7 @@ void free_vlc(VLC *vlc);
- run= table[index].run;\
- SKIP_BITS(name, gb, n)\
- }
- -
- +#endif
-
- /**
- * parses a vlc code, faster then get_vlc()
- diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
- index 56c42b9..8fc10c6 100644
- --- a/libavcodec/dsputil.c
- +++ b/libavcodec/dsputil.c
- @@ -4197,6 +4197,9 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
- #ifdef ARCH_BFIN
- dsputil_init_bfin(c,avctx);
- #endif
- +#ifdef ARCH_AVR32
- + dsputil_init_avr32(c,avctx);
- +#endif
-
- for(i=0; i<64; i++){
- if(!c->put_2tap_qpel_pixels_tab[0][i])
- diff --git a/libavcodec/h264.c b/libavcodec/h264.c
- index 865e80a..8f7c3f1 100644
- --- a/libavcodec/h264.c
- +++ b/libavcodec/h264.c
- @@ -3258,7 +3258,12 @@ static void free_tables(H264Context *h){
-
- static void init_dequant8_coeff_table(H264Context *h){
- int i,q,x;
- +#ifdef ARCH_AVR32
- + const int transpose = 0;
- +#else
- const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
- +#endif
- +
- h->dequant8_coeff[0] = h->dequant8_buffer[0];
- h->dequant8_coeff[1] = h->dequant8_buffer[1];
-
- @@ -3281,7 +3286,13 @@ static void init_dequant8_coeff_table(H264Context *h){
-
- static void init_dequant4_coeff_table(H264Context *h){
- int i,j,q,x;
- + // Yes this is ugly as hell....
- +#ifdef ARCH_AVR32
- + const int transpose = 0;
- +#else
- const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
- +#endif
- +
- for(i=0; i<6; i++ ){
- h->dequant4_coeff[i] = h->dequant4_buffer[i];
- for(j=0; j<i; j++){
- @@ -4663,7 +4674,11 @@ static int decode_slice_header(H264Context *h){
- if (MPV_common_init(s) < 0)
- return -1;
-
- +#ifdef ARCH_AVR32
- + if ( 1 ){
- +#else
- if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
- +#endif
- memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
- memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
- }else{
- diff --git a/libavutil/common.h b/libavutil/common.h
- index 3ae5971..7e52b90 100644
- --- a/libavutil/common.h
- +++ b/libavutil/common.h
- @@ -283,23 +283,39 @@ static inline int mid_pred(int a, int b, int c)
- * @param amax maximum value of the clip range
- * @return cliped value
- */
- +#if defined(ARCH_AVR32)
- +#define clip(a, amin, amax) \
- + ({ int __tmp__; /* GNU statement expression: each argument is evaluated once, the value of the macro is __tmp__ */ \
- + asm ("min\t%0, %1, %2\n" /* per mnemonic: __tmp__ = min(a, amax) */ \
- + "max\t%0, %0, %3\n" /* per mnemonic: __tmp__ = max(__tmp__, amin) */ \
- + : "=&r"(__tmp__) : "r"(a), "r"(amax), "r"(amin)); \
- + __tmp__; })
- +#else
- static inline int clip(int a, int amin, int amax)
- {
- if (a < amin) return amin;
- else if (a > amax) return amax;
- else return a;
- }
- +#endif
-
- /**
- * clip a signed integer value into the 0-255 range
- * @param a value to clip
- * @return cliped value
- */
- +#if defined(ARCH_AVR32)
- +#define clip_uint8(a) \
- + ({ int __tmp__ = a; /* note: the macro yields an int, whereas the generic inline version returns uint8_t */ \
- + asm ("satu\t%0 >> 0, 8" : "+r"(__tmp__)); /* presumably saturates __tmp__ to 8 unsigned bits (0-255) in place -- confirm against the AVR32 manual */ \
- + __tmp__; })
- +#else
- static inline uint8_t clip_uint8(int a)
- {
- if (a&(~255)) return (-a)>>31;
- else return a;
- }
- +#endif
-
- /* math */
- int64_t ff_gcd(int64_t a, int64_t b);
- diff --git a/libavutil/internal.h b/libavutil/internal.h
- index 285d304..a8b0718 100644
- --- a/libavutil/internal.h
- +++ b/libavutil/internal.h
- @@ -210,6 +210,15 @@ if((y)<(x)){\
- }\
- }
-
- +/* XXX: Hack for uclibc which declares lrintf but does not implement it: map lrintf()/llrint() onto rint(), cast to the integer type the real functions return, and force HAVE_LRINTF so the fallback below is skipped. */
- +#ifdef ARCH_AVR32
- +#undef HAVE_LRINTF
- +#define HAVE_LRINTF 1
- +#define lrintf(x) ((long)rint(x))
- +#define llrint(x) ((long long)rint(x))
- +#endif
- +
- +
- #ifndef HAVE_LRINTF
- /* XXX: add ISOC specific test to avoid specific BSD testing. */
- /* better than nothing implementation. */
- diff --git a/libfaad2/common.h b/libfaad2/common.h
- index f809042..6c5fb21 100644
- --- a/libfaad2/common.h
- +++ b/libfaad2/common.h
- @@ -67,7 +67,7 @@ extern "C" {
- /* Use if target platform has address generators with autoincrement */
- //#define PREFER_POINTERS
-
- -#if defined(_WIN32_WCE) || defined(__arm__)
- +#if defined(_WIN32_WCE) || defined(__arm__) || defined(__avr32__)
- #define FIXED_POINT
- #endif
-
- diff --git a/libmpcodecs/ad_libmad.c b/libmpcodecs/ad_libmad.c
- index 076359a..51b77fe 100644
- --- a/libmpcodecs/ad_libmad.c
- +++ b/libmpcodecs/ad_libmad.c
- @@ -86,6 +86,11 @@ static int init(sh_audio_t *sh){
- sh->channels=(this->frame.header.mode == MAD_MODE_SINGLE_CHANNEL) ? 1 : 2;
- sh->samplerate=this->frame.header.samplerate;
- sh->i_bps=this->frame.header.bitrate/8;
- +#ifdef WORDS_BIGENDIAN
- + sh->sample_format = AF_FORMAT_S16_BE;
- +#else
- + sh->sample_format = AF_FORMAT_S16_LE;
- +#endif
- sh->samplesize=2;
-
- return 1;
- diff --git a/libswscale/pico-avr32.h b/libswscale/pico-avr32.h
- new file mode 100644
- index 0000000..7ac6200
- --- /dev/null
- +++ b/libswscale/pico-avr32.h
- @@ -0,0 +1,137 @@
- +/*
- + * Copyright (c) 2007 Atmel Corporation. All rights reserved.
- + *
- + * Redistribution and use in source and binary forms, with or without
- + * modification, are permitted provided that the following conditions
- + * are met:
- + *
- + * 1. Redistributions of source code must retain the above copyright
- + * notice, this list of conditions and the following disclaimer.
- + *
- + * 2. Redistributions in binary form must reproduce the above
- + * copyright notice, this list of conditions and the following
- + * disclaimer in the documentation and/or other materials provided
- + * with the distribution.
- + *
- + * 3. The name of ATMEL may not be used to endorse or promote products
- + * derived from this software without specific prior written
- + * permission.
- + *
- + * THIS SOFTWARE IS PROVIDED BY ATMEL ``AS IS'' AND ANY EXPRESS OR
- + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- + * ARE EXPRESSLY AND SPECIFICALLY DISCLAIMED. IN NO EVENT SHALL ATMEL
- + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
- + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
- + * DAMAGE.
- + */
- +#ifndef __PICO_H__
- +#define __PICO_H__
- +
- +/* Coprocessor Number */
- +#define PICO_CPNO 1
- +
- +/* Pixel Coprocessor Register file */
- +#define PICO_REGVECT_INPIX2 cr0
- +#define PICO_REGVECT_INPIX1 cr1
- +#define PICO_REGVECT_INPIX0 cr2
- +#define PICO_REGVECT_OUTPIX2 cr3
- +#define PICO_REGVECT_OUTPIX1 cr4
- +#define PICO_REGVECT_OUTPIX0 cr5
- +#define PICO_REGVECT_COEFF0_A cr6
- +#define PICO_REGVECT_COEFF0_B cr7
- +#define PICO_REGVECT_COEFF1_A cr8
- +#define PICO_REGVECT_COEFF1_B cr9
- +#define PICO_REGVECT_COEFF2_A cr10
- +#define PICO_REGVECT_COEFF2_B cr11
- +#define PICO_REGVECT_VMU0_OUT cr12
- +#define PICO_REGVECT_VMU1_OUT cr13
- +#define PICO_REGVECT_VMU2_OUT cr14
- +#define PICO_REGVECT_CONFIG cr15
- +
- +#define PICO_INPIX2 0
- +#define PICO_INPIX1 1
- +#define PICO_INPIX0 2
- +#define PICO_OUTPIX2 3
- +#define PICO_OUTPIX1 4
- +#define PICO_OUTPIX0 5
- +#define PICO_COEFF0_A 6
- +#define PICO_COEFF0_B 7
- +#define PICO_COEFF1_A 8
- +#define PICO_COEFF1_B 9
- +#define PICO_COEFF2_A 10
- +#define PICO_COEFF2_B 11
- +#define PICO_VMU0_OUT 12
- +#define PICO_VMU1_OUT 13
- +#define PICO_VMU2_OUT 14
- +#define PICO_CONFIG 15
- +
- +/* Config Register */
- +#define PICO_COEFF_FRAC_BITS 0
- +#define PICO_COEFF_FRAC_BITS_WIDTH 4
- +#define PICO_OFFSET_FRAC_BITS 4
- +#define PICO_OFFSET_FRAC_BITS_WIDTH 4
- +#define PICO_INPUT_MODE 8
- +#define PICO_INPUT_MODE_WIDTH 2
- +#define PICO_OUTPUT_MODE 10
- +
- +#define PICO_TRANSFORMATION_MODE 0
- +#define PICO_HOR_FILTER_MODE 1
- +#define PICO_VERT_FILTER_MODE 2
- +
- +#define PICO_PLANAR_MODE 1
- +#define PICO_PACKED_MODE 0
- +
- +/* Bits in coefficients */
- +#define PICO_COEFF_BITS 12
- +
- +/* Operation bits */
- +#define PICO_USE_ACC (1 << 2)
- +#define PICO_SINGLE_VECTOR (1 << 3)
- +
- +
- +#define __str(x...) #x
- +#define __xstr(x...) __str(x)
- +
- +#define PICO_PUT_W(pico_reg, x) \
- + __builtin_mvrc_w(PICO_CPNO, pico_reg, x);
- +#define PICO_GET_W(pico_reg) \
- + __builtin_mvcr_w(PICO_CPNO, pico_reg)
- +
- +#define PICO_PUT_D(pico_reg, x) \
- + __builtin_mvrc_d(PICO_CPNO, pico_reg, x);
- +#define PICO_GET_D(pico_reg) \
- + __builtin_mvcr_d(PICO_CPNO, pico_reg)
- +
- +/* Multi-register transfers between the coprocessor and memory at ptr. The asm touches memory that is not an operand, hence the "memory" clobber. */
- +#define PICO_STCM_W(ptr, pico_regs...) \
- +    asm volatile ("stcm.w\tcp" __xstr(PICO_CPNO) ", %0," __xstr(pico_regs) :: "r"(ptr) : "memory");
- +#define PICO_STCM_D(ptr, pico_regs...) \
- +    asm volatile ("stcm.d\tcp" __xstr(PICO_CPNO) ", %0," __xstr(pico_regs) :: "r"(ptr) : "memory");
- +/* Store variants using the "--ptr" addressing form; ptr is a read-write operand and is updated by the instruction. */
- +#define PICO_STCM_W_DEC(ptr, pico_regs...) \
- +    asm volatile ("stcm.w\tcp" __xstr(PICO_CPNO) ", --%0," __xstr(pico_regs) : "+r"(ptr) :: "memory");
- +#define PICO_STCM_D_DEC(ptr, pico_regs...) \
- +    asm volatile ("stcm.d\tcp" __xstr(PICO_CPNO) ", --%0," __xstr(pico_regs) : "+r"(ptr) :: "memory");
- +/* Load variants: the clobber makes the compiler complete pending stores to the buffer before the asm reads it. */
- +#define PICO_LDCM_W(ptr, pico_regs...) \
- +    asm volatile ("ldcm.w\tcp" __xstr(PICO_CPNO) ", %0," __xstr(pico_regs) :: "r"(ptr) : "memory");
- +#define PICO_LDCM_D(ptr, pico_regs...) \
- +    asm volatile ("ldcm.d\tcp" __xstr(PICO_CPNO) ", %0," __xstr(pico_regs) :: "r"(ptr) : "memory");
- +/* Load variants using the "ptr++" addressing form; ptr is a read-write operand and is updated by the instruction. */
- +#define PICO_LDCM_W_INC(ptr, pico_regs...) \
- +    asm volatile ("ldcm.w\tcp" __xstr(PICO_CPNO) ", %0++," __xstr(pico_regs) : "+r"(ptr) :: "memory");
- +#define PICO_LDCM_D_INC(ptr, pico_regs...) \
- +    asm volatile ("ldcm.d\tcp" __xstr(PICO_CPNO) ", %0++," __xstr(pico_regs) : "+r"(ptr) :: "memory");
- +
- +#define PICO_OP(op, dst_addr, addr0, addr1, addr2) \
- + __builtin_cop(PICO_CPNO, addr0, addr1, addr2, op | dst_addr);
- +
- +
- +#endif
- +
- diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
- index ecd28f5..3221d0c 100644
- --- a/libswscale/swscale_internal.h
- +++ b/libswscale/swscale_internal.h
- @@ -173,7 +173,7 @@ typedef struct SwsContext{
- SwsFunc yuv2rgb_get_func_ptr (SwsContext *c);
- int yuv2rgb_c_init_tables (SwsContext *c, const int inv_table[4], int fullRange, int brightness, int contrast, int saturation);
-
- -char *sws_format_name(int format);
- +char *sws_format_name(enum PixelFormat format);
-
- //FIXME replace this with something faster
- #define isPlanarYUV(x) ((x)==PIX_FMT_YUV410P || (x)==PIX_FMT_YUV420P \
- diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
- index 71759bc..fa83985 100644
- --- a/libswscale/yuv2rgb.c
- +++ b/libswscale/yuv2rgb.c
- @@ -44,6 +44,10 @@
- #include "yuv2rgb_mlib.c"
- #endif
-
- +#ifdef ARCH_AVR32
- +#include "yuv2rgb_avr32.c"
- +#endif
- +
- #define DITHER1XBPP // only for mmx
-
- const uint8_t __attribute__((aligned(8))) dither_2x2_4[2][8]={
- @@ -601,6 +605,12 @@ SwsFunc yuv2rgb_get_func_ptr (SwsContext *c)
- if(t) return t;
- }
- #endif
- +#ifdef ARCH_AVR32
- + {
- + SwsFunc t= yuv2rgb_init_avr32(c);
- + if(t) return t;
- + }
- +#endif
- #ifdef HAVE_ALTIVEC
- if (c->flags & SWS_CPU_CAPS_ALTIVEC)
- {
- @@ -678,6 +688,10 @@ int yuv2rgb_c_init_tables (SwsContext *c, const int inv_table[4], int fullRange,
- //printf("%lld %lld %lld %lld %lld\n", cy, crv, cbu, cgu, cgv);
- oy -= 256*brightness;
-
- +#ifdef ARCH_AVR32
- + yuv2rgb_c_init_tables_avr32 (c, inv_table, fullRange, brightness, contrast, saturation);
- +#endif
- +
- for (i = 0; i < 1024; i++) {
- int j;
-
- diff --git a/libswscale/yuv2rgb_avr32.c b/libswscale/yuv2rgb_avr32.c
- new file mode 100644
- index 0000000..4a8341e
- --- /dev/null
- +++ b/libswscale/yuv2rgb_avr32.c
- @@ -0,0 +1,416 @@
- +/*
- + * Copyright (c) 2007 Atmel Corporation. All rights reserved.
- + *
- + * Redistribution and use in source and binary forms, with or without
- + * modification, are permitted provided that the following conditions
- + * are met:
- + *
- + * 1. Redistributions of source code must retain the above copyright
- + * notice, this list of conditions and the following disclaimer.
- + *
- + * 2. Redistributions in binary form must reproduce the above
- + * copyright notice, this list of conditions and the following
- + * disclaimer in the documentation and/or other materials provided
- + * with the distribution.
- + *
- + * 3. The name of ATMEL may not be used to endorse or promote products
- + * derived from this software without specific prior written
- + * permission.
- + *
- + * THIS SOFTWARE IS PROVIDED BY ATMEL ``AS IS'' AND ANY EXPRESS OR
- + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- + * ARE EXPRESSLY AND SPECIFICALLY DISCLAIMED. IN NO EVENT SHALL ATMEL
- + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
- + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
- + * DAMAGE.
- + */
- +#include "pico-avr32.h"
- +
- +
- +#define RGB(uv_part) \
- + __asm__ volatile ( \
- + "ld.w\t%0, %3[%7:" uv_part " << 2]\n\t" /* tmp = c->table_gV[V] */ \
- + "ld.w\t%1, %4[%8:" uv_part " << 2]\n\t" /* g = c->table_gU[U] */ \
- + "ld.w\t%2, %5[%8:" uv_part " << 2]\n\t" /* b = c->table_bU[U] */ \
- + "add\t%1, %0\n\t" /* g += tmp */\
- + "ld.w\t%0, %6[%7:" uv_part " << 2]" /* r = c->table_rV[V] */ \
- + : "=&r" (r), "=&r" (g), "=&r" (b) \
- + : "r" (&c->table_gV[0]), "r" (&c->table_gU[0]),"r" (&c->table_bU[0]), \
- + "r" (&c->table_rV[0]), "r" (V), "r" (U));
- +
- +/* Two pixels, R,G,B byte order: look up src[2*idx] and src[2*idx+1] in the caller's r/g/b tables and store 6 bytes at dst[6*idx]. Uses the caller's locals r, g, b and tmp. */
- +#undef YUV2RGB1
- +#define YUV2RGB1(dst, src, y, idx) \
- +    { int tmp2; __asm__ volatile ( \
- +        "ld.ub\t%0, %3[2*%8]\n\t" /* y = src[2*idx] */ \
- +        "ld.ub\t%1, %4[%0]\n\t" /* tmp = r[y] */ \
- +        "ld.ub\t%2, %5[%0]\n\t" /* tmp2 = g[y] */ \
- +        "st.b\t%7[6*%8 + 0], %1\n\t" /* dst[6*idx + 0] = tmp (r) */ \
- +        "st.b\t%7[6*%8 + 1], %2\n\t" /* dst[6*idx + 1] = tmp2 (g) */ \
- +        "ld.ub\t%1, %6[%0]\n\t" /* tmp = b[y] */ \
- +        "ld.ub\t%0, %3[2*%8 + 1]\n\t" /* y = src[2*idx + 1] */ \
- +        "st.b\t%7[6*%8 + 2], %1\n\t" /* dst[6*idx + 2] = tmp (b) */ \
- +        "ld.ub\t%1, %4[%0]\n\t" /* tmp = r[y] */ \
- +        "ld.ub\t%2, %5[%0]\n\t" /* tmp2 = g[y] */ \
- +        "st.b\t%7[6*%8 + 3], %1\n\t" /* dst[6*idx + 3] = tmp (r) */ \
- +        "ld.ub\t%1, %6[%0]\n\t" /* tmp = b[y] */ \
- +        "st.b\t%7[6*%8 + 4], %2\n\t" /* dst[6*idx + 4] = tmp2 (g) */ \
- +        "st.b\t%7[6*%8 + 5], %1" /* dst[6*idx + 5] = tmp (b) */ \
- +        : "=&r" (y), "=&r" (tmp), "=&r" (tmp2) \
- +        : "r" (src), "r" (r), "r" (g), "r" (b), "r" (dst), "i" (idx) : "memory"); }
- +/* Two pixels, R,G,B byte order: look up src[2*idx] and src[2*idx+1] in the caller's r/g/b tables and store 6 bytes at dst[6*idx]. Uses the caller's locals r, g, b and tmp. */
- +#undef YUV2RGB2
- +#define YUV2RGB2(dst, src, y, idx) \
- +    { int tmp2; __asm__ volatile ( \
- +        "ld.ub\t%0, %3[2*%8]\n\t" /* y = src[2*idx] */ \
- +        "ld.ub\t%1, %4[%0]\n\t" /* tmp = r[y] */ \
- +        "ld.ub\t%2, %5[%0]\n\t" /* tmp2 = g[y] */ \
- +        "st.b\t%7[6*%8 + 0], %1\n\t" /* dst[6*idx + 0] = tmp (r) */ \
- +        "st.b\t%7[6*%8 + 1], %2\n\t" /* dst[6*idx + 1] = tmp2 (g) */ \
- +        "ld.ub\t%1, %6[%0]\n\t" /* tmp = b[y] */ \
- +        "ld.ub\t%0, %3[2*%8 + 1]\n\t" /* y = src[2*idx + 1] */ \
- +        "st.b\t%7[6*%8 + 2], %1\n\t" /* dst[6*idx + 2] = tmp (b) */ \
- +        "ld.ub\t%1, %4[%0]\n\t" /* tmp = r[y] */ \
- +        "ld.ub\t%2, %5[%0]\n\t" /* tmp2 = g[y] */ \
- +        "st.b\t%7[6*%8 + 3], %1\n\t" /* dst[6*idx + 3] = tmp (r) */ \
- +        "ld.ub\t%1, %6[%0]\n\t" /* tmp = b[y] */ \
- +        "st.b\t%7[6*%8 + 4], %2\n\t" /* dst[6*idx + 4] = tmp2 (g) */ \
- +        "st.b\t%7[6*%8 + 5], %1" /* dst[6*idx + 5] = tmp (b) */ \
- +        : "=&r" (y), "=&r" (tmp), "=&r" (tmp2) \
- +        : "r" (src), "r" (r), "r" (g), "r" (b), "r" (dst), "i" (idx) : "memory"); }
- +
- +/* Two pixels, B,G,R byte order: look up src[2*idx] and src[2*idx+1] in the caller's r/g/b tables and store 6 bytes at dst[6*idx]. Uses the caller's locals r, g, b and tmp. */
- +#undef YUV2BGR1
- +#define YUV2BGR1(dst, src, y, idx) \
- +    { int tmp2; __asm__ volatile ( \
- +        "ld.ub\t%0, %3[2*%8]\n\t" /* y = src[2*idx] */ \
- +        "ld.ub\t%1, %4[%0]\n\t" /* tmp = r[y] */ \
- +        "ld.ub\t%2, %5[%0]\n\t" /* tmp2 = g[y] */ \
- +        "st.b\t%7[6*%8 + 2], %1\n\t" /* dst[6*idx + 2] = tmp (r) */ \
- +        "st.b\t%7[6*%8 + 1], %2\n\t" /* dst[6*idx + 1] = tmp2 (g) */ \
- +        "ld.ub\t%1, %6[%0]\n\t" /* tmp = b[y] */ \
- +        "ld.ub\t%0, %3[2*%8 + 1]\n\t" /* y = src[2*idx + 1] */ \
- +        "st.b\t%7[6*%8 + 0], %1\n\t" /* dst[6*idx + 0] = tmp (b) */ \
- +        "ld.ub\t%1, %4[%0]\n\t" /* tmp = r[y] */ \
- +        "ld.ub\t%2, %5[%0]\n\t" /* tmp2 = g[y] */ \
- +        "st.b\t%7[6*%8 + 5], %1\n\t" /* dst[6*idx + 5] = tmp (r) */ \
- +        "ld.ub\t%1, %6[%0]\n\t" /* tmp = b[y] */ \
- +        "st.b\t%7[6*%8 + 4], %2\n\t" /* dst[6*idx + 4] = tmp2 (g) */ \
- +        "st.b\t%7[6*%8 + 3], %1" /* dst[6*idx + 3] = tmp (b) */ \
- +        : "=&r" (y), "=&r" (tmp), "=&r" (tmp2) \
- +        : "r" (src), "r" (r), "r" (g), "r" (b), "r" (dst), "i" (idx) : "memory"); }
- +/* Two pixels, B,G,R byte order: look up src[2*idx] and src[2*idx+1] in the caller's r/g/b tables and store 6 bytes at dst[6*idx]. Uses the caller's locals r, g, b and tmp. */
- +#undef YUV2BGR2
- +#define YUV2BGR2(dst, src, y, idx) \
- +    { int tmp2; __asm__ volatile ( \
- +        "ld.ub\t%0, %3[2*%8]\n\t" /* y = src[2*idx] */ \
- +        "ld.ub\t%1, %4[%0]\n\t" /* tmp = r[y] */ \
- +        "ld.ub\t%2, %5[%0]\n\t" /* tmp2 = g[y] */ \
- +        "st.b\t%7[6*%8 + 2], %1\n\t" /* dst[6*idx + 2] = tmp (r) */ \
- +        "st.b\t%7[6*%8 + 1], %2\n\t" /* dst[6*idx + 1] = tmp2 (g) */ \
- +        "ld.ub\t%1, %6[%0]\n\t" /* tmp = b[y] */ \
- +        "ld.ub\t%0, %3[2*%8 + 1]\n\t" /* y = src[2*idx + 1] */ \
- +        "st.b\t%7[6*%8 + 0], %1\n\t" /* dst[6*idx + 0] = tmp (b) */ \
- +        "ld.ub\t%1, %4[%0]\n\t" /* tmp = r[y] */ \
- +        "ld.ub\t%2, %5[%0]\n\t" /* tmp2 = g[y] */ \
- +        "st.b\t%7[6*%8 + 5], %1\n\t" /* dst[6*idx + 5] = tmp (r) */ \
- +        "ld.ub\t%1, %6[%0]\n\t" /* tmp = b[y] */ \
- +        "st.b\t%7[6*%8 + 4], %2\n\t" /* dst[6*idx + 4] = tmp2 (g) */ \
- +        "st.b\t%7[6*%8 + 3], %1" /* dst[6*idx + 3] = tmp (b) */ \
- +        : "=&r" (y), "=&r" (tmp), "=&r" (tmp2) \
- +        : "r" (src), "r" (r), "r" (g), "r" (b), "r" (dst), "i" (idx) : "memory"); }
- +
- +
- +/* Convert a planar YUV slice to packed 24-bit B,G,R, two rows per outer iteration and 8 pixels per row per inner iteration. Returns srcSliceH. */
- +int yuv2bgr24_avr32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
- +                    int srcSliceH, uint8_t* dst[], int dstStride[]){
- +    int y;
- +
- +    if(c->srcFormat == PIX_FMT_YUV422P){
- +        srcStride[1] *= 2;
- +        srcStride[2] *= 2;
- +    }
- +    /* With the chroma strides doubled, (y>>1)*stride addresses chroma row y, i.e. every other 4:2:2 chroma row is used. */
- +    /* Note: the doubling is written into the caller's srcStride[] array. */
- +    for(y=0; y<srcSliceH; y+=2){
- +        uint8_t *dst_1= (uint8_t*)(dst[0] + (y+srcSliceY )*dstStride[0]);
- +        uint8_t *dst_2= (uint8_t*)(dst[0] + (y+srcSliceY+1)*dstStride[0]);
- +        uint8_t *r, *g, *b; /* byte lookup tables filled in by RGB(); same type as in yuv2rgb24_avr32 */
- +        uint8_t *py_1= src[0] + y*srcStride[0];
- +        uint8_t *py_2= py_1 + srcStride[0];
- +        uint8_t *pu= src[1] + (y>>1)*srcStride[1];
- +        uint8_t *pv= src[2] + (y>>1)*srcStride[2];
- +        unsigned int h_size= c->dstW>>3;
- +        while (h_size--) {
- +            uint32_t U, V, Y1, Y2, tmp;
- +            U = ((uint32_t*)pu)[0];
- +            V = ((uint32_t*)pv)[0];
- +            /* RGB(part) picks one byte of U and V ("t", "u", "l", "b") and loads the matching r/g/b tables. */
- +            RGB("t")
- +            YUV2BGR1(dst_1, py_1, Y1, 0)
- +            YUV2BGR1(dst_2, py_2, Y2, 0)
- +
- +            RGB("u")
- +            YUV2BGR2(dst_1, py_1, Y1, 1)
- +            YUV2BGR2(dst_2, py_2, Y2, 1)
- +
- +            RGB("l")
- +            YUV2BGR1(dst_1, py_1, Y1, 2)
- +            YUV2BGR1(dst_2, py_2, Y2, 2)
- +
- +            RGB("b")
- +            YUV2BGR2(dst_1, py_1, Y1, 3)
- +            YUV2BGR2(dst_2, py_2, Y2, 3)
- +
- +
- +            /* Per iteration: 4 U, 4 V and 8 Y bytes consumed per row, 24 output bytes written per row. */
- +            pu += 4;
- +            pv += 4;
- +            py_1 += 8;
- +            py_2 += 8;
- +            dst_1 += 24;
- +            dst_2 += 24;
- +        }
- +    }
- +    return srcSliceH;
- +}
- +
- +
- +/* Convert a planar YUV slice to packed 24-bit R,G,B, two rows per outer iteration and 8 pixels per row per inner iteration. Returns srcSliceH. */
- +static int yuv2rgb24_avr32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
- + int srcSliceH, uint8_t* dst[], int dstStride[]){
- + int y;
- + /* For 4:2:2 input the chroma strides are doubled (in the caller's array) so that (y>>1)*stride addresses chroma row y. */
- + if(c->srcFormat == PIX_FMT_YUV422P){
- + srcStride[1] *= 2;
- + srcStride[2] *= 2;
- + }
- + for(y=0; y<srcSliceH; y+=2){
- + uint8_t *dst_1= (uint8_t*)(dst[0] + (y+srcSliceY )*dstStride[0]);
- + uint8_t *dst_2= (uint8_t*)(dst[0] + (y+srcSliceY+1)*dstStride[0]);
- + uint8_t *r, *g, *b;
- + uint8_t *py_1= src[0] + y*srcStride[0];
- + uint8_t *py_2= py_1 + srcStride[0];
- + uint8_t *pu= src[1] + (y>>1)*srcStride[1];
- + uint8_t *pv= src[2] + (y>>1)*srcStride[2];
- + unsigned int h_size= c->dstW>>3;
- + while (h_size--) {
- + uint32_t U, V, Y1, Y2, tmp;
- + U = ((uint32_t*)pu)[0];
- + V = ((uint32_t*)pv)[0];
- + /* RGB(part) picks one byte of U and V ("t", "u", "l", "b") and loads the matching r/g/b tables. */
- + RGB("t")
- + YUV2RGB1(dst_1, py_1, Y1, 0)
- + YUV2RGB1(dst_2, py_2, Y2, 0)
- +
- + RGB("u")
- + YUV2RGB2(dst_1, py_1, Y1, 1)
- + YUV2RGB2(dst_2, py_2, Y2, 1)
- +
- + RGB("l")
- + YUV2RGB1(dst_1, py_1, Y1, 2)
- + YUV2RGB1(dst_2, py_2, Y2, 2)
- +
- + RGB("b")
- + YUV2RGB2(dst_1, py_1, Y1, 3)
- + YUV2RGB2(dst_2, py_2, Y2, 3)
- + /* Per iteration: 4 U, 4 V and 8 Y bytes consumed per row, 24 output bytes written per row. */
- + pu += 4;
- + pv += 4;
- + py_1 += 8;
- + py_2 += 8;
- + dst_1 += 24;
- + dst_2 += 24;
- + }
- + }
- + return srcSliceH;
- +}
- +
- +#define SCALE(x, bits) (((x) + (1 << ((bits) - 1))) >> (bits)) /* x / 2^bits with half an LSB added first, i.e. rounded to nearest */
- +#define COEFF_FRAC_BITS 9
- +#define OFFSET_FRAC_BITS 2
- +
- +/* Coefficients used in the pico */
- +static struct {
- + short coeff2_2;
- + short coeff2_3;
- + short coeff2_0;
- + short coeff2_1;
- + short coeff1_2;
- + short coeff1_3;
- + short coeff1_0;
- + short coeff1_1;
- + short coeff0_2;
- + short coeff0_3;
- + short coeff0_0;
- + short coeff0_1;
- +} pico_coeff;
- +
- +
- +static int yuv2bgr24_avr32_pico(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
- + int srcSliceH, uint8_t* dst[], int dstStride[]){
- + int y;
- + static int first_time = 1;
- +
- + /* Initialize pico */
- + PICO_LDCM_D(&pico_coeff,
- + PICO_REGVECT_COEFF0_A, PICO_REGVECT_COEFF0_B,
- + PICO_REGVECT_COEFF1_A, PICO_REGVECT_COEFF1_B,
- + PICO_REGVECT_COEFF2_A, PICO_REGVECT_COEFF2_B);
- +
- + PICO_PUT_W(PICO_CONFIG,
- + (PICO_PACKED_MODE << PICO_OUTPUT_MODE
- + | PICO_TRANSFORMATION_MODE << PICO_INPUT_MODE
- + | OFFSET_FRAC_BITS << PICO_OFFSET_FRAC_BITS
- + | COEFF_FRAC_BITS << PICO_COEFF_FRAC_BITS));
- +
- +
- + if(c->srcFormat == PIX_FMT_YUV422P){
- + srcStride[1] *= 2;
- + srcStride[2] *= 2;
- + }
- +
- + for(y=0; y<srcSliceH; y+=2){
- + uint8_t *dst_1= (uint8_t*)(dst[0] + (y+srcSliceY )*dstStride[0]);
- + uint8_t *dst_2= (uint8_t*)(dst[0] + (y+srcSliceY+1)*dstStride[0]);
- + uint8_t *r, *g, *b;
- + uint8_t *py_1= src[0] + y*srcStride[0];
- + uint8_t *py_2= py_1 + srcStride[0];
- + uint8_t *pu= src[1] + (y>>1)*srcStride[1];
- + uint8_t *pv= src[2] + (y>>1)*srcStride[2];
- + unsigned int h_size= c->dstW>>3;
- + int *py_1_int = (int *)py_1;
- + int *py_2_int = (int *)py_2;
- + int *pu_int = (int *)pu;
- + int *pv_int = (int *)pv;
- + while (h_size--) {
- + PICO_PUT_W(PICO_INPIX0, *py_1_int++);
- + PICO_PUT_W(PICO_INPIX1, *pu_int++);
- + PICO_PUT_W(PICO_INPIX2, *pv_int++);
- + PICO_OP(0, 0, 0, 4, 8);
- + PICO_OP(0, 1, 1, 4, 8);
- + PICO_OP(0, 2, 2, 5, 9);
- + PICO_OP(0, 3, 3, 5, 9);
- + PICO_PUT_W(PICO_INPIX0, *py_1_int++);
- + PICO_STCM_W(dst_1, PICO_REGVECT_OUTPIX2, PICO_REGVECT_OUTPIX1, PICO_REGVECT_OUTPIX0);
- + PICO_OP(0, 0, 0, 6, 10);
- + PICO_OP(0, 1, 1, 6, 10);
- + PICO_OP(0, 2, 2, 7, 11);
- + PICO_OP(0, 3, 3, 7, 11);
- + PICO_PUT_W(PICO_INPIX0, *py_2_int++);
- + PICO_STCM_W(dst_1 + 12, PICO_REGVECT_OUTPIX2, PICO_REGVECT_OUTPIX1, PICO_REGVECT_OUTPIX0);
- +
- + PICO_OP(0, 0, 0, 4, 8);
- + PICO_OP(0, 1, 1, 4, 8);
- + PICO_OP(0, 2, 2, 5, 9);
- + PICO_OP(0, 3, 3, 5, 9);
- + PICO_PUT_W(PICO_INPIX0, *py_2_int++);
- + PICO_STCM_W(dst_2, PICO_REGVECT_OUTPIX2, PICO_REGVECT_OUTPIX1, PICO_REGVECT_OUTPIX0);
- + PICO_OP(0, 0, 0, 6, 10);
- + PICO_OP(0, 1, 1, 6, 10);
- + PICO_OP(0, 2, 2, 7, 11);
- + PICO_OP(0, 3, 3, 7, 11);
- + PICO_STCM_W(dst_2 + 12, PICO_REGVECT_OUTPIX2, PICO_REGVECT_OUTPIX1, PICO_REGVECT_OUTPIX0);
- +
- + dst_1 += 24;
- + dst_2 += 24;
- + }
- + }
- + return srcSliceH;
- +}
- +
- +extern int avr32_use_pico;
- +
- +SwsFunc yuv2rgb_init_avr32 (SwsContext *c){
- + switch(c->dstFormat){
- + case PIX_FMT_BGR24:
- + {
- + if ( avr32_use_pico ){
- + MSG_ERR("AVR32 BGR24: Using PICO for color space conversion\n");
- + return yuv2bgr24_avr32_pico;
- + } else {
- + MSG_ERR("AVR32 BGR24: Using optimized color space conversion\n");
- + return yuv2bgr24_avr32;
- + }
- + }
- + break;
- + case PIX_FMT_RGB24:
- + {
- + if ( avr32_use_pico ){
- + MSG_ERR("AVR32 RGB24: Using PICO for color space conversion\n");
- + return yuv2bgr24_avr32_pico;
- + } else {
- + MSG_ERR("AVR32 RGB24: Using optimized color space conversion\n");
- + return yuv2rgb24_avr32;
- + }
- + }
- + }
- + return NULL;
- +}
- +
- +
- +int yuv2rgb_c_init_tables_avr32 (SwsContext *c, const int inv_table[4], int fullRange, int brightness, int contrast, int saturation){
- + const int isRgb = (c->dstFormat == PIX_FMT_RGB24);
- +
- + int64_t crv = inv_table[0];
- + int64_t cbu = inv_table[1];
- + int64_t cgu = -inv_table[2];
- + int64_t cgv = -inv_table[3];
- + int64_t cy = 1<<16;
- + int64_t oy = 0;
- +
- + if(!fullRange){
- + cy= (cy*255) / 219;
- + oy= 16<<16;
- + }
- +
- + cy = (cy *contrast )>>16;
- + crv= (crv*contrast * saturation)>>32;
- + cbu= (cbu*contrast * saturation)>>32;
- + cgu= (cgu*contrast * saturation)>>32;
- + cgv= (cgv*contrast * saturation)>>32;
- +
- + oy -= 256*brightness;
- +
- + pico_coeff.coeff1_0 = SCALE(cy, 16 - COEFF_FRAC_BITS); /* G <- Y */
- + pico_coeff.coeff1_1 = SCALE(cgu, 16 - COEFF_FRAC_BITS); /* G <- U */
- + pico_coeff.coeff1_2 = SCALE(cgv, 16 - COEFF_FRAC_BITS); /* G <- V */
- + pico_coeff.coeff1_3 = (SCALE(-128*cgu - 128*cgv - 16*cy, 16 - OFFSET_FRAC_BITS)
- + + /*0.5*/(1 << (OFFSET_FRAC_BITS-1))); /* G offset */
- +
- + if ( isRgb ){
- + pico_coeff.coeff0_0 = SCALE(cy, 16 - COEFF_FRAC_BITS); /* R <- Y */
- + pico_coeff.coeff0_1 = 0; /* R <- U */
- + pico_coeff.coeff0_2 = SCALE(crv, 16 - COEFF_FRAC_BITS); /* R <- V */
- + pico_coeff.coeff0_3 = (SCALE(-128*crv - 16*cy, 16 - OFFSET_FRAC_BITS)
- + + /*0.5*/(1 << (OFFSET_FRAC_BITS-1))); /* R offset */
- +
- + pico_coeff.coeff2_0 = SCALE(cy, 16 - COEFF_FRAC_BITS); /* B <- Y */
- + pico_coeff.coeff2_1 = SCALE(cbu, 16 - COEFF_FRAC_BITS); /* B <- U */
- + pico_coeff.coeff2_2 = 0; /* B <- V */
- + pico_coeff.coeff2_3 = (SCALE(-128*cbu - 16*cy, 16 - OFFSET_FRAC_BITS)
- + + /*0.5*/(1 << (OFFSET_FRAC_BITS-1)));/* B offset */
- + } else {
- + pico_coeff.coeff2_0 = SCALE(cy, 16 - COEFF_FRAC_BITS); /* R <- Y */
- + pico_coeff.coeff2_1 = 0; /* R <- U */
- + pico_coeff.coeff2_2 = SCALE(crv, 16 - COEFF_FRAC_BITS); /* R <- V */
- + pico_coeff.coeff2_3 = (SCALE(-128*crv - 16*cy, 16 - OFFSET_FRAC_BITS)
- + + /*0.5*/(1 << (OFFSET_FRAC_BITS-1))); /* R offset */
- +
- + pico_coeff.coeff0_0 = SCALE(cy, 16 - COEFF_FRAC_BITS); /* B <- Y */
- + pico_coeff.coeff0_1 = SCALE(cbu, 16 - COEFF_FRAC_BITS); /* B <- U */
- + pico_coeff.coeff0_2 = 0; /* B <- V */
- + pico_coeff.coeff0_3 = (SCALE(-128*cbu - 16*cy, 16 - OFFSET_FRAC_BITS)
- + + /*0.5*/(1 << (OFFSET_FRAC_BITS-1))); /* B offset */
- + }
- +
- +}
- +
- +
- +#undef RGB
- diff --git a/libvo/vo_fbdev2.c b/libvo/vo_fbdev2.c
- index 053c193..7017770 100644
- --- a/libvo/vo_fbdev2.c
- +++ b/libvo/vo_fbdev2.c
- @@ -22,6 +22,9 @@
- #include "sub.h"
- #include "mp_msg.h"
-
- +/* Draw directly to framebuffer */
- +#define USE_CONVERT2FB
- +
- static vo_info_t info = {
- "Framebuffer Device",
- "fbdev2",
- @@ -178,6 +181,15 @@ static int fb_preinit(int reset)
- }
- fb_orig_vinfo = fb_vinfo;
-
- + /* Reset panning offset */
- + fb_vinfo.yoffset = 0;
- + if (ioctl(fb_dev_fd, FBIOPAN_DISPLAY, &fb_vinfo)) {
- + mp_msg(MSGT_VO, MSGL_ERR,
- + "[fbdev2] FBIOPAN_DISPLAY failed: %s\n",
- + strerror(errno));
- + return 0;
- + }
- +
- fb_bpp = fb_vinfo.bits_per_pixel;
-
- /* 16 and 15 bpp is reported as 16 bpp */
- @@ -289,6 +301,10 @@ static int config(uint32_t width, uint32_t height, uint32_t d_width,
- mp_msg(MSGT_VO, MSGL_ERR, "[fbdev2] Can't malloc next_frame: %s\n", strerror(errno));
- return 1;
- }
- +#else
- + if ((fb_line_len * fb_vinfo.yres) <= (fb_finfo.smem_len / 2)
- + && fb_vinfo.yoffset == 0)
- + center += fb_line_len * fb_vinfo.yres;
- #endif
- if (fs) memset(frame_buffer, '\0', fb_line_len * fb_vinfo.yres);
-
- @@ -299,14 +315,22 @@ static int query_format(uint32_t format)
- {
- // open the device, etc.
- if (fb_preinit(0)) return 0;
- - if ((format & IMGFMT_BGR_MASK) == IMGFMT_BGR) {
- + if ((format & IMGFMT_RGB_MASK) == IMGFMT_RGB) {
- int fb_target_bpp = format & 0xff;
- set_bpp(&fb_vinfo, fb_target_bpp);
- fb_vinfo.xres_virtual = fb_vinfo.xres;
- - fb_vinfo.yres_virtual = fb_vinfo.yres;
- + fb_vinfo.yres_virtual = fb_vinfo.yres * 2;
- if (ioctl(fb_dev_fd, FBIOPUT_VSCREENINFO, &fb_vinfo)) {
- - mp_msg(MSGT_VO, MSGL_ERR, "[fbdev2] Can't put VSCREENINFO: %s\n", strerror(errno));
- - return 0;
- + mp_msg(MSGT_VO, MSGL_WARN,
- + "[fbdev2] Can't double virtual y resolution: %s\n",
- + strerror(errno));
- + fb_vinfo.yres_virtual = fb_vinfo.yres;
- + if (ioctl(fb_dev_fd, FBIOPUT_VSCREENINFO, &fb_vinfo)) {
- + mp_msg(MSGT_VO, MSGL_ERR,
- + "[fbdev2] Can't put VSCREENINFO: %s\n",
- + strerror(errno));
- + return -1;
- + }
- }
- fb_pixel_size = fb_vinfo.bits_per_pixel / 8;
- fb_bpp = fb_vinfo.red.length + fb_vinfo.green.length +
- @@ -367,16 +391,67 @@ static void check_events(void)
-
- static void flip_page(void)
- {
- -#ifndef USE_CONVERT2FB
- int i, out_offset = 0, in_offset = 0;
-
- - for (i = 0; i < in_height; i++) {
- - memcpy(center + out_offset, next_frame + in_offset,
- - in_width * fb_pixel_size);
- - out_offset += fb_line_len;
- - in_offset += in_width * fb_pixel_size;
- - }
- +#ifndef USE_CONVERT2FB
- + if (1) {
- +#else
- + if (fb_vinfo.yres_virtual == fb_vinfo.yres) {
- #endif
- + for (i = 0; i < in_height; i++) {
- + memcpy(center + out_offset, next_frame + in_offset,
- + in_width * fb_pixel_size);
- + out_offset += fb_line_len;
- + in_offset += in_width * fb_pixel_size;
- + }
- + } else {
- + if (fb_vinfo.yoffset == 0) {
- + fb_vinfo.yoffset += fb_vinfo.yres;
- + center -= fb_line_len * fb_vinfo.yres;
- + } else {
- + fb_vinfo.yoffset = 0;
- + center += fb_line_len * fb_vinfo.yres;
- + }
- +
- + if (ioctl(fb_dev_fd, FBIOPAN_DISPLAY, &fb_vinfo)) {
- + mp_msg(MSGT_VO, MSGL_ERR,
- + "[fbdev2] Can't FBIOPAN_DISPLAY: %s\n",
- + strerror(errno));
- + }
- + }
- +}
- +
- +static uint32_t get_image(mp_image_t *mpi)
- +{
- + if(mpi->flags&MP_IMGFLAG_READABLE)
- + return VO_FALSE; // slow video ram
- + if(mpi->type==MP_IMGTYPE_STATIC)
- + return VO_FALSE; // it is not static
- +
- + if (mpi->flags & (MP_IMGFLAG_ACCEPT_STRIDE | MP_IMGFLAG_ACCEPT_WIDTH)) {
- + // we're lucky or codec accepts stride => ok, let's go!
- +
- + //YUY2 and RGB formats
- + mpi->planes[0] = center;
- + mpi->width = in_width;
- + mpi->stride[0] = fb_line_len;
- +
- + // center image
- +
- + mpi->flags |= MP_IMGFLAG_DIRECT;
- +
- + return VO_TRUE;
- + }
- +
- + return VO_FALSE;
- +}
- +
- +static uint32_t put_image(mp_image_t *mpi)
- +{
- + // already out?
- + if ((mpi->flags & (MP_IMGFLAG_DIRECT | MP_IMGFLAG_DRAW_CALLBACK)))
- + return VO_TRUE;
- + return VO_FALSE;
- }
-
- static void uninit(void)
- @@ -403,6 +478,10 @@ static int control(uint32_t request, void *data, ...)
- switch (request) {
- case VOCTRL_QUERY_FORMAT:
- return query_format(*((uint32_t*)data));
- + case VOCTRL_GET_IMAGE:
- + return get_image(data);
- + case VOCTRL_DRAW_IMAGE:
- + return put_image(data);
- }
- return VO_NOTIMPL;
- }
- diff --git a/version.sh b/version.sh
- index 44b5c5d..cf22a68 100755
- --- a/version.sh
- +++ b/version.sh
- @@ -1,2 +1,2 @@
- #!/bin/sh
- -echo "#define VERSION \"1.0rc1-$1\"" > version.h
- +echo "#define VERSION \"1.0rc1.atmel.2-$1\"" > version.h
|